| HTMLGetRankedNamedEntities | Extract a grouped, relevancy-ranked list of named entities from a web page. |
Description: The HTMLGetRankedNamedEntities call is utilized to extract a grouped, relevancy-ranked list of named entities (people, companies, organizations, etc.) from a posted HTML document. AlchemyAPI will extract text from the posted HTML document (ignoring navigation links, advertisements, and other undesirable content), and perform entity extraction operations.
Endpoint: http://access.alchemyapi.com/calls/html/HTMLGetRankedNamedEntities
| http argument | parameter description | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| apikey | your private api key
(required parameter) |
||||||||||
| html | HTML document content (must be uri-argument encoded)
(required parameter) |
||||||||||
| url | HTML document URL
(optional parameter, must be uri-argument encoded) |
||||||||||
| outputMode | desired API output format Possible values: xml (default) json rdf rel-tag rel-tag-raw (optional parameter) |
||||||||||
| jsonp | desired JSONP callback (optional parameter, requires "outputMode" to be set to json) |
||||||||||
| disambiguate | whether to disambiguate detected entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter) |
||||||||||
| linkedData | whether to include Linked Data content links with disambiguated entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter. disambiguation must be enabled to utilize the linkedData feature.) |
||||||||||
| coreference | whether to resolve he/she/etc. coreferences into detected entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter) |
||||||||||
| quotations | whether to enable quotations extraction. Possible values: 1 - enabled 0 - disabled (default) (optional parameter) |
||||||||||
| sentiment | whether to enable entity-level sentiment analysis. Possible values: 1 - enabled 0 - disabled (default) (optional parameter - Note that enabling this option will incur usage of one (1) additional AlchemyAPI transaction) |
||||||||||
| showSourceText | whether to include the original 'source text' the entities were extracted from within the API response. Possible values: 1 - enabled 0 - disabled (default) (optional parameter) |
||||||||||
| sourceText | where to obtain the text that will be processed by this API call. AlchemyAPI supports multiple modes of text extraction: web page cleaning (removes ads, navigation links, etc.), raw text extraction (processes all web page text, including ads / nav links), visual constraint queries, and XPath queries. Possible values:
|
||||||||||
| cquery | a visual constraints query to apply to the web page. Constraint queries enable API operations to be performed on a targeted area of a web page, such as a story title or product description. (optional parameter, used when sourceText is set to 'cquery'. must be uri-argument encoded) |
||||||||||
| xpath | an XPath query to apply to the web page. XPath queries enable API operations to be performed on a targeted area of a web page, such as a story title or product description. (optional parameter, used when sourceText is set to 'xpath'. must be uri-argument encoded) |
||||||||||
| maxRetrieve | maximum number of named entities to extract (default: 50)
(optional parameter) |
||||||||||
| baseUrl | rel-tag output base http url (optional parameter, used with rel-tag or rel-tag-raw outputMode. must be uri-argument encoded) |
<results>
<status>REQUEST_STATUS</status>
<language>DOCUMENT_LANGUAGE</language>
<url>DOCUMENT_URL</url>
<text>DOCUMENT_TEXT</text>
<entities>
<entity>
<type>DETECTED_TYPE</type>
<relevance>DETECTED_RELEVANCE</relevance>
<count>DETECTED_COUNT</count>
<text>DETECTED_ENTITY</text>
<disambiguated>
<name>DISAMBIGUATED_ENTITY</name>
<subType>ENTITY_SUBTYPE</subType>
<website>WEBSITE</website>
<geo>LATITUDE LONGITUDE</geo>
<dbpedia>LINKED_DATA_DBPEDIA</dbpedia>
<yago>LINKED_DATA_YAGO</yago>
<opencyc>LINKED_DATA_OPENCYC</opencyc>
<umbel>LINKED_DATA_UMBEL</umbel>
<freebase>LINKED_DATA_FREEBASE</freebase>
<ciaFactbook>LINKED_DATA_FACTBOOK</ciaFactbook>
<census>LINKED_DATA_CENSUS</census>
<geonames>LINKED_DATA_GEONAMES</geonames>
<musicBrainz>LINKED_DATA_MUSICBRAINZ</musicBrainz>
<crunchbase>CRUNCHBASE_WEB_LINK</crunchbase>
<semanticCrunchbase>LINKED_DATA_CRUNCHBASE</semanticCrunchbase>
</disambiguated>
<quotations>
<quotation>ENTITY_QUOTATION</quotation>
</quotations>
<sentiment>
<type>SENTIMENT_LABEL</type>
<score>SENTIMENT_SCORE</score>
<mixed>SENTIMENT_MIXED</mixed>
</sentiment>
</entity>
</entities>
</results>
{
"status": "REQUEST_STATUS",
"language": "DOCUMENT_LANGUAGE",
"url": "DOCUMENT_URL",
"text": "DOCUMENT_TEXT",
"entities": [
{
"type": "DETECTED_TYPE",
"relevance": "DETECTED_RELEVANCE",
"count": "DETECTED_COUNT",
"text": "DETECTED_ENTITY",
"disambiguated": {
"name": "DISAMBIGUATED_ENTITY",
"subType": "ENTITY_SUBTYPE",
"website": "WEBSITE",
"geo": "LATITUDE LONGITUDE",
"dbpedia": "LINKED_DATA_DBPEDIA",
"yago": "LINKED_DATA_YAGO",
"opencyc": "LINKED_DATA_OPENCYC",
"umbel": "LINKED_DATA_UMBEL",
"freebase": "LINKED_DATA_FREEBASE",
"ciaFactbook": "LINKED_DATA_FACTBOOK",
"census": "LINKED_DATA_CENSUS",
"geonames": "LINKED_DATA_GEONAMES",
"musicBrainz": "LINKED_DATA_MUSICBRAINZ",
"crunchbase": "CRUNCHBASE_WEB_LINK",
"semanticCrunchbase": "LINKED_DATA_CRUNCHBASE"
},
"quotations": [
{
"quotation": "ENTITY_QUOTATION"
}
],
"sentiment": {
"type": "SENTIMENT_LABEL",
"score": "SENTIMENT_SCORE",
"mixed": "SENTIMENT_MIXED"
}
}
]
}
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:aapi="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#"
xml:base="http://rdf.alchemyapi.com/rdf/v1/r/response.rdf">
<rdf:Description rdf:ID="DOCUMENT_HASH">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#DocInfo"/>
<aapi:ResultStatus>REQUEST_STATUS</aapi:ResultStatus>
<aapi:Language>DOCUMENT_LANGUAGE</aapi:Language>
<aapi:URL>DOCUMENT_URL</aapi:URL>
<aapi:DocText>DOCUMENT_TEXT</aapi:DocText>
</rdf:Description>
<rdf:Description rdf:ID="DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#EntityOccurrences"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:EntityType>DETECTED_TYPE</aapi:EntityType>
<aapi:Relevance>DETECTED_RELEVANCE</aapi:Relevance>
<aapi:NumOccurs>DETECTED_COUNT</aapi:NumOccurs>
<aapi:Name>DETECTED_ENTITY</aapi:Name>
<aapi:Disambiguation>
<rdf:Description rdf:about="#DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#Disambiguation"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:ResolvedName>DISAMBIGUATED_ENTITY</aapi:ResolvedName>
<aapi:SubType>ENTITY_SUBTYPE</aapi:SubType>
<aapi:URL>WEBSITE</aapi:URL>
<aapi:Geo>LATITUDE LONGITUDE</aapi:Geo>
<owl:sameAs rdf:resource="LINKED_DATA_DBPEDIA"/>
<owl:sameAs rdf:resource="LINKED_DATA_YAGO"/>
<owl:sameAs rdf:resource="LINKED_DATA_OPENCYC"/>
<owl:sameAs rdf:resource="LINKED_DATA_UMBEL"/>
<owl:sameAs rdf:resource="LINKED_DATA_FREEBASE"/>
<owl:sameAs rdf:resource="LINKED_DATA_FACTBOOK"/>
<owl:sameAs rdf:resource="LINKED_DATA_CENSUS"/>
<owl:sameAs rdf:resource="LINKED_DATA_GEONAMES"/>
<owl:sameAs rdf:resource="LINKED_DATA_MUSICBRAINZ"/>
<owl:sameAs rdf:resource="LINKED_DATA_CRUNCHBASE"/>
</rdf:Description>
</aapi:Disambiguation>
<aapi:Quotations>
<rdf:Description rdf:about="#DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#Quotations"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:Quotation>ENTITY_QUOTATION</aapi:Quotation>
</rdf:Description>
</aapi:Quotations>
<aapi:Sentiment>
<rdf:Description rdf:about="#DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#Sentiment"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:SentimentType>SENTIMENT_LABEL</aapi:SentimentType>
<aapi:SentimentScore>SENTIMENT_SCORE</aapi:SentimentScore>
<aapi:SentimentMixed>SENTIMENT_MIXED</aapi:SentimentMixed>
</rdf:Description>
</aapi:Sentiment>
</rdf:Description>
</rdf:RDF>
<results>
<status>REQUEST_STATUS</status>
<language>DOCUMENT_LANGUAGE</language>
<url>REQUESTED_URL</url>
<text>DOCUMENT_TEXT</text>
<microformats>
<a href="REQUESTED_BASE_URL/DETECTED_ENTITY" rel="tag">DETECTED_ENTITY</a>
<a href="REQUESTED_BASE_URL/DETECTED_ENTITY" rel="tag">DETECTED_ENTITY</a>
</microformats>
</results>
<a href="REQUESTED_BASE_URL/DETECTED_ENTITY" rel="tag">DETECTED_ENTITY</a>
<a href="REQUESTED_BASE_URL/DETECTED_ENTITY" rel="tag">DETECTED_ENTITY</a>
| field name | field description | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| status | success / failure status indicating whether the request was processed. Possible values: OK ERROR |
||||||||||||||||||||||||||||||||
| language | detected language (english, french, ..) for the submitted content. | ||||||||||||||||||||||||||||||||
| url | the HTTP URL that information was requested for. | ||||||||||||||||||||||||||||||||
| type | the detected entity type. Possible values: (click to see list) |
||||||||||||||||||||||||||||||||
| relevance | relevance score for a detected entity. Possible values: (0.0 - 1.0) [1.0 = most relevant] |
||||||||||||||||||||||||||||||||
| count | number of times an entity was seen within the source web page. | ||||||||||||||||||||||||||||||||
| text | the detected entity text. | ||||||||||||||||||||||||||||||||
| disambiguated | disambiguation information for the detected entity (sent only if disambiguation occurred)
|
||||||||||||||||||||||||||||||||
| quotations | extracted quotations for the detected entity (sent only if quotations extraction is enabled)
|
||||||||||||||||||||||||||||||||
| sentiment | sentiment for the detected entity (sent only if entity-level sentiment analysis is enabled)
|
||||||||||||||||||||||||||||||||
| statusInfo | failure status information (sent only if "status" == "ERROR"). Possible values: invalid-api-key page-is-not-html |
Description: The HTMLGetNamedEntities call is utilized to extract named entities (people, companies, organizations, etc.) from a posted HTML document. AlchemyAPI will extract text from the posted HTML document (ignoring navigation links, advertisements, and other undesirable content), and perform entity extraction operations.
Endpoint: http://access.alchemyapi.com/calls/html/HTMLGetNamedEntities
| http argument | parameter description | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
| apikey | your private api key
(required parameter) |
||||||||||
| html | HTML document content (must be uri-argument encoded)
(required parameter) |
||||||||||
| url | HTML document URL
(optional parameter, must be uri-argument encoded) |
||||||||||
| outputMode | desired API output format Possible values: xml (default) json rdf (optional parameter) |
||||||||||
| disambiguate | whether to disambiguate detected entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter) |
||||||||||
| linkedData | whether to include Linked Data content links with disambiguated entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter. disambiguation must be enabled to utilize the linkedData feature.) |
||||||||||
| coreference | whether to resolve he/she/etc. coreferences into detected entities. Possible values: 1 - enabled (default) 0 - disabled (optional parameter) |
||||||||||
| quotations | whether to enable quotations extraction. Possible values: 1 - enabled 0 - disabled (default) (optional parameter) |
||||||||||
| showSourceText | whether to include the original 'source text' the entities were extracted from within the API response. Possible values: 1 - enabled 0 - disabled (default) (optional parameter) |
||||||||||
| sourceText | where to obtain the text that will be processed by this API call. AlchemyAPI supports multiple modes of text extraction: web page cleaning (removes ads, navigation links, etc.), raw text extraction (processes all web page text, including ads / nav links), visual constraint queries, and XPath queries. Possible values:
|
||||||||||
| cquery | a visual constraints query to apply to the web page. Constraint queries enable API operations to be performed on a targeted area of a web page, such as a story title or product description. (optional parameter, used when sourceText is set to 'cquery'. must be uri-argument encoded) |
||||||||||
| xpath | an XPath query to apply to the web page. XPath queries enable API operations to be performed on a targeted area of a web page, such as a story title or product description. (optional parameter, used when sourceText is set to 'xpath'. must be uri-argument encoded) |
<results>
<status>REQUEST_STATUS</status>
<language>DOCUMENT_LANGUAGE</language>
<url>DOCUMENT_URL</url>
<text>DOCUMENT_TEXT</text>
<entities>
<entity>
<type>DETECTED_TYPE</type>
<start>START_POS</start>
<end>END_POS</end>
<text>DETECTED_ENTITY</text>
<disambiguated>
<name>DISAMBIGUATED_ENTITY</name>
<subType>ENTITY_SUBTYPE</subType>
<website>WEBSITE</website>
<geo>LATITUDE LONGITUDE</geo>
<dbpedia>LINKED_DATA_DBPEDIA</dbpedia>
<yago>LINKED_DATA_YAGO</yago>
<opencyc>LINKED_DATA_OPENCYC</opencyc>
<umbel>LINKED_DATA_UMBEL</umbel>
<freebase>LINKED_DATA_FREEBASE</freebase>
<ciaFactbook>LINKED_DATA_FACTBOOK</ciaFactbook>
<census>LINKED_DATA_CENSUS</census>
<geonames>LINKED_DATA_GEONAMES</geonames>
<musicBrainz>LINKED_DATA_MUSICBRAINZ</musicBrainz>
<crunchbase>CRUNCHBASE_WEB_LINK</crunchbase>
<semanticCrunchbase>LINKED_DATA_CRUNCHBASE</semanticCrunchbase>
</disambiguated>
<quotations>
<quotation>ENTITY_QUOTATION</quotation>
</quotations>
</entity>
</entities>
</results>
{
"status": "REQUEST_STATUS",
"language": "DOCUMENT_LANGUAGE",
"url": "DOCUMENT_URL",
"text": "DOCUMENT_TEXT",
"entities": [
{
"type": "DETECTED_TYPE",
"start": "START_POS",
"end": "END_POS",
"text": "DETECTED_ENTITY",
"disambiguated": {
"name": "DISAMBIGUATED_ENTITY",
"subType": "ENTITY_SUBTYPE",
"website": "WEBSITE",
"geo": "LATITUDE LONGITUDE",
"dbpedia": "LINKED_DATA_DBPEDIA",
"yago": "LINKED_DATA_YAGO",
"opencyc": "LINKED_DATA_OPENCYC",
"umbel": "LINKED_DATA_UMBEL",
"freebase": "LINKED_DATA_FREEBASE",
"ciaFactbook": "LINKED_DATA_FACTBOOK",
"census": "LINKED_DATA_CENSUS",
"geonames": "LINKED_DATA_GEONAMES",
"musicBrainz": "LINKED_DATA_MUSICBRAINZ",
"crunchbase": "CRUNCHBASE_WEB_LINK",
"semanticCrunchbase": "LINKED_DATA_CRUNCHBASE"
},
"quotations": [
{
"quotation": "ENTITY_QUOTATION"
}
]
}
]
}
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:aapi="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#"
xml:base="http://rdf.alchemyapi.com/rdf/v1/r/response.rdf">
<rdf:Description rdf:ID="DOCUMENT_HASH">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#DocInfo"/>
<aapi:ResultStatus>REQUEST_STATUS</aapi:ResultStatus>
<aapi:Language>DOCUMENT_LANGUAGE</aapi:Language>
<aapi:URL>DOCUMENT_URL</aapi:URL>
<aapi:DocText>DOCUMENT_TEXT</aapi:DocText>
</rdf:Description>
<rdf:Description rdf:ID="DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#EntityOccurrence"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:EntityType>DETECTED_TYPE</aapi:EntityType>
<aapi:TextStartPos>START_POS</aapi:TextStartPos>
<aapi:TextEndPos>END_POS</aapi:TextEndPos>
<aapi:Name>DETECTED_ENTITY</aapi:Name>
<aapi:Disambiguation>
<rdf:Description rdf:about="#DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#Disambiguation"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:ResolvedName>DISAMBIGUATED_ENTITY</aapi:ResolvedName>
<aapi:SubType>ENTITY_SUBTYPE</aapi:SubType>
<aapi:URL>WEBSITE</aapi:URL>
<aapi:Geo>LATITUDE LONGITUDE</aapi:Geo>
<owl:sameAs rdf:resource="LINKED_DATA_DBPEDIA"/>
<owl:sameAs rdf:resource="LINKED_DATA_YAGO"/>
<owl:sameAs rdf:resource="LINKED_DATA_OPENCYC"/>
<owl:sameAs rdf:resource="LINKED_DATA_UMBEL"/>
<owl:sameAs rdf:resource="LINKED_DATA_FREEBASE"/>
<owl:sameAs rdf:resource="LINKED_DATA_FACTBOOK"/>
<owl:sameAs rdf:resource="LINKED_DATA_CENSUS"/>
<owl:sameAs rdf:resource="LINKED_DATA_GEONAMES"/>
<owl:sameAs rdf:resource="LINKED_DATA_MUSICBRAINZ"/>
<owl:sameAs rdf:resource="LINKED_DATA_CRUNCHBASE"/>
</rdf:Description>
</aapi:Disambiguation>
<aapi:Quotations>
<rdf:Description rdf:about="#DOCUMENT_HASH-ENTITY_NUM">
<rdf:type rdf:resource="http://rdf.alchemyapi.com/rdf/v1/s/aapi-schema#Quotations"/>
<aapi:Doc>DOCUMENT_HASH</aapi:Doc>
<aapi:Quotation>ENTITY_QUOTATION</aapi:Quotation>
</rdf:Description>
</aapi:Quotations>
</rdf:Description>
</rdf:RDF>
| field name | field description | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| status | success / failure status indicating whether the request was processed. Possible values: OK ERROR |
||||||||||||||||||||||||||||||||
| language | detected language (english, french, ..) for the submitted content. | ||||||||||||||||||||||||||||||||
| url | the HTTP URL that information was requested for. | ||||||||||||||||||||||||||||||||
| type | the detected entity type. Possible values: (click to see list) |
||||||||||||||||||||||||||||||||
| start | start offset (in bytes) of this entity in the text stream.
Only included in API responses for AlchemyAPI subscription users |
||||||||||||||||||||||||||||||||
| end | end offset (in bytes) of this entity in the text stream.
Only included in API responses for AlchemyAPI subscription users |
||||||||||||||||||||||||||||||||
| text | the detected entity text. | ||||||||||||||||||||||||||||||||
| disambiguated | disambiguation information for the detected entity (sent only if disambiguation occurred)
|
||||||||||||||||||||||||||||||||
| quotations | extracted quotations for the detected entity (sent only if quotations extraction is enabled)
|
||||||||||||||||||||||||||||||||
| statusInfo | failure status information (sent only if "status" == "ERROR"). Possible values: invalid-api-key page-is-not-html |