xy z
for all tags "p" allowing PCDATA */ for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { return(0); } } } return(1); } /** * htmlNewDocNoDtD: * @URI: URI for the dtd, or NULL * @ExternalID: the external ID of the DTD, or NULL * * Creates a new HTML document without a DTD node if @URI and @ExternalID * are NULL * * Returns a new document, do not initialize the DTD if not provided */ htmlDocPtr htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { xmlDocPtr cur; /* * Allocate a new document and fill the fields. */ cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); if (cur == NULL) { htmlErrMemory(NULL, "HTML document creation failed\n"); return(NULL); } memset(cur, 0, sizeof(xmlDoc)); cur->type = XML_HTML_DOCUMENT_NODE; cur->version = NULL; cur->intSubset = NULL; cur->doc = cur; cur->name = NULL; cur->children = NULL; cur->extSubset = NULL; cur->oldNs = NULL; cur->encoding = NULL; cur->standalone = 1; cur->compression = 0; cur->ids = NULL; cur->refs = NULL; cur->_private = NULL; cur->charset = XML_CHAR_ENCODING_UTF8; if ((ExternalID != NULL) || (URI != NULL)) xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); return(cur); } /** * htmlNewDoc: * @URI: URI for the dtd, or NULL * @ExternalID: the external ID of the DTD, or NULL * * Creates a new HTML document * * Returns a new document */ htmlDocPtr htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) { if ((URI == NULL) && (ExternalID == NULL)) return(htmlNewDocNoDtD( BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd", BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN")); return(htmlNewDocNoDtD(URI, ExternalID)); } /************************************************************************ * * * The parser itself * * Relates to http://www.w3.org/TR/html40 * * * ************************************************************************/ /************************************************************************ * * * The parser 
itself * * * ************************************************************************/ static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt); /** * htmlParseHTMLName: * @ctxt: an HTML parser context * * parse an HTML tag or attribute name, note that we convert it to lowercase * since HTML names are not case-sensitive. * * Returns the Tag Name parsed or NULL */ static const xmlChar * htmlParseHTMLName(htmlParserCtxtPtr ctxt) { int i = 0; xmlChar loc[HTML_PARSER_BUFFER_SIZE]; if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && (CUR != ':')) return(NULL); while ((i < HTML_PARSER_BUFFER_SIZE) && ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || (CUR == ':') || (CUR == '-') || (CUR == '_'))) { if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; else loc[i] = CUR; i++; NEXT; } return(xmlDictLookup(ctxt->dict, loc, i)); } /** * htmlParseName: * @ctxt: an HTML parser context * * parse an HTML name, this routine is case sensitive. * * Returns the Name parsed or NULL */ static const xmlChar * htmlParseName(htmlParserCtxtPtr ctxt) { const xmlChar *in; const xmlChar *ret; int count = 0; GROW; /* * Accelerator for simple ASCII names */ in = ctxt->input->cur; if (((*in >= 0x61) && (*in <= 0x7A)) || ((*in >= 0x41) && (*in <= 0x5A)) || (*in == '_') || (*in == ':')) { in++; while (((*in >= 0x61) && (*in <= 0x7A)) || ((*in >= 0x41) && (*in <= 0x5A)) || ((*in >= 0x30) && (*in <= 0x39)) || (*in == '_') || (*in == '-') || (*in == ':') || (*in == '.')) in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; ctxt->input->col += count; return(ret); } } return(htmlParseNameComplex(ctxt)); } static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; int count = 0; /* * Handler for more complex cases */ GROW; c = CUR_CHAR(l); if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ (!IS_LETTER(c) && (c != '_') && (c != 
':'))) { return(NULL); } while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { if (count++ > 100) { count = 0; GROW; } len += l; NEXTL(l); c = CUR_CHAR(l); } return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } /** * htmlParseHTMLAttribute: * @ctxt: an HTML parser context * @stop: a char stop value * * parse an HTML attribute value till the stop (quote), if * stop is 0 then it stops at the first space * * Returns the attribute parsed or NULL */ static xmlChar * htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { xmlChar *buffer = NULL; int buffer_size = 0; xmlChar *out = NULL; const xmlChar *name = NULL; const xmlChar *cur = NULL; const htmlEntityDesc * ent; /* * allocate a translation buffer. */ buffer_size = HTML_PARSER_BUFFER_SIZE; buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { htmlErrMemory(ctxt, "buffer allocation failed\n"); return(NULL); } out = buffer; /* * Ok loop until we reach one of the ending chars */ while ((CUR != 0) && (CUR != stop)) { if ((stop == 0) && (CUR == '>')) break; if ((stop == 0) && (IS_BLANK_CH(CUR))) break; if (CUR == '&') { if (NXT(1) == '#') { unsigned int c; int bits; c = htmlParseCharRef(ctxt); if (c < 0x80) { *out++ = c; bits= -6; } else if (c < 0x800) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) { *out++ = ((c >> bits) & 0x3F) | 0x80; } if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBuffer(buffer); out = &buffer[indx]; } } else { ent = htmlParseEntityRef(ctxt, &name); if (name == NULL) { *out++ = '&'; if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBuffer(buffer); out = &buffer[indx]; } } else if (ent == NULL) { 
*out++ = '&'; cur = name; while (*cur != 0) { if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBuffer(buffer); out = &buffer[indx]; } *out++ = *cur++; } } else { unsigned int c; int bits; if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBuffer(buffer); out = &buffer[indx]; } c = (xmlChar)ent->value; if (c < 0x80) { *out++ = c; bits= -6; } else if (c < 0x800) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) { *out++ = ((c >> bits) & 0x3F) | 0x80; } } } } else { unsigned int c; int bits, l; if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBuffer(buffer); out = &buffer[indx]; } c = CUR_CHAR(l); if (c < 0x80) { *out++ = c; bits= -6; } else if (c < 0x800) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) { *out++ = ((c >> bits) & 0x3F) | 0x80; } NEXT; } } *out++ = 0; return(buffer); } /** * htmlParseEntityRef: * @ctxt: an HTML parser context * @str: location to store the entity name * * parse an HTML ENTITY references * * [68] EntityRef ::= '&' Name ';' * * Returns the associated htmlEntityDescPtr if found, or NULL otherwise, * if non-NULL *str will have to be freed by the caller. */ const htmlEntityDesc * htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) { const xmlChar *name; const htmlEntityDesc * ent = NULL; if (str != NULL) *str = NULL; if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); if (CUR == '&') { NEXT; name = htmlParseName(ctxt); if (name == NULL) { htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, "htmlParseEntityRef: no name\n", NULL, NULL); } else { GROW; if (CUR == ';') { if (str != NULL) *str = name; /* * Lookup the entity in the table. 
*/ ent = htmlEntityLookup(name); if (ent != NULL) /* OK that's ugly !!! */ NEXT; } else { htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, "htmlParseEntityRef: expecting ';'\n", NULL, NULL); if (str != NULL) *str = name; } } } return(ent); } /** * htmlParseAttValue: * @ctxt: an HTML parser context * * parse a value for an attribute * Note: the parser won't do substitution of entities here, this * will be handled later in xmlStringGetNodeList, unless it was * asked for ctxt->replaceEntities != 0 * * Returns the AttValue parsed or NULL. */ static xmlChar * htmlParseAttValue(htmlParserCtxtPtr ctxt) { xmlChar *ret = NULL; if (CUR == '"') { NEXT; ret = htmlParseHTMLAttribute(ctxt, '"'); if (CUR != '"') { htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, "AttValue: \" expected\n", NULL, NULL); } else NEXT; } else if (CUR == '\'') { NEXT; ret = htmlParseHTMLAttribute(ctxt, '\''); if (CUR != '\'') { htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, "AttValue: ' expected\n", NULL, NULL); } else NEXT; } else { /* * That's an HTMLism, the attribute value may not be quoted */ ret = htmlParseHTMLAttribute(ctxt, 0); if (ret == NULL) { htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, "AttValue: no value found\n", NULL, NULL); } } return(ret); } /** * htmlParseSystemLiteral: * @ctxt: an HTML parser context * * parse an HTML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * * Returns the SystemLiteral parsed or NULL */ static xmlChar * htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { const xmlChar *q; xmlChar *ret = NULL; if (CUR == '"') { NEXT; q = CUR_PTR; while ((IS_CHAR_CH(CUR)) && (CUR != '"')) NEXT; if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) NEXT; if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished 
SystemLiteral\n", NULL, NULL); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, " or ' expected\n", NULL, NULL); } return(ret); } /** * htmlParsePubidLiteral: * @ctxt: an HTML parser context * * parse an HTML public literal * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * * Returns the PubidLiteral parsed or NULL. */ static xmlChar * htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { const xmlChar *q; xmlChar *ret = NULL; /* * Name ::= (Letter | '_') (NameChar)* */ if (CUR == '"') { NEXT; q = CUR_PTR; while (IS_PUBIDCHAR_CH(CUR)) NEXT; if (CUR != '"') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) NEXT; if (CUR != '\'') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, "PubidLiteral \" or ' expected\n", NULL, NULL); } return(ret); } /** * htmlParseScript: * @ctxt: an HTML parser context * * parse the content of an HTML SCRIPT or STYLE element * http://www.w3.org/TR/html4/sgml/dtd.html#Script * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet * http://www.w3.org/TR/html4/types.html#type-script * http://www.w3.org/TR/html4/types.html#h-6.15 * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1 * * Script data ( %Script; in the DTD) can be the content of the SCRIPT * element and the value of intrinsic event attributes. User agents must * not evaluate script data as HTML markup but instead must pass it on as * data to a script engine. 
* NOTES:
 * - The content is passed like CDATA
 * - the attributes for style and scripting "onXXX" are also described
 *   as CDATA but SGML allows entities references in attributes so their
 *   processing is identical as other attributes
 * - the collected text is delivered through the SAX cdataBlock callback
 *   when there is one, else through the characters callback, and only
 *   when a SAX handler is present and SAX is not disabled
 */
static void
htmlParseScript(htmlParserCtxtPtr ctxt) {
    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur,l;

    SHRINK;
    cur = CUR_CHAR(l);
    while (IS_CHAR_CH(cur)) {
        if ((cur == '<') && (NXT(1) == '!') &&
            (NXT(2) == '-') && (NXT(3) == '-')) {
            /* Flush the pending text, then let the comment parser run. */
            if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (ctxt->sax->cdataBlock!= NULL) {
                    /*
                     * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
                     */
                    ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
                } else if (ctxt->sax->characters != NULL) {
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                }
            }
            nbchar = 0;
            htmlParseComment(ctxt);
            cur = CUR_CHAR(l);
            continue;
        } else if ((cur == '<') && (NXT(1) == '/')) {
            /*
             * One should break here, the specification is clear:
             * Authors should therefore escape "</" within the content.
             * Escape mechanisms are specific to each scripting or
             * style sheet language.
             *
             * In recovery mode, only break if end tag match the
             * current tag, effectively ignoring all tags inside the
             * script/style block and treating the entire block as
             * CDATA.
             */
            if (ctxt->recovery) {
                if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
                                   xmlStrlen(ctxt->name)) == 0) {
                    break; /* while */
                } else {
                    htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
                                 "Element %s embeds close tag\n",
                                 ctxt->name, NULL);
                }
            } else {
                if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
                    ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) {
                    break; /* while */
                }
            }
        }
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
            /*
             * Buffer full: flush it. Use the same SAX guard as the other
             * flushes of this function so that a missing handler is not
             * dereferenced and a disabled SAX stays silent.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (ctxt->sax->cdataBlock!= NULL) {
                    /*
                     * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
                     */
                    ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
                } else if (ctxt->sax->characters != NULL) {
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                }
            }
            nbchar = 0;
        }
        GROW;
        NEXTL(l);
        cur = CUR_CHAR(l);
    }

    if (!(IS_CHAR_CH(cur))) {
        htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
                        "Invalid char in CDATA 0x%X\n", cur);
        NEXT;
    }

    /* Deliver whatever is left in the buffer. */
    if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
        if (ctxt->sax->cdataBlock!= NULL) {
            /*
             * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
             */
            ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
        } else if (ctxt->sax->characters != NULL) {
            ctxt->sax->characters(ctxt->userData, buf, nbchar);
        }
    }
}

/**
 * htmlParseCharData:
 * @ctxt:  an HTML parser context
 *
 * parse a CharData section.
 * Text is collected until a '<' or '&' is met (unless that character is
 * the pending ctxt->token) or a non-character is found. It is reported
 * through the SAX ignorableWhitespace callback when areBlanks() says so,
 * through the characters callback otherwise.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */
static void
htmlParseCharData(htmlParserCtxtPtr ctxt) {
    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
    int nbchar = 0;
    int cur, l;

    SHRINK;
    cur = CUR_CHAR(l);
    while (((cur != '<') || (ctxt->token == '<')) &&
           ((cur != '&') || (ctxt->token == '&')) &&
           (IS_CHAR(cur))) {
        COPY_BUF(l,buf,nbchar,cur);
        if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
            /*
             * Ok the segment is to be consumed as chars.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                if (areBlanks(ctxt, buf, nbchar)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       buf, nbchar);
                } else {
                    htmlCheckParagraph(ctxt);
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                }
            }
            nbchar = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
        if (cur == 0) {
            /* Possibly just the end of the current window: refill. */
            SHRINK;
            GROW;
            cur = CUR_CHAR(l);
        }
    }
    if (nbchar != 0) {
        buf[nbchar] = 0;

        /*
         * Ok the segment is to be consumed as chars.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
            if (areBlanks(ctxt, buf, nbchar)) {
                if (ctxt->sax->ignorableWhitespace != NULL)
                    ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                   buf, nbchar);
            } else {
                htmlCheckParagraph(ctxt);
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
            }
        }
    } else {
        /*
         * Loop detection: nothing was consumed and the input is
         * exhausted, so switch to the EOF state.
         */
        if (cur == 0)
            ctxt->instate = XML_PARSER_EOF;
    }
}

/**
 * htmlParseExternalID:
 * @ctxt:  an HTML parser context
 * @publicID:  a xmlChar** receiving PubidLiteral
 *
 * Parse an External ID or a Public ID
 *
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
 *
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
 *
 * Returns the SystemLiteral; in the 'PUBLIC' case *@publicID receives
 * the PubidLiteral. After 'PUBLIC' the SystemLiteral is optional, so
 * it is possible to return NULL and have publicID set.
*/
static xmlChar *
htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
    xmlChar *URI = NULL;

    /* The keywords are matched through UPPER/UPP(). */
    if ((UPPER == 'S') && (UPP(1) == 'Y') &&
         (UPP(2) == 'S') && (UPP(3) == 'T') &&
         (UPP(4) == 'E') && (UPP(5) == 'M')) {
        SKIP(6);
        /* A missing blank is reported but parsing goes on. */
        if (!IS_BLANK_CH(CUR)) {
            htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
                         "Space required after 'SYSTEM'\n", NULL, NULL);
        }
        SKIP_BLANKS;
        URI = htmlParseSystemLiteral(ctxt);
        if (URI == NULL) {
            htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
                         "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
        }
    } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
               (UPP(2) == 'B') && (UPP(3) == 'L') &&
               (UPP(4) == 'I') && (UPP(5) == 'C')) {
        SKIP(6);
        if (!IS_BLANK_CH(CUR)) {
            htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
                         "Space required after 'PUBLIC'\n", NULL, NULL);
        }
        SKIP_BLANKS;
        /* publicID is written without a NULL check: it must be valid. */
        *publicID = htmlParsePubidLiteral(ctxt);
        if (*publicID == NULL) {
            htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
                         "htmlParseExternalID: PUBLIC, no Public Identifier\n",
                         NULL, NULL);
        }
        SKIP_BLANKS;
        /* The system literal is optional after a public identifier. */
        if ((CUR == '"') || (CUR == '\'')) {
            URI = htmlParseSystemLiteral(ctxt);
        }
    }
    return(URI);
}

/**
 * htmlParsePI:
 * @ctxt:  an HTML parser context
 *
 * Parse a Processing Instruction starting with '<?'. The target name is
 * read with htmlParseName() and the content runs up to the first '>'.
 * The target and the content (NULL when there is none) are passed to the
 * SAX processingInstruction callback. ctxt->instate is set to
 * XML_PARSER_PI while parsing and restored before returning.
 *
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 */
static void
htmlParsePI(htmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = HTML_PARSER_BUFFER_SIZE;
    int cur, l;
    const xmlChar *target;
    xmlParserInputState state;
    int count = 0;

    if ((RAW == '<') && (NXT(1) == '?')) {
        state = ctxt->instate;
        ctxt->instate = XML_PARSER_PI;
        /*
         * this is a Processing Instruction.
         */
        SKIP(2);
        SHRINK;

        /*
         * Parse the target name and check for special support like
         * namespace.
         */
        target = htmlParseName(ctxt);
        if (target != NULL) {
            if (RAW == '>') {
                SKIP(1);

                /*
                 * SAX: PI detected, target only and no content.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, NULL);
                ctxt->instate = state;
                return;
            }
            buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
            if (buf == NULL) {
                htmlErrMemory(ctxt, NULL);
                ctxt->instate = state;
                return;
            }
            cur = CUR;
            if (!IS_BLANK(cur)) {
                htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
                          "ParsePI: PI %s space expected\n", target, NULL);
            }
            SKIP_BLANKS;
            cur = CUR_CHAR(l);
            while (IS_CHAR(cur) && (cur != '>')) {
                /* Double the buffer when fewer than 5 bytes are left. */
                if (len + 5 >= size) {
                    xmlChar *tmp;

                    size *= 2;
                    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                    if (tmp == NULL) {
                        htmlErrMemory(ctxt, NULL);
                        xmlFree(buf);
                        ctxt->instate = state;
                        return;
                    }
                    buf = tmp;
                }
                /* Call GROW once every 50 characters read. */
                count++;
                if (count > 50) {
                    GROW;
                    count = 0;
                }
                COPY_BUF(l,buf,len,cur);
                NEXTL(l);
                cur = CUR_CHAR(l);
                if (cur == 0) {
                    SHRINK;
                    GROW;
                    cur = CUR_CHAR(l);
                }
            }
            buf[len] = 0;
            if (cur != '>') {
                htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
                      "ParsePI: PI %s never end ...\n", target, NULL);
            } else {
                SKIP(1);

                /*
                 * SAX: PI detected.
                 */
                if ((ctxt->sax) && (!ctxt->disableSAX) &&
                    (ctxt->sax->processingInstruction != NULL))
                    ctxt->sax->processingInstruction(ctxt->userData,
                                                     target, buf);
            }
            /* buf is freed here, right after the callback. */
            xmlFree(buf);
        } else {
            htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
                         "PI is not started correctly", NULL, NULL);
        }
        ctxt->instate = state;
    }
}

/**
 * htmlParseComment:
 * @ctxt:  an HTML parser context
 *
 * Parse an XML (SGML) comment, which starts with '<!--'
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
static void
htmlParseComment(htmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len;
    int size = HTML_PARSER_BUFFER_SIZE;
    int q, ql;
    int r, rl;
    int cur, l;
    xmlParserInputState state;

    /*
     * Check that there is a comment right here.
*/ if ((RAW != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; SHRINK; SKIP(4); buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { htmlErrMemory(ctxt, "buffer allocation failed\n"); ctxt->instate = state; return; } q = CUR_CHAR(ql); NEXTL(ql); r = CUR_CHAR(rl); NEXTL(rl); cur = CUR_CHAR(l); len = 0; while (IS_CHAR(cur) && ((cur != '>') || (r != '-') || (q != '-'))) { if (len + 5 >= size) { xmlChar *tmp; size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { xmlFree(buf); htmlErrMemory(ctxt, "growing buffer failed\n"); ctxt->instate = state; return; } buf = tmp; } COPY_BUF(ql,buf,len,q); q = r; ql = rl; r = cur; rl = l; NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { SHRINK; GROW; cur = CUR_CHAR(l); } } buf[len] = 0; if (!IS_CHAR(cur)) { htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated \n */ base += 2; } } if (incomment) { if (base + 3 > len) return(-1); if ((buf[base] == '-') && (buf[base + 1] == '-') && (buf[base + 2] == '>')) { incomment = 0; base += 2; } continue; } if (buf[base] == first) { if (third != 0) { if ((buf[base + 1] != next) || (buf[base + 2] != third)) continue; } else if (next != 0) { if (buf[base + 1] != next) continue; } ctxt->checkIndex = 0; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c' found at %d\n", first, base); else if (third == 0) xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c%c' found at %d\n", first, next, base); else xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c%c%c' found at %d\n", first, next, third, base); #endif return(base - (in->cur - in->base)); } } ctxt->checkIndex = base; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c' failed\n", first); else if (third == 0) xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c%c' failed\n", first, next); else 
xmlGenericError(xmlGenericErrorContext, "HPP: lookup '%c%c%c' failed\n", first, next, third); #endif return(-1); } /** * htmlParseTryOrFinish: * @ctxt: an HTML parser context * @terminate: last chunk indicator * * Try to progress on parsing * * Returns zero if no parsing was possible */ static int htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { int ret = 0; htmlParserInputPtr in; int avail = 0; xmlChar cur, next; #ifdef DEBUG_PUSH switch (ctxt->instate) { case XML_PARSER_EOF: xmlGenericError(xmlGenericErrorContext, "HPP: try EOF\n"); break; case XML_PARSER_START: xmlGenericError(xmlGenericErrorContext, "HPP: try START\n"); break; case XML_PARSER_MISC: xmlGenericError(xmlGenericErrorContext, "HPP: try MISC\n");break; case XML_PARSER_COMMENT: xmlGenericError(xmlGenericErrorContext, "HPP: try COMMENT\n");break; case XML_PARSER_PROLOG: xmlGenericError(xmlGenericErrorContext, "HPP: try PROLOG\n");break; case XML_PARSER_START_TAG: xmlGenericError(xmlGenericErrorContext, "HPP: try START_TAG\n");break; case XML_PARSER_CONTENT: xmlGenericError(xmlGenericErrorContext, "HPP: try CONTENT\n");break; case XML_PARSER_CDATA_SECTION: xmlGenericError(xmlGenericErrorContext, "HPP: try CDATA_SECTION\n");break; case XML_PARSER_END_TAG: xmlGenericError(xmlGenericErrorContext, "HPP: try END_TAG\n");break; case XML_PARSER_ENTITY_DECL: xmlGenericError(xmlGenericErrorContext, "HPP: try ENTITY_DECL\n");break; case XML_PARSER_ENTITY_VALUE: xmlGenericError(xmlGenericErrorContext, "HPP: try ENTITY_VALUE\n");break; case XML_PARSER_ATTRIBUTE_VALUE: xmlGenericError(xmlGenericErrorContext, "HPP: try ATTRIBUTE_VALUE\n");break; case XML_PARSER_DTD: xmlGenericError(xmlGenericErrorContext, "HPP: try DTD\n");break; case XML_PARSER_EPILOG: xmlGenericError(xmlGenericErrorContext, "HPP: try EPILOG\n");break; case XML_PARSER_PI: xmlGenericError(xmlGenericErrorContext, "HPP: try PI\n");break; case XML_PARSER_SYSTEM_LITERAL: xmlGenericError(xmlGenericErrorContext, "HPP: try 
SYSTEM_LITERAL\n");break; } #endif while (1) { in = ctxt->input; if (in == NULL) break; if (in->buf == NULL) avail = in->length - (in->cur - in->base); else avail = in->buf->buffer->use - (in->cur - in->base); if ((avail == 0) && (terminate)) { htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { /* * SAX: end of the document processing. */ ctxt->instate = XML_PARSER_EOF; if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); } } if (avail < 1) goto done; cur = in->cur[0]; if (cur == 0) { SKIP(1); continue; } switch (ctxt->instate) { case XML_PARSER_EOF: /* * Document parsing is done ! */ goto done; case XML_PARSER_START: /* * Very first chars read from the document flow. */ cur = in->cur[0]; if (IS_BLANK_CH(cur)) { SKIP_BLANKS; if (in->buf == NULL) avail = in->length - (in->cur - in->base); else avail = in->buf->buffer->use - (in->cur - in->base); } if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); cur = in->cur[0]; next = in->cur[1]; if ((cur == '<') && (next == '!') && (UPP(2) == 'D') && (UPP(3) == 'O') && (UPP(4) == 'C') && (UPP(5) == 'T') && (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing internal subset\n"); #endif htmlParseDocTypeDecl(ctxt); ctxt->instate = XML_PARSER_PROLOG; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering PROLOG\n"); #endif } else { ctxt->instate = XML_PARSER_MISC; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering MISC\n"); #endif } break; case XML_PARSER_MISC: SKIP_BLANKS; if (in->buf == NULL) avail = in->length - (in->cur - in->base); else avail = in->buf->buffer->use - 
(in->cur - in->base); if (avail < 2) goto done; cur = in->cur[0]; next = in->cur[1]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing Comment\n"); #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing PI\n"); #endif htmlParsePI(ctxt); ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '!') && (UPP(2) == 'D') && (UPP(3) == 'O') && (UPP(4) == 'C') && (UPP(5) == 'T') && (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing internal subset\n"); #endif htmlParseDocTypeDecl(ctxt); ctxt->instate = XML_PARSER_PROLOG; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering PROLOG\n"); #endif } else if ((cur == '<') && (next == '!') && (avail < 9)) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); #endif } break; case XML_PARSER_PROLOG: SKIP_BLANKS; if (in->buf == NULL) avail = in->length - (in->cur - in->base); else avail = in->buf->buffer->use - (in->cur - in->base); if (avail < 2) goto done; cur = in->cur[0]; next = in->cur[1]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing Comment\n"); #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '?')) { if 
((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing PI\n"); #endif htmlParsePI(ctxt); ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); #endif } break; case XML_PARSER_EPILOG: if (in->buf == NULL) avail = in->length - (in->cur - in->base); else avail = in->buf->buffer->use - (in->cur - in->base); if (avail < 1) goto done; cur = in->cur[0]; if (IS_BLANK_CH(cur)) { htmlParseCharData(ctxt); goto done; } if (avail < 2) goto done; next = in->cur[1]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing Comment\n"); #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing PI\n"); #endif htmlParsePI(ctxt); ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { ctxt->errNo = XML_ERR_DOCUMENT_END; ctxt->wellFormed = 0; ctxt->instate = XML_PARSER_EOF; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering EOF\n"); #endif if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; } break; case XML_PARSER_START_TAG: { const xmlChar *name; int failed; const htmlElemDesc * info; if (avail < 2) goto done; cur = in->cur[0]; if (cur != '<') { ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; } if (in->cur[1] 
== '/') { ctxt->instate = XML_PARSER_END_TAG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering END_TAG\n"); #endif break; } if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; failed = htmlParseStartTag(ctxt); name = ctxt->name; if (failed || (name == NULL)) { if (CUR == '>') NEXT; break; } /* * Lookup the info for that element. */ info = htmlTagLookup(name); if (info == NULL) { htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, "Tag %s invalid\n", name, NULL); } /* * Check for an Empty Element labeled the XML/SGML way */ if ((CUR == '/') && (NXT(1) == '>')) { SKIP(2); if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); htmlnamePop(ctxt); ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; } if (CUR == '>') { NEXT; } else { htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, "Couldn't find end of Start Tag %s\n", name, NULL); /* * end of parsing of this node. 
*/ if (xmlStrEqual(name, ctxt->name)) { nodePop(ctxt); htmlnamePop(ctxt); } ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; } /* * Check for an Empty Element from DTD definition */ if ((info != NULL) && (info->empty)) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); htmlnamePop(ctxt); } ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; } case XML_PARSER_CONTENT: { long cons; /* * Handle preparsed entities and charRef */ if (ctxt->token != 0) { xmlChar chr[2] = { 0 , 0 } ; chr[0] = (xmlChar) ctxt->token; htmlCheckParagraph(ctxt); if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) ctxt->sax->characters(ctxt->userData, chr, 1); ctxt->token = 0; ctxt->checkIndex = 0; } if ((avail == 1) && (terminate)) { cur = in->cur[0]; if ((cur != '<') && (cur != '&')) { if (ctxt->sax != NULL) { if (IS_BLANK_CH(cur)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace( ctxt->userData, &cur, 1); } else { htmlCheckParagraph(ctxt); if (ctxt->sax->characters != NULL) ctxt->sax->characters( ctxt->userData, &cur, 1); } } ctxt->token = 0; ctxt->checkIndex = 0; in->cur++; break; } } if (avail < 2) goto done; cur = in->cur[0]; next = in->cur[1]; cons = ctxt->nbChars; if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { /* * Handle SCRIPT/STYLE separately */ if ((!terminate) && (htmlParseLookupSequence(ctxt, '<', '/', 0, 0) < 0)) goto done; htmlParseScript(ctxt); if ((cur == '<') && (next == '/')) { ctxt->instate = XML_PARSER_END_TAG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering END_TAG\n"); #endif break; } } else { /* * Sometimes DOCTYPE arrives in the middle of the document */ if ((cur == '<') && (next == '!') && (UPP(2) == 'D') && 
(UPP(3) == 'O') && (UPP(4) == 'C') && (UPP(5) == 'T') && (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "Misplaced DOCTYPE declaration\n", BAD_CAST "DOCTYPE" , NULL); htmlParseDocTypeDecl(ctxt); } else if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && (htmlParseLookupSequence( ctxt, '-', '-', '>', 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing Comment\n"); #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing PI\n"); #endif htmlParsePI(ctxt); ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else if ((cur == '<') && (next == '/')) { ctxt->instate = XML_PARSER_END_TAG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering END_TAG\n"); #endif break; } else if (cur == '<') { ctxt->instate = XML_PARSER_START_TAG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); #endif break; } else if (cur == '&') { if ((!terminate) && (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing Reference\n"); #endif /* TODO: check generation of subtrees if noent !!! */ htmlParseReference(ctxt); } else { /* * check that the text sequence is complete * before handing out the data to the parser * to avoid problems with erroneous end of * data detection. 
*/ if ((!terminate) && (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) goto done; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing char data\n"); #endif htmlParseCharData(ctxt); } } if (cons == ctxt->nbChars) { if (ctxt->node != NULL) { htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "detected an error in element content\n", NULL, NULL); } NEXT; break; } break; } case XML_PARSER_END_TAG: if (avail < 2) goto done; if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParseEndTag(ctxt); if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { ctxt->instate = XML_PARSER_CONTENT; } ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_CDATA_SECTION: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == CDATA\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_DTD: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == DTD\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_COMMENT: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == COMMENT\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_PI: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == PI\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_ENTITY_DECL: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal 
error, state == ENTITY_DECL\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_ENTITY_VALUE: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == ENTITY_VALUE\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering DTD\n"); #endif break; case XML_PARSER_ATTRIBUTE_VALUE: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == ATTRIBUTE_VALUE\n", NULL, NULL); ctxt->instate = XML_PARSER_START_TAG; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); #endif break; case XML_PARSER_SYSTEM_LITERAL: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_IGNORE: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == XML_PARSER_IGNORE\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; case XML_PARSER_PUBLIC_LITERAL: htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "HPP: internal error, state == XML_PARSER_LITERAL\n", NULL, NULL); ctxt->instate = XML_PARSER_CONTENT; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering CONTENT\n"); #endif break; } } done: if ((avail == 0) && (terminate)) { htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { /* * SAX: end of the document processing. 
*/
	    ctxt->instate = XML_PARSER_EOF;
	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
		ctxt->sax->endDocument(ctxt->userData);
	}
    }
    /*
     * Once the parse is terminated (or has reached EOF/EPILOG), a document
     * that still has no internal subset gets the HTML 4.0 Transitional one.
     */
    if ((ctxt->myDoc != NULL) &&
	((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
	 (ctxt->instate == XML_PARSER_EPILOG))) {
	xmlDtdPtr dtd;
	dtd = xmlGetIntSubset(ctxt->myDoc);
	if (dtd == NULL)
	    ctxt->myDoc->intSubset =
		xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
		    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
		    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
    }
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
#endif
    return(ret);
}

/**
 * htmlParseChunk:
 * @ctxt:  an HTML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a chunk of memory in push mode: the chunk is appended to the
 * context's input buffer, then htmlParseTryOrFinish() is run on whatever
 * is available. When @terminate is set the parser is moved to
 * XML_PARSER_EOF and the SAX endDocument callback is invoked if that
 * state had not been reached yet.
 *
 * Returns zero if no error, the xmlParserErrors otherwise:
 * XML_ERR_INTERNAL_ERROR when @ctxt or its input is NULL,
 * XML_ERR_INVALID_ENCODING when the input encoder fails, otherwise
 * ctxt->errNo. NOTE(review): when the buffer push fails the value returned
 * (and stored in errNo) is XML_PARSER_EOF, which is a parser-state
 * constant rather than an xmlParserErrors code.
 */
int
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    if ((ctxt == NULL) || (ctxt->input == NULL)) {
	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
		     "htmlParseChunk: context error\n", NULL, NULL);
	return(XML_ERR_INTERNAL_ERROR);
    }
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
	/*
	 * Remember base and cur as offsets so they can be recomputed from
	 * buffer->content after the push (presumably because the push may
	 * move the buffer storage).
	 */
	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
	int cur = ctxt->input->cur - ctxt->input->base;
	int res;

	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
	if (res < 0) {
	    /* Push failed: stop delivering SAX events and bail out. */
	    ctxt->errNo = XML_PARSER_EOF;
	    ctxt->disableSAX = 1;
	    return (XML_PARSER_EOF);
	}
	ctxt->input->base = ctxt->input->buf->buffer->content + base;
	ctxt->input->cur = ctxt->input->base + cur;
	ctxt->input->end =
	  &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
#ifdef DEBUG_PUSH
	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif

#if 0
	if ((terminate) || (ctxt->input->buf->buffer->use > 80))
	    htmlParseTryOrFinish(ctxt, terminate);
#endif
    } else if (ctxt->instate != XML_PARSER_EOF) {
	/*
	 * No new data was supplied: run the encoder over the raw buffer
	 * so that bytes still held there reach the parse buffer.
	 */
	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
	    xmlParserInputBufferPtr in = ctxt->input->buf;
	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
		    (in->raw != NULL)) {
		int nbchars;

		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
		if (nbchars < 0) {
		    htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
			         "encoder error\n", NULL, NULL);
		    return(XML_ERR_INVALID_ENCODING);
		}
	    }
	}
    }
    htmlParseTryOrFinish(ctxt, terminate);
    if (terminate) {
	/* Ending anywhere but EOF, EPILOG or MISC means a truncated document. */
	if ((ctxt->instate != XML_PARSER_EOF) &&
	    (ctxt->instate != XML_PARSER_EPILOG) &&
	    (ctxt->instate != XML_PARSER_MISC)) {
	    ctxt->errNo = XML_ERR_DOCUMENT_END;
	    ctxt->wellFormed = 0;
	}
	if (ctxt->instate != XML_PARSER_EOF) {
	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
		ctxt->sax->endDocument(ctxt->userData);
	}
	ctxt->instate = XML_PARSER_EOF;
    }
    return((xmlParserErrors) ctxt->errNo);
}

/************************************************************************
 *									*
 *			User entry points				*
 *									*
 ************************************************************************/

/**
 * htmlCreatePushParserCtxt:
 * @sax:  a SAX handler
 * @user_data:  The user data returned on SAX callbacks
 * @chunk:  a pointer to an array of chars
 * @size:  number of chars in the array
 * @filename:  an optional file name or URI
 * @enc:  an optional encoding
 *
 * Create a parser context for using the HTML parser in push mode
 * The value of @filename is used for fetching external entities
 * and error/warning reports.
* * Returns the new parser context or NULL */ htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename, xmlCharEncoding enc) { htmlParserCtxtPtr ctxt; htmlParserInputPtr inputStream; xmlParserInputBufferPtr buf; xmlInitParser(); buf = xmlAllocParserInputBuffer(enc); if (buf == NULL) return(NULL); ctxt = htmlNewParserCtxt(); if (ctxt == NULL) { xmlFreeParserInputBuffer(buf); return(NULL); } if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder) ctxt->charset=XML_CHAR_ENCODING_UTF8; if (sax != NULL) { if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler) xmlFree(ctxt->sax); ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); if (ctxt->sax == NULL) { xmlFree(buf); xmlFree(ctxt); return(NULL); } memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); if (user_data != NULL) ctxt->userData = user_data; } if (filename == NULL) { ctxt->directory = NULL; } else { ctxt->directory = xmlParserGetDirectory(filename); } inputStream = htmlNewInputStream(ctxt); if (inputStream == NULL) { xmlFreeParserCtxt(ctxt); xmlFree(buf); return(NULL); } if (filename == NULL) inputStream->filename = NULL; else inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) filename); inputStream->buf = buf; inputStream->base = inputStream->buf->buffer->content; inputStream->cur = inputStream->buf->buffer->content; inputStream->end = &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; inputPush(ctxt, inputStream); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { int base = ctxt->input->base - ctxt->input->buf->buffer->content; int cur = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, size, chunk); ctxt->input->base = ctxt->input->buf->buffer->content + base; ctxt->input->cur = ctxt->input->base + cur; ctxt->input->end = &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; #ifdef DEBUG_PUSH 
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); #endif } return(ctxt); } #endif /* LIBXML_PUSH_ENABLED */ /** * htmlSAXParseDoc: * @cur: a pointer to an array of xmlChar * @encoding: a free form C string describing the HTML document encoding, or NULL * @sax: the SAX handler block * @userData: if using SAX, this pointer will be provided on callbacks. * * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks * to handle parse events. If sax is NULL, fallback to the default DOM * behavior and return a tree. * * Returns the resulting document tree unless SAX is NULL or the document is * not well formed. */ htmlDocPtr htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { htmlDocPtr ret; htmlParserCtxtPtr ctxt; xmlInitParser(); if (cur == NULL) return(NULL); ctxt = htmlCreateDocParserCtxt(cur, encoding); if (ctxt == NULL) return(NULL); if (sax != NULL) { if (ctxt->sax != NULL) xmlFree (ctxt->sax); ctxt->sax = sax; ctxt->userData = userData; } htmlParseDocument(ctxt); ret = ctxt->myDoc; if (sax != NULL) { ctxt->sax = NULL; ctxt->userData = NULL; } htmlFreeParserCtxt(ctxt); return(ret); } /** * htmlParseDoc: * @cur: a pointer to an array of xmlChar * @encoding: a free form C string describing the HTML document encoding, or NULL * * parse an HTML in-memory document and build a tree. * * Returns the resulting document tree */ htmlDocPtr htmlParseDoc(xmlChar *cur, const char *encoding) { return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); } /** * htmlCreateFileParserCtxt: * @filename: the filename * @encoding: a free form C string describing the HTML document encoding, or NULL * * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. 
*
 * Returns the new parser context or NULL
 */
htmlParserCtxtPtr
htmlCreateFileParserCtxt(const char *filename, const char *encoding)
{
    /* Prefix handed to htmlCheckEncoding() in front of the encoding name. */
    static const char charsetPrefix[] = "charset=";
    htmlParserCtxtPtr parser;
    htmlParserInputPtr source;
    char *path;

    if (filename == NULL)
        return(NULL);

    parser = htmlNewParserCtxt();
    if (parser == NULL)
        return(NULL);

    path = (char *) xmlCanonicPath((const xmlChar *) filename);
    if (path == NULL) {
#ifdef LIBXML_SAX1_ENABLED
        if (xmlDefaultSAXHandler.error != NULL)
            xmlDefaultSAXHandler.error(NULL, "out of memory\n");
#endif
        xmlFreeParserCtxt(parser);
        return(NULL);
    }

    /* The canonical path is only needed to open the entity. */
    source = xmlLoadExternalEntity(path, NULL, parser);
    xmlFree(path);
    if (source == NULL) {
        xmlFreeParserCtxt(parser);
        return(NULL);
    }
    inputPush(parser, source);

    /*
     * Apply the requested encoding by building a "charset=<encoding>"
     * string for htmlCheckEncoding(). An allocation failure silently
     * skips this step, as before.
     */
    if (encoding != NULL) {
        const size_t prefixLen = sizeof(charsetPrefix) - 1;
        const size_t encodingLen = strlen(encoding);
        xmlChar *charsetDecl;

        charsetDecl = xmlMallocAtomic(prefixLen + encodingLen + 1);
        if (charsetDecl != NULL) {
            memcpy(charsetDecl, charsetPrefix, prefixLen);
            /* + 1 copies the terminating NUL of @encoding as well. */
            memcpy(charsetDecl + prefixLen, encoding, encodingLen + 1);
            htmlCheckEncoding(parser, charsetDecl);
            xmlFree(charsetDecl);
        }
    }

    return(parser);
}

/**
 * htmlSAXParseFile:
 * @filename:  the filename
 * @encoding:  a free form C string describing the HTML document encoding, or NULL
 * @sax:  the SAX handler block
 * @userData: if using SAX, this pointer will be provided on callbacks.
 *
 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
 * compressed document is provided by default if found at compile-time.
 * It use the given SAX function block to handle the parsing callback.
 * If sax is NULL, fallback to the default DOM tree building routines.
 *
 * Returns the resulting document tree unless SAX is NULL or the document is
 *     not well formed.
*/ htmlDocPtr htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { htmlDocPtr ret; htmlParserCtxtPtr ctxt; htmlSAXHandlerPtr oldsax = NULL; xmlInitParser(); ctxt = htmlCreateFileParserCtxt(filename, encoding); if (ctxt == NULL) return(NULL); if (sax != NULL) { oldsax = ctxt->sax; ctxt->sax = sax; ctxt->userData = userData; } htmlParseDocument(ctxt); ret = ctxt->myDoc; if (sax != NULL) { ctxt->sax = oldsax; ctxt->userData = NULL; } htmlFreeParserCtxt(ctxt); return(ret); } /** * htmlParseFile: * @filename: the filename * @encoding: a free form C string describing the HTML document encoding, or NULL * * parse an HTML file and build a tree. Automatic support for ZLIB/Compress * compressed document is provided by default if found at compile-time. * * Returns the resulting document tree */ htmlDocPtr htmlParseFile(const char *filename, const char *encoding) { return(htmlSAXParseFile(filename, encoding, NULL, NULL)); } /** * htmlHandleOmittedElem: * @val: int 0 or 1 * * Set and return the previous value for handling HTML omitted tags. * * Returns the last value for 0 for no handling, 1 for auto insertion. */ int htmlHandleOmittedElem(int val) { int old = htmlOmittedDefaultValue; htmlOmittedDefaultValue = val; return(old); } /** * htmlElementAllowedHere: * @parent: HTML parent element * @elt: HTML element * * Checks whether an HTML element may be a direct child of a parent element. * Note - doesn't check for deprecated elements * * Returns 1 if allowed; 0 otherwise. */ int htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) { const char** p ; if ( ! elt || ! parent || ! parent->subelts ) return 0 ; for ( p = parent->subelts; *p; ++p ) if ( !xmlStrcmp((const xmlChar *)*p, elt) ) return 1 ; return 0 ; } /** * htmlElementStatusHere: * @parent: HTML parent element * @elt: HTML element * * Checks whether an HTML element may be a direct child of a parent element. * and if so whether it is valid or deprecated. 
* * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID */ htmlStatus htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) { if ( ! parent || ! elt ) return HTML_INVALID ; if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) ) return HTML_INVALID ; return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ; } /** * htmlAttrAllowed: * @elt: HTML element * @attr: HTML attribute * @legacy: whether to allow deprecated attributes * * Checks whether an attribute is valid for an element * Has full knowledge of Required and Deprecated attributes * * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID */ htmlStatus htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) { const char** p ; if ( !elt || ! attr ) return HTML_INVALID ; if ( elt->attrs_req ) for ( p = elt->attrs_req; *p; ++p) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) return HTML_REQUIRED ; if ( elt->attrs_opt ) for ( p = elt->attrs_opt; *p; ++p) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) return HTML_VALID ; if ( legacy && elt->attrs_depr ) for ( p = elt->attrs_depr; *p; ++p) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) return HTML_DEPRECATED ; return HTML_INVALID ; } /** * htmlNodeStatus: * @node: an htmlNodePtr in a tree * @legacy: whether to allow deprecated elements (YES is faster here * for Element nodes) * * Checks whether the tree node is valid. Experimental (the author * only uses the HTML enhancements in a SAX parser) * * Return: for Element nodes, a return from htmlElementAllowedHere (if * legacy allowed) or htmlElementStatusHere (otherwise). * for Attribute nodes, a return from htmlAttrAllowed * for other nodes, HTML_NA (no checks performed) */ htmlStatus htmlNodeStatus(const htmlNodePtr node, int legacy) { if ( ! node ) return HTML_INVALID ; switch ( node->type ) { case XML_ELEMENT_NODE: return legacy ? ( htmlElementAllowedHere ( htmlTagLookup(node->parent->name) , node->name ) ? 
HTML_VALID : HTML_INVALID ) : htmlElementStatusHere( htmlTagLookup(node->parent->name) , htmlTagLookup(node->name) ) ; case XML_ATTRIBUTE_NODE: return htmlAttrAllowed( htmlTagLookup(node->parent->name) , node->name, legacy) ; default: return HTML_NA ; } } /************************************************************************ * * * New set (2.6.0) of simpler and more flexible APIs * * * ************************************************************************/ /** * DICT_FREE: * @str: a string * * Free a string if it is not owned by the "dict" dictionnary in the * current scope */ #define DICT_FREE(str) \ if ((str) && ((!dict) || \ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str)); /** * htmlCtxtReset: * @ctxt: an HTML parser context * * Reset a parser context */ void htmlCtxtReset(htmlParserCtxtPtr ctxt) { xmlParserInputPtr input; xmlDictPtr dict; if (ctxt == NULL) return; dict = ctxt->dict; while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ xmlFreeInputStream(input); } ctxt->inputNr = 0; ctxt->input = NULL; ctxt->spaceNr = 0; if (ctxt->spaceTab != NULL) { ctxt->spaceTab[0] = -1; ctxt->space = &ctxt->spaceTab[0]; } else { ctxt->space = NULL; } ctxt->nodeNr = 0; ctxt->node = NULL; ctxt->nameNr = 0; ctxt->name = NULL; DICT_FREE(ctxt->version); ctxt->version = NULL; DICT_FREE(ctxt->encoding); ctxt->encoding = NULL; DICT_FREE(ctxt->directory); ctxt->directory = NULL; DICT_FREE(ctxt->extSubURI); ctxt->extSubURI = NULL; DICT_FREE(ctxt->extSubSystem); ctxt->extSubSystem = NULL; if (ctxt->myDoc != NULL) xmlFreeDoc(ctxt->myDoc); ctxt->myDoc = NULL; ctxt->standalone = -1; ctxt->hasExternalSubset = 0; ctxt->hasPErefs = 0; ctxt->html = 1; ctxt->external = 0; ctxt->instate = XML_PARSER_START; ctxt->token = 0; ctxt->wellFormed = 1; ctxt->nsWellFormed = 1; ctxt->valid = 1; ctxt->vctxt.userData = ctxt; ctxt->vctxt.error = xmlParserValidityError; ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->record_info = 0; ctxt->nbChars = 0; 
ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; ctxt->depth = 0; ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); ctxt->attsDefault = NULL; } if (ctxt->attsSpecial != NULL) { xmlHashFree(ctxt->attsSpecial, NULL); ctxt->attsSpecial = NULL; } } /** * htmlCtxtUseOptions: * @ctxt: an HTML parser context * @options: a combination of htmlParserOption(s) * * Applies the options to the parser context * * Returns 0 in case of success, the set of unknown or unimplemented options * in case of error. */ int htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) { if (ctxt == NULL) return(-1); if (options & HTML_PARSE_NOWARNING) { ctxt->sax->warning = NULL; ctxt->vctxt.warning = NULL; options -= XML_PARSE_NOWARNING; ctxt->options |= XML_PARSE_NOWARNING; } if (options & HTML_PARSE_NOERROR) { ctxt->sax->error = NULL; ctxt->vctxt.error = NULL; ctxt->sax->fatalError = NULL; options -= XML_PARSE_NOERROR; ctxt->options |= XML_PARSE_NOERROR; } if (options & HTML_PARSE_PEDANTIC) { ctxt->pedantic = 1; options -= XML_PARSE_PEDANTIC; ctxt->options |= XML_PARSE_PEDANTIC; } else ctxt->pedantic = 0; if (options & XML_PARSE_NOBLANKS) { ctxt->keepBlanks = 0; ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; options -= XML_PARSE_NOBLANKS; ctxt->options |= XML_PARSE_NOBLANKS; } else ctxt->keepBlanks = 1; if (options & HTML_PARSE_RECOVER) { ctxt->recovery = 1; } else ctxt->recovery = 0; if (options & HTML_PARSE_COMPACT) { ctxt->options |= HTML_PARSE_COMPACT; options -= HTML_PARSE_COMPACT; } ctxt->dictNames = 0; return (options); } /** * htmlDoRead: * @ctxt: an HTML parser context * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * @reuse: keep the context for reuse * * Common front-end for the htmlRead functions * * Returns the 
resulting document tree or NULL */ static htmlDocPtr htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, int options, int reuse) { htmlDocPtr ret; htmlCtxtUseOptions(ctxt, options); ctxt->html = 1; if (encoding != NULL) { xmlCharEncodingHandlerPtr hdlr; hdlr = xmlFindCharEncodingHandler(encoding); if (hdlr != NULL) xmlSwitchToEncoding(ctxt, hdlr); } if ((URL != NULL) && (ctxt->input != NULL) && (ctxt->input->filename == NULL)) ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); htmlParseDocument(ctxt); ret = ctxt->myDoc; ctxt->myDoc = NULL; if (!reuse) { if ((ctxt->dictNames) && (ret != NULL) && (ret->dict == ctxt->dict)) ctxt->dict = NULL; xmlFreeParserCtxt(ctxt); } return (ret); } /** * htmlReadDoc: * @cur: a pointer to a zero terminated string * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML in-memory document and build a tree. * * Returns the resulting document tree */ htmlDocPtr htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) { htmlParserCtxtPtr ctxt; if (cur == NULL) return (NULL); ctxt = xmlCreateDocParserCtxt(cur); if (ctxt == NULL) return (NULL); return (htmlDoRead(ctxt, URL, encoding, options, 0)); } /** * htmlReadFile: * @filename: a file or URL * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML file from the filesystem or the network. 
* * Returns the resulting document tree */ htmlDocPtr htmlReadFile(const char *filename, const char *encoding, int options) { htmlParserCtxtPtr ctxt; ctxt = htmlCreateFileParserCtxt(filename, encoding); if (ctxt == NULL) return (NULL); return (htmlDoRead(ctxt, NULL, NULL, options, 0)); } /** * htmlReadMemory: * @buffer: a pointer to a char array * @size: the size of the array * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML in-memory document and build a tree. * * Returns the resulting document tree */ htmlDocPtr htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) { htmlParserCtxtPtr ctxt; ctxt = xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return (NULL); if (ctxt->sax != NULL) memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); return (htmlDoRead(ctxt, URL, encoding, options, 0)); } /** * htmlReadFd: * @fd: an open file descriptor * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML from a file descriptor and build a tree. 
* * Returns the resulting document tree */ htmlDocPtr htmlReadFd(int fd, const char *URL, const char *encoding, int options) { htmlParserCtxtPtr ctxt; xmlParserInputBufferPtr input; xmlParserInputPtr stream; if (fd < 0) return (NULL); input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); if (input == NULL) return (NULL); ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { xmlFreeParserInputBuffer(input); return (NULL); } stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); xmlFreeParserCtxt(ctxt); return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 0)); } /** * htmlReadIO: * @ioread: an I/O read function * @ioclose: an I/O close function * @ioctx: an I/O handler * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an HTML document from I/O functions and source and build a tree. * * Returns the resulting document tree */ htmlDocPtr htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, const char *URL, const char *encoding, int options) { htmlParserCtxtPtr ctxt; xmlParserInputBufferPtr input; xmlParserInputPtr stream; if (ioread == NULL) return (NULL); input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); if (input == NULL) return (NULL); ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { xmlFreeParserInputBuffer(input); return (NULL); } stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); xmlFreeParserCtxt(ctxt); return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 0)); } /** * htmlCtxtReadDoc: * @ctxt: an HTML parser context * @cur: a pointer to a zero terminated string * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of 
htmlParserOption(s) * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context * * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, const char *URL, const char *encoding, int options) { xmlParserInputPtr stream; if (cur == NULL) return (NULL); if (ctxt == NULL) return (NULL); htmlCtxtReset(ctxt); stream = xmlNewStringInputStream(ctxt, cur); if (stream == NULL) { return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 1)); } /** * htmlCtxtReadFile: * @ctxt: an HTML parser context * @filename: a file or URL * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML file from the filesystem or the network. * This reuses the existing @ctxt parser context * * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, const char *encoding, int options) { xmlParserInputPtr stream; if (filename == NULL) return (NULL); if (ctxt == NULL) return (NULL); htmlCtxtReset(ctxt); stream = xmlLoadExternalEntity(filename, NULL, ctxt); if (stream == NULL) { return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, NULL, encoding, options, 1)); } /** * htmlCtxtReadMemory: * @ctxt: an HTML parser context * @buffer: a pointer to a char array * @size: the size of the array * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML in-memory document and build a tree. 
* This reuses the existing @ctxt parser context * * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, const char *URL, const char *encoding, int options) { xmlParserInputBufferPtr input; xmlParserInputPtr stream; if (ctxt == NULL) return (NULL); if (buffer == NULL) return (NULL); htmlCtxtReset(ctxt); input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); if (input == NULL) { return(NULL); } stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); return(NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 1)); } /** * htmlCtxtReadFd: * @ctxt: an HTML parser context * @fd: an open file descriptor * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an XML from a file descriptor and build a tree. * This reuses the existing @ctxt parser context * * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, const char *URL, const char *encoding, int options) { xmlParserInputBufferPtr input; xmlParserInputPtr stream; if (fd < 0) return (NULL); if (ctxt == NULL) return (NULL); htmlCtxtReset(ctxt); input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); if (input == NULL) return (NULL); stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 1)); } /** * htmlCtxtReadIO: * @ctxt: an HTML parser context * @ioread: an I/O read function * @ioclose: an I/O close function * @ioctx: an I/O handler * @URL: the base URL to use for the document * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * * parse an HTML document from I/O functions and source and build 
a tree. * This reuses the existing @ctxt parser context * * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, const char *URL, const char *encoding, int options) { xmlParserInputBufferPtr input; xmlParserInputPtr stream; if (ioread == NULL) return (NULL); if (ctxt == NULL) return (NULL); htmlCtxtReset(ctxt); input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, XML_CHAR_ENCODING_NONE); if (input == NULL) return (NULL); stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); if (stream == NULL) { xmlFreeParserInputBuffer(input); return (NULL); } inputPush(ctxt, stream); return (htmlDoRead(ctxt, URL, encoding, options, 1)); } #define bottom_HTMLparser #include "elfgcchack.h" #endif /* LIBXML_HTML_ENABLED */