diff --git a/LICENSE.xmlparser b/LICENSE.xmlparser new file mode 100644 index 0000000..ea3a3be --- /dev/null +++ b/LICENSE.xmlparser @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2011-2022 Hiltjo Posthuma + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/fetch.c b/fetch.c index ff76730..c948fda 100644 --- a/fetch.c +++ b/fetch.c @@ -2,30 +2,34 @@ #include #include "fetch.h" #include "parse.h" -#include "yxml.h" +#include "xml.h" -#define XMLBUFSIZE 4096 /* reads feed from stdin and parses */ int fetch_rss(void) { - yxml_t parser; - yxml_ret_t ret; - char c, buf[XMLBUFSIZE]; + XMLParser x = { 0 }; - yxml_init(&parser, buf, XMLBUFSIZE); + x.xmlattr = xmlattr; + x.xmlattrend = xmlattrend; + x.xmlattrstart = xmlattrstart; + x.xmlattrentity = xmlattrentity; + x.xmlcdatastart = xmlcdatastart; + x.xmlcdata = xmlcdata; + x.xmlcdataend = xmlcdataend; + x.xmlcommentstart = xmlcommentstart; + x.xmlcomment = xmlcomment; + x.xmlcommentend = xmlcommentend; + x.xmldata = xmldata; + x.xmldataend = xmldataend; + x.xmldataentity = xmldataentity; + x.xmldatastart = xmldatastart; + x.xmltagend = xmltagend; + x.xmltagstart = xmltagstart; + x.xmltagstartparsed = xmltagstartparsed; - /* read from stdin 1 byte at a time */ - while (read(0, &c, 1) > 0 && c != '\0') { - ret = yxml_parse(&parser, c); - if (parse_item(&parser, ret) 
< 0) { - fprintf(stderr, "failed to parse item(s)\n"); - return -1; - } - } - - if (yxml_eof(&parser) < 0) - fprintf(stderr, "raw feed had invalid xml.\n"); + x.getnext = getchar; + xml_parse(&x); return 0; } diff --git a/xml.c b/xml.c new file mode 100644 index 0000000..e9de86d --- /dev/null +++ b/xml.c @@ -0,0 +1,454 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xml.h" + +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) + +static void +xml_parseattrs(XMLParser *x) +{ + size_t namelen = 0, valuelen; + int c, endsep, endname = 0, valuestart = 0; + + while ((c = GETNEXT()) != EOF) { + if (ISSPACE(c)) { + if (namelen) + endname = 1; + continue; + } else if (c == '?') + ; /* ignore */ + else if (c == '=') { + x->name[namelen] = '\0'; + valuestart = 1; + endname = 1; + } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { + /* attribute without value */ + x->name[namelen] = '\0'; + if (x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0); + if (x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + endname = 0; + x->name[0] = c; + namelen = 1; + } else if (namelen && valuestart) { + /* attribute with value */ + if (x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + + valuelen = 0; + if (c == '\'' || c == '"') { + endsep = c; + } else { + endsep = ' '; /* ISSPACE() */ + goto startvalue; + } + + while ((c = GETNEXT()) != EOF) { +startvalue: + if (c == '&') { /* entities */ + x->data[valuelen] = '\0'; + /* call data function with data before entity if there is data */ + if (valuelen && x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + while ((c = GETNEXT()) != EOF) { + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) + break; + 
if (valuelen < sizeof(x->data) - 1) + x->data[valuelen++] = c; + else { + /* entity too long for buffer, handle as normal data */ + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + break; + } + if (c == ';') { + x->data[valuelen] = '\0'; + if (x->xmlattrentity) + x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + valuelen = 0; + break; + } + } + } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { + if (valuelen < sizeof(x->data) - 1) { + x->data[valuelen++] = c; + } else { + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + } + } + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + if (x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + break; + } + } + namelen = endname = valuestart = 0; + } else if (namelen < sizeof(x->name) - 1) { + x->name[namelen++] = c; + } + if (c == '>') { + break; + } else if (c == '/') { + x->isshorttag = 1; + x->name[0] = '\0'; + namelen = 0; + } + } +} + +static void +xml_parsecomment(XMLParser *x) +{ + size_t datalen = 0, i = 0; + int c; + + if (x->xmlcommentstart) + x->xmlcommentstart(x); + while ((c = GETNEXT()) != EOF) { + if (c == '-' || c == '>') { + if (x->xmlcomment && datalen) { + x->data[datalen] = '\0'; + x->xmlcomment(x, x->data, datalen); + datalen = 0; + } + } + + if (c == '-') { + if (++i > 2) { + if (x->xmlcomment) + for (; i > 2; i--) + x->xmlcomment(x, "-", 1); + i = 2; + } + continue; + } else if (c == '>' && i == 2) { + if (x->xmlcommentend) + x->xmlcommentend(x); + return; + } else if (i) { + if (x->xmlcomment) { + for (; i > 0; i--) + x->xmlcomment(x, "-", 1); + } + i = 0; + } + + if (datalen < 
sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if (x->xmlcomment) + x->xmlcomment(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } +} + +static void +xml_parsecdata(XMLParser *x) +{ + size_t datalen = 0, i = 0; + int c; + + if (x->xmlcdatastart) + x->xmlcdatastart(x); + while ((c = GETNEXT()) != EOF) { + if (c == ']' || c == '>') { + if (x->xmlcdata && datalen) { + x->data[datalen] = '\0'; + x->xmlcdata(x, x->data, datalen); + datalen = 0; + } + } + + if (c == ']') { + if (++i > 2) { + if (x->xmlcdata) + for (; i > 2; i--) + x->xmlcdata(x, "]", 1); + i = 2; + } + continue; + } else if (c == '>' && i == 2) { + if (x->xmlcdataend) + x->xmlcdataend(x); + return; + } else if (i) { + if (x->xmlcdata) + for (; i > 0; i--) + x->xmlcdata(x, "]", 1); + i = 0; + } + + if (datalen < sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if (x->xmlcdata) + x->xmlcdata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } +} + +static int +codepointtoutf8(long r, char *s) +{ + if (r == 0) { + return 0; /* NUL byte */ + } else if (r <= 0x7F) { + /* 1 byte: 0aaaaaaa */ + s[0] = r; + return 1; + } else if (r <= 0x07FF) { + /* 2 bytes: 00000aaa aabbbbbb */ + s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ + s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ + return 2; + } else if (r <= 0xFFFF) { + /* 3 bytes: aaaabbbb bbcccccc */ + s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ + s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ + s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */ + return 3; + } else { + /* 4 bytes: 000aaabb bbbbcccc ccdddddd */ + s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ + s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ + s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ + s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ + return 4; + } +} + +static int +namedentitytostr(const char *e, char *buf, size_t bufsiz) +{ + static const struct { + const char 
*entity; + int c; + } entities[] = { + { "amp;", '&' }, + { "lt;", '<' }, + { "gt;", '>' }, + { "apos;", '\'' }, + { "quot;", '"' }, + }; + size_t i; + + /* buffer is too small */ + if (bufsiz < 2) + return -1; + + for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) { + if (!strcmp(e, entities[i].entity)) { + buf[0] = entities[i].c; + buf[1] = '\0'; + return 1; + } + } + return -1; +} + +static int +numericentitytostr(const char *e, char *buf, size_t bufsiz) +{ + long l; + int len; + char *end; + + /* buffer is too small */ + if (bufsiz < 5) + return -1; + + errno = 0; + /* hex (16) or decimal (10) */ + if (*e == 'x') + l = strtol(++e, &end, 16); + else + l = strtol(e, &end, 10); + /* invalid value or not a well-formed entity or invalid code point */ + if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff || + (l >= 0xd800 && l <= 0xdfff)) + return -1; + len = codepointtoutf8(l, buf); + buf[len] = '\0'; + + return len; +} + +/* convert named- or numeric entity string to buffer string + * returns byte-length of string or -1 on failure. 
*/ +int +xml_entitytostr(const char *e, char *buf, size_t bufsiz) +{ + /* doesn't start with & */ + if (e[0] != '&') + return -1; + /* numeric entity */ + if (e[1] == '#') + return numericentitytostr(e + 2, buf, bufsiz); + else /* named entity */ + return namedentitytostr(e + 1, buf, bufsiz); +} + +void +xml_parse(XMLParser *x) +{ + size_t datalen, tagdatalen; + int c, isend; + + while ((c = GETNEXT()) != EOF && c != '<') + ; /* skip until < */ + + while (c != EOF) { + if (c == '<') { /* parse tag */ + if ((c = GETNEXT()) == EOF) + return; + + if (c == '!') { /* CDATA and comments */ + for (tagdatalen = 0; (c = GETNEXT()) != EOF;) { + /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */ + if (tagdatalen <= sizeof("[CDATA[") - 1) + x->data[tagdatalen++] = c; + if (c == '>') + break; + else if (c == '-' && tagdatalen == sizeof("--") - 1 && + (x->data[0] == '-')) { + xml_parsecomment(x); + break; + } else if (c == '[') { + if (tagdatalen == sizeof("[CDATA[") - 1 && + !strncmp(x->data, "[CDATA[", tagdatalen)) { + xml_parsecdata(x); + break; + } + } + } + } else { + /* normal tag (open, short open, close), processing instruction. */ + x->tag[0] = c; + x->taglen = 1; + x->isshorttag = isend = 0; + + /* treat processing instruction as shorttag, don't strip "?" prefix. 
*/ + if (c == '?') { + x->isshorttag = 1; + } else if (c == '/') { + if ((c = GETNEXT()) == EOF) + return; + x->tag[0] = c; + isend = 1; + } + + while ((c = GETNEXT()) != EOF) { + if (c == '/') + x->isshorttag = 1; /* short tag */ + else if (c == '>' || ISSPACE(c)) { + x->tag[x->taglen] = '\0'; + if (isend) { /* end tag, starts with </ */ + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; + } else { + /* start tag */ + if (x->xmltagstart) + x->xmltagstart(x, x->tag, x->taglen); + if (ISSPACE(c)) + xml_parseattrs(x); + if (x->xmltagstartparsed) + x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); + } + /* call tagend for shortform or processing instruction */ + if (x->isshorttag) { + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; + } + break; + } else if (x->taglen < sizeof(x->tag) - 1) + x->tag[x->taglen++] = c; /* NOTE: tag name truncation */ + } + } + } else { + /* parse tag data */ + datalen = 0; + if (x->xmldatastart) + x->xmldatastart(x); + while ((c = GETNEXT()) != EOF) { + if (c == '&') { + if (datalen) { + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + } + x->data[0] = c; + datalen = 1; + while ((c = GETNEXT()) != EOF) { + if (c == '<') + break; + if (datalen < sizeof(x->data) - 1) + x->data[datalen++] = c; + else { + /* entity too long for buffer, handle as normal data */ + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + break; + } + if (c == ';') { + x->data[datalen] = '\0'; + if (x->xmldataentity) + x->xmldataentity(x, x->data, datalen); + datalen = 0; + break; + } + } + } else if (c != '<') { + if (datalen < sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } + if (c == '<') { + x->data[datalen] = '\0'; + if (x->xmldata 
&& datalen) + x->xmldata(x, x->data, datalen); + if (x->xmldataend) + x->xmldataend(x); + break; + } + } + } + } +} diff --git a/xml.h b/xml.h new file mode 100644 index 0000000..22bb88e --- /dev/null +++ b/xml.h @@ -0,0 +1,49 @@ +#ifndef _XML_H_ +#define _XML_H_ + +#include <stdio.h> + +typedef struct xmlparser { + /* handlers */ + void (*xmlattr)(struct xmlparser *, const char *, size_t, + const char *, size_t, const char *, size_t); + void (*xmlattrend)(struct xmlparser *, const char *, size_t, + const char *, size_t); + void (*xmlattrstart)(struct xmlparser *, const char *, size_t, + const char *, size_t); + void (*xmlattrentity)(struct xmlparser *, const char *, size_t, + const char *, size_t, const char *, size_t); + void (*xmlcdatastart)(struct xmlparser *); + void (*xmlcdata)(struct xmlparser *, const char *, size_t); + void (*xmlcdataend)(struct xmlparser *); + void (*xmlcommentstart)(struct xmlparser *); + void (*xmlcomment)(struct xmlparser *, const char *, size_t); + void (*xmlcommentend)(struct xmlparser *); + void (*xmldata)(struct xmlparser *, const char *, size_t); + void (*xmldataend)(struct xmlparser *); + void (*xmldataentity)(struct xmlparser *, const char *, size_t); + void (*xmldatastart)(struct xmlparser *); + void (*xmltagend)(struct xmlparser *, const char *, size_t, int); + void (*xmltagstart)(struct xmlparser *, const char *, size_t); + void (*xmltagstartparsed)(struct xmlparser *, const char *, + size_t, int); + +#ifndef GETNEXT + #define GETNEXT (x)->getnext + int (*getnext)(void); +#endif + + /* current tag */ + char tag[1024]; + size_t taglen; + /* current tag is in shortform ? 
*/ + int isshorttag; + /* current attribute name */ + char name[1024]; + /* data buffer used for tag data, CDATA and attribute data */ + char data[BUFSIZ]; +} XMLParser; + +int xml_entitytostr(const char *, char *, size_t); +void xml_parse(XMLParser *); +#endif diff --git a/yxml.c b/yxml.c deleted file mode 100644 index ec51193..0000000 --- a/yxml.c +++ /dev/null @@ -1,1060 +0,0 @@ -/* This file is generated by yxml-gen.pl using yxml-states and yxml.c.in as input files. - * It is preferable to edit those files instead of this one if you want to make a change. - * The source files can be found through the homepage: https://dev.yorhel.nl/yxml */ - -/* Copyright (c) 2013-2014 Yoran Heling - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#include "yxml.h" -#include - -typedef enum { - YXMLS_string, - YXMLS_attr0, - YXMLS_attr1, - YXMLS_attr2, - YXMLS_attr3, - YXMLS_attr4, - YXMLS_cd0, - YXMLS_cd1, - YXMLS_cd2, - YXMLS_comment0, - YXMLS_comment1, - YXMLS_comment2, - YXMLS_comment3, - YXMLS_comment4, - YXMLS_dt0, - YXMLS_dt1, - YXMLS_dt2, - YXMLS_dt3, - YXMLS_dt4, - YXMLS_elem0, - YXMLS_elem1, - YXMLS_elem2, - YXMLS_elem3, - YXMLS_enc0, - YXMLS_enc1, - YXMLS_enc2, - YXMLS_enc3, - YXMLS_etag0, - YXMLS_etag1, - YXMLS_etag2, - YXMLS_init, - YXMLS_le0, - YXMLS_le1, - YXMLS_le2, - YXMLS_le3, - YXMLS_lee1, - YXMLS_lee2, - YXMLS_leq0, - YXMLS_misc0, - YXMLS_misc1, - YXMLS_misc2, - YXMLS_misc2a, - YXMLS_misc3, - YXMLS_pi0, - YXMLS_pi1, - YXMLS_pi2, - YXMLS_pi3, - YXMLS_pi4, - YXMLS_std0, - YXMLS_std1, - YXMLS_std2, - YXMLS_std3, - YXMLS_ver0, - YXMLS_ver1, - YXMLS_ver2, - YXMLS_ver3, - YXMLS_xmldecl0, - YXMLS_xmldecl1, - YXMLS_xmldecl2, - YXMLS_xmldecl3, - YXMLS_xmldecl4, - YXMLS_xmldecl5, - YXMLS_xmldecl6, - YXMLS_xmldecl7, - YXMLS_xmldecl8, - YXMLS_xmldecl9 -} yxml_state_t; - - -#define yxml_isChar(c) 1 -/* 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */ -#define yxml_isSP(c) (c == 0x20 || c == 0x09 || c == 0x0a) -#define yxml_isAlpha(c) ((c|32)-'a' < 26) -#define yxml_isNum(c) (c-'0' < 10) -#define yxml_isHex(c) (yxml_isNum(c) || (c|32)-'a' < 6) -#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || c == '.' || c == '_' || c == '-') -#define yxml_isNameStart(c) (yxml_isAlpha(c) || c == ':' || c == '_' || c >= 128) -#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || c == '-' || c == '.') -/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */ -#define yxml_isAttValue(c) (yxml_isChar(c) && c != x->quote && c != '<' && c != '&') -/* Anything between '&' and ';', the yxml_ref* functions will do further - * validation. 
Strictly speaking, this is "yxml_isName(c) || c == '#'", but - * this parser doesn't understand entities with '.', ':', etc, anwyay. */ -#define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || c == '#') - -#define INTFROM5CHARS(a, b, c, d, e) ((((uint64_t)(a))<<32) | (((uint64_t)(b))<<24) | (((uint64_t)(c))<<16) | (((uint64_t)(d))<<8) | (uint64_t)(e)) - - -/* Set the given char value to ch (0<=ch<=255). */ -static inline void yxml_setchar(char *dest, unsigned ch) { - *(unsigned char *)dest = ch; -} - - -/* Similar to yxml_setchar(), but will convert ch (any valid unicode point) to - * UTF-8 and appends a '\0'. dest must have room for at least 5 bytes. */ -static void yxml_setutf8(char *dest, unsigned ch) { - if(ch <= 0x007F) - yxml_setchar(dest++, ch); - else if(ch <= 0x07FF) { - yxml_setchar(dest++, 0xC0 | (ch>>6)); - yxml_setchar(dest++, 0x80 | (ch & 0x3F)); - } else if(ch <= 0xFFFF) { - yxml_setchar(dest++, 0xE0 | (ch>>12)); - yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); - yxml_setchar(dest++, 0x80 | (ch & 0x3F)); - } else { - yxml_setchar(dest++, 0xF0 | (ch>>18)); - yxml_setchar(dest++, 0x80 | ((ch>>12) & 0x3F)); - yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); - yxml_setchar(dest++, 0x80 | (ch & 0x3F)); - } - *dest = 0; -} - - -static inline yxml_ret_t yxml_datacontent(yxml_t *x, unsigned ch) { - yxml_setchar(x->data, ch); - x->data[1] = 0; - return YXML_CONTENT; -} - - -static inline yxml_ret_t yxml_datapi1(yxml_t *x, unsigned ch) { - yxml_setchar(x->data, ch); - x->data[1] = 0; - return YXML_PICONTENT; -} - - -static inline yxml_ret_t yxml_datapi2(yxml_t *x, unsigned ch) { - x->data[0] = '?'; - yxml_setchar(x->data+1, ch); - x->data[2] = 0; - return YXML_PICONTENT; -} - - -static inline yxml_ret_t yxml_datacd1(yxml_t *x, unsigned ch) { - x->data[0] = ']'; - yxml_setchar(x->data+1, ch); - x->data[2] = 0; - return YXML_CONTENT; -} - - -static inline yxml_ret_t yxml_datacd2(yxml_t *x, unsigned ch) { - x->data[0] = ']'; - x->data[1] = ']'; - 
yxml_setchar(x->data+2, ch); - x->data[3] = 0; - return YXML_CONTENT; -} - - -static inline yxml_ret_t yxml_dataattr(yxml_t *x, unsigned ch) { - /* Normalize attribute values according to the XML spec section 3.3.3. */ - yxml_setchar(x->data, ch == 0x9 || ch == 0xa ? 0x20 : ch); - x->data[1] = 0; - return YXML_ATTRVAL; -} - - -static yxml_ret_t yxml_pushstack(yxml_t *x, char **res, unsigned ch) { - if(x->stacklen+2 >= x->stacksize) - return YXML_ESTACK; - x->stacklen++; - *res = (char *)x->stack+x->stacklen; - x->stack[x->stacklen] = ch; - x->stacklen++; - x->stack[x->stacklen] = 0; - return YXML_OK; -} - - -static yxml_ret_t yxml_pushstackc(yxml_t *x, unsigned ch) { - if(x->stacklen+1 >= x->stacksize) - return YXML_ESTACK; - x->stack[x->stacklen] = ch; - x->stacklen++; - x->stack[x->stacklen] = 0; - return YXML_OK; -} - - -static void yxml_popstack(yxml_t *x) { - do - x->stacklen--; - while(x->stack[x->stacklen]); -} - - -static inline yxml_ret_t yxml_elemstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); } -static inline yxml_ret_t yxml_elemname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } -static inline yxml_ret_t yxml_elemnameend(yxml_t *x, unsigned ch) { return YXML_ELEMSTART; } - - -/* Also used in yxml_elemcloseend(), since this function just removes the last - * element from the stack and returns ELEMEND. 
*/ -static yxml_ret_t yxml_selfclose(yxml_t *x, unsigned ch) { - yxml_popstack(x); - if(x->stacklen) { - x->elem = (char *)x->stack+x->stacklen-1; - while(*(x->elem-1)) - x->elem--; - return YXML_ELEMEND; - } - x->elem = (char *)x->stack; - x->state = YXMLS_misc3; - return YXML_ELEMEND; -} - - -static inline yxml_ret_t yxml_elemclose(yxml_t *x, unsigned ch) { - if(*((unsigned char *)x->elem) != ch) - return YXML_ECLOSE; - x->elem++; - return YXML_OK; -} - - -static inline yxml_ret_t yxml_elemcloseend(yxml_t *x, unsigned ch) { - if(*x->elem) - return YXML_ECLOSE; - return yxml_selfclose(x, ch); -} - - -static inline yxml_ret_t yxml_attrstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); } -static inline yxml_ret_t yxml_attrname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } -static inline yxml_ret_t yxml_attrnameend(yxml_t *x, unsigned ch) { return YXML_ATTRSTART; } -static inline yxml_ret_t yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_ATTREND; } - - -static inline yxml_ret_t yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); } -static inline yxml_ret_t yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } -static inline yxml_ret_t yxml_piabort (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_OK; } -static inline yxml_ret_t yxml_pinameend(yxml_t *x, unsigned ch) { - return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? 
YXML_ESYN : YXML_PISTART; -} -static inline yxml_ret_t yxml_pivalend (yxml_t *x, unsigned ch) { yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; } - - -static inline yxml_ret_t yxml_refstart(yxml_t *x, unsigned ch) { - memset(x->data, 0, sizeof(x->data)); - x->reflen = 0; - return YXML_OK; -} - - -static yxml_ret_t yxml_ref(yxml_t *x, unsigned ch) { - if(x->reflen >= sizeof(x->data)-1) - return YXML_EREF; - yxml_setchar(x->data+x->reflen, ch); - x->reflen++; - return YXML_OK; -} - - -static yxml_ret_t yxml_refend(yxml_t *x, yxml_ret_t ret) { - unsigned char *r = (unsigned char *)x->data; - unsigned ch = 0; - if(*r == '#') { - if(r[1] == 'x') - for(r += 2; yxml_isHex((unsigned)*r); r++) - ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10); - else - for(r++; yxml_isNum((unsigned)*r); r++) - ch = (ch*10) + (*r-'0'); - if(*r) - ch = 0; - } else { - uint64_t i = INTFROM5CHARS(r[0], r[1], r[2], r[3], r[4]); - ch = - i == INTFROM5CHARS('l','t', 0, 0, 0) ? '<' : - i == INTFROM5CHARS('g','t', 0, 0, 0) ? '>' : - i == INTFROM5CHARS('a','m','p', 0, 0) ? '&' : - i == INTFROM5CHARS('a','p','o','s',0) ? '\'': - i == INTFROM5CHARS('q','u','o','t',0) ? 
'"' : 0; - } - - /* Codepoints not allowed in the XML 1.1 definition of a Char */ - if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xDFFF) < 0x7FF) - return YXML_EREF; - yxml_setutf8(x->data, ch); - return ret; -} - - -static inline yxml_ret_t yxml_refcontent(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_CONTENT); } -static inline yxml_ret_t yxml_refattrval(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_ATTRVAL); } - - -void yxml_init(yxml_t *x, void *stack, size_t stacksize) { - memset(x, 0, sizeof(*x)); - x->line = 1; - x->stack = (unsigned char*)stack; - x->stacksize = stacksize; - *x->stack = 0; - x->elem = x->pi = x->attr = (char *)x->stack; - x->state = YXMLS_init; -} - - -yxml_ret_t yxml_parse(yxml_t *x, int _ch) { - /* Ensure that characters are in the range of 0..255 rather than -126..125. - * All character comparisons are done with positive integers. */ - unsigned ch = (unsigned)(_ch+256) & 0xff; - if(!ch) - return YXML_ESYN; - x->total++; - - /* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and - * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds - * some non-ASCII character sequences to this list, but we can only handle - * ASCII here without making assumptions about the input encoding. 
*/ - if(x->ignore == ch) { - x->ignore = 0; - return YXML_OK; - } - x->ignore = (ch == 0xd) * 0xa; - if(ch == 0xa || ch == 0xd) { - ch = 0xa; - x->line++; - x->byte = 0; - } - x->byte++; - - switch((yxml_state_t)x->state) { - case YXMLS_string: - if(ch == *x->string) { - x->string++; - if(!*x->string) - x->state = x->nextstate; - return YXML_OK; - } - break; - case YXMLS_attr0: - if(yxml_isName(ch)) - return yxml_attrname(x, ch); - if(yxml_isSP(ch)) { - x->state = YXMLS_attr1; - return yxml_attrnameend(x, ch); - } - if(ch == (unsigned char)'=') { - x->state = YXMLS_attr2; - return yxml_attrnameend(x, ch); - } - break; - case YXMLS_attr1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'=') { - x->state = YXMLS_attr2; - return YXML_OK; - } - break; - case YXMLS_attr2: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_attr3; - x->quote = ch; - return YXML_OK; - } - break; - case YXMLS_attr3: - if(yxml_isAttValue(ch)) - return yxml_dataattr(x, ch); - if(ch == (unsigned char)'&') { - x->state = YXMLS_attr4; - return yxml_refstart(x, ch); - } - if(x->quote == ch) { - x->state = YXMLS_elem2; - return yxml_attrvalend(x, ch); - } - break; - case YXMLS_attr4: - if(yxml_isRef(ch)) - return yxml_ref(x, ch); - if(ch == (unsigned char)'\x3b') { - x->state = YXMLS_attr3; - return yxml_refattrval(x, ch); - } - break; - case YXMLS_cd0: - if(ch == (unsigned char)']') { - x->state = YXMLS_cd1; - return YXML_OK; - } - if(yxml_isChar(ch)) - return yxml_datacontent(x, ch); - break; - case YXMLS_cd1: - if(ch == (unsigned char)']') { - x->state = YXMLS_cd2; - return YXML_OK; - } - if(yxml_isChar(ch)) { - x->state = YXMLS_cd0; - return yxml_datacd1(x, ch); - } - break; - case YXMLS_cd2: - if(ch == (unsigned char)']') - return yxml_datacontent(x, ch); - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return YXML_OK; - } - if(yxml_isChar(ch)) { - x->state = YXMLS_cd0; - return yxml_datacd2(x, 
ch); - } - break; - case YXMLS_comment0: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment1; - return YXML_OK; - } - break; - case YXMLS_comment1: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment2; - return YXML_OK; - } - break; - case YXMLS_comment2: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment3; - return YXML_OK; - } - if(yxml_isChar(ch)) - return YXML_OK; - break; - case YXMLS_comment3: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment4; - return YXML_OK; - } - if(yxml_isChar(ch)) { - x->state = YXMLS_comment2; - return YXML_OK; - } - break; - case YXMLS_comment4: - if(ch == (unsigned char)'>') { - x->state = x->nextstate; - return YXML_OK; - } - break; - case YXMLS_dt0: - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc1; - return YXML_OK; - } - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_dt1; - x->quote = ch; - x->nextstate = YXMLS_dt0; - return YXML_OK; - } - if(ch == (unsigned char)'<') { - x->state = YXMLS_dt2; - return YXML_OK; - } - if(yxml_isChar(ch)) - return YXML_OK; - break; - case YXMLS_dt1: - if(x->quote == ch) { - x->state = x->nextstate; - return YXML_OK; - } - if(yxml_isChar(ch)) - return YXML_OK; - break; - case YXMLS_dt2: - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi0; - x->nextstate = YXMLS_dt0; - return YXML_OK; - } - if(ch == (unsigned char)'!') { - x->state = YXMLS_dt3; - return YXML_OK; - } - break; - case YXMLS_dt3: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment1; - x->nextstate = YXMLS_dt0; - return YXML_OK; - } - if(yxml_isChar(ch)) { - x->state = YXMLS_dt4; - return YXML_OK; - } - break; - case YXMLS_dt4: - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_dt1; - x->quote = ch; - x->nextstate = YXMLS_dt4; - return YXML_OK; - } - if(ch == (unsigned char)'>') { - x->state = YXMLS_dt0; - return YXML_OK; - } - if(yxml_isChar(ch)) - return YXML_OK; - break; - case YXMLS_elem0: - if(yxml_isName(ch)) - 
return yxml_elemname(x, ch); - if(yxml_isSP(ch)) { - x->state = YXMLS_elem1; - return yxml_elemnameend(x, ch); - } - if(ch == (unsigned char)'/') { - x->state = YXMLS_elem3; - return yxml_elemnameend(x, ch); - } - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return yxml_elemnameend(x, ch); - } - break; - case YXMLS_elem1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'/') { - x->state = YXMLS_elem3; - return YXML_OK; - } - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return YXML_OK; - } - if(yxml_isNameStart(ch)) { - x->state = YXMLS_attr0; - return yxml_attrstart(x, ch); - } - break; - case YXMLS_elem2: - if(yxml_isSP(ch)) { - x->state = YXMLS_elem1; - return YXML_OK; - } - if(ch == (unsigned char)'/') { - x->state = YXMLS_elem3; - return YXML_OK; - } - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return YXML_OK; - } - break; - case YXMLS_elem3: - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return yxml_selfclose(x, ch); - } - break; - case YXMLS_enc0: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'=') { - x->state = YXMLS_enc1; - return YXML_OK; - } - break; - case YXMLS_enc1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_enc2; - x->quote = ch; - return YXML_OK; - } - break; - case YXMLS_enc2: - if(yxml_isAlpha(ch)) { - x->state = YXMLS_enc3; - return YXML_OK; - } - break; - case YXMLS_enc3: - if(yxml_isEncName(ch)) - return YXML_OK; - if(x->quote == ch) { - x->state = YXMLS_xmldecl6; - return YXML_OK; - } - break; - case YXMLS_etag0: - if(yxml_isNameStart(ch)) { - x->state = YXMLS_etag1; - return yxml_elemclose(x, ch); - } - break; - case YXMLS_etag1: - if(yxml_isName(ch)) - return yxml_elemclose(x, ch); - if(yxml_isSP(ch)) { - x->state = YXMLS_etag2; - return yxml_elemcloseend(x, ch); - } - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return yxml_elemcloseend(x, ch); - } - break; - 
case YXMLS_etag2: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc2; - return YXML_OK; - } - break; - case YXMLS_init: - if(ch == (unsigned char)'\xef') { - x->state = YXMLS_string; - x->nextstate = YXMLS_misc0; - x->string = (unsigned char *)"\xbb\xbf"; - return YXML_OK; - } - if(yxml_isSP(ch)) { - x->state = YXMLS_misc0; - return YXML_OK; - } - if(ch == (unsigned char)'<') { - x->state = YXMLS_le0; - return YXML_OK; - } - break; - case YXMLS_le0: - if(ch == (unsigned char)'!') { - x->state = YXMLS_lee1; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_leq0; - return YXML_OK; - } - if(yxml_isNameStart(ch)) { - x->state = YXMLS_elem0; - return yxml_elemstart(x, ch); - } - break; - case YXMLS_le1: - if(ch == (unsigned char)'!') { - x->state = YXMLS_lee1; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi0; - x->nextstate = YXMLS_misc1; - return YXML_OK; - } - if(yxml_isNameStart(ch)) { - x->state = YXMLS_elem0; - return yxml_elemstart(x, ch); - } - break; - case YXMLS_le2: - if(ch == (unsigned char)'!') { - x->state = YXMLS_lee2; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi0; - x->nextstate = YXMLS_misc2; - return YXML_OK; - } - if(ch == (unsigned char)'/') { - x->state = YXMLS_etag0; - return YXML_OK; - } - if(yxml_isNameStart(ch)) { - x->state = YXMLS_elem0; - return yxml_elemstart(x, ch); - } - break; - case YXMLS_le3: - if(ch == (unsigned char)'!') { - x->state = YXMLS_comment0; - x->nextstate = YXMLS_misc3; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi0; - x->nextstate = YXMLS_misc3; - return YXML_OK; - } - break; - case YXMLS_lee1: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment1; - x->nextstate = YXMLS_misc1; - return YXML_OK; - } - if(ch == (unsigned char)'D') { - x->state = YXMLS_string; - x->nextstate = YXMLS_dt0; - x->string = (unsigned char *)"OCTYPE"; - return YXML_OK; - } - break; - 
case YXMLS_lee2: - if(ch == (unsigned char)'-') { - x->state = YXMLS_comment1; - x->nextstate = YXMLS_misc2; - return YXML_OK; - } - if(ch == (unsigned char)'[') { - x->state = YXMLS_string; - x->nextstate = YXMLS_cd0; - x->string = (unsigned char *)"CDATA["; - return YXML_OK; - } - break; - case YXMLS_leq0: - if(ch == (unsigned char)'x') { - x->state = YXMLS_xmldecl0; - x->nextstate = YXMLS_misc1; - return yxml_pistart(x, ch); - } - if(yxml_isNameStart(ch)) { - x->state = YXMLS_pi1; - x->nextstate = YXMLS_misc1; - return yxml_pistart(x, ch); - } - break; - case YXMLS_misc0: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'<') { - x->state = YXMLS_le0; - return YXML_OK; - } - break; - case YXMLS_misc1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'<') { - x->state = YXMLS_le1; - return YXML_OK; - } - break; - case YXMLS_misc2: - if(ch == (unsigned char)'<') { - x->state = YXMLS_le2; - return YXML_OK; - } - if(ch == (unsigned char)'&') { - x->state = YXMLS_misc2a; - return yxml_refstart(x, ch); - } - if(yxml_isChar(ch)) - return yxml_datacontent(x, ch); - break; - case YXMLS_misc2a: - if(yxml_isRef(ch)) - return yxml_ref(x, ch); - if(ch == (unsigned char)'\x3b') { - x->state = YXMLS_misc2; - return yxml_refcontent(x, ch); - } - break; - case YXMLS_misc3: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'<') { - x->state = YXMLS_le3; - return YXML_OK; - } - break; - case YXMLS_pi0: - if(yxml_isNameStart(ch)) { - x->state = YXMLS_pi1; - return yxml_pistart(x, ch); - } - break; - case YXMLS_pi1: - if(yxml_isName(ch)) - return yxml_piname(x, ch); - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi4; - return yxml_pinameend(x, ch); - } - if(yxml_isSP(ch)) { - x->state = YXMLS_pi2; - return yxml_pinameend(x, ch); - } - break; - case YXMLS_pi2: - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi3; - return YXML_OK; - } - if(yxml_isChar(ch)) - return yxml_datapi1(x, ch); - break; - case YXMLS_pi3: - if(ch == (unsigned 
char)'>') { - x->state = x->nextstate; - return yxml_pivalend(x, ch); - } - if(yxml_isChar(ch)) { - x->state = YXMLS_pi2; - return yxml_datapi2(x, ch); - } - break; - case YXMLS_pi4: - if(ch == (unsigned char)'>') { - x->state = x->nextstate; - return yxml_pivalend(x, ch); - } - break; - case YXMLS_std0: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'=') { - x->state = YXMLS_std1; - return YXML_OK; - } - break; - case YXMLS_std1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_std2; - x->quote = ch; - return YXML_OK; - } - break; - case YXMLS_std2: - if(ch == (unsigned char)'y') { - x->state = YXMLS_string; - x->nextstate = YXMLS_std3; - x->string = (unsigned char *)"es"; - return YXML_OK; - } - if(ch == (unsigned char)'n') { - x->state = YXMLS_string; - x->nextstate = YXMLS_std3; - x->string = (unsigned char *)"o"; - return YXML_OK; - } - break; - case YXMLS_std3: - if(x->quote == ch) { - x->state = YXMLS_xmldecl8; - return YXML_OK; - } - break; - case YXMLS_ver0: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'=') { - x->state = YXMLS_ver1; - return YXML_OK; - } - break; - case YXMLS_ver1: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { - x->state = YXMLS_string; - x->quote = ch; - x->nextstate = YXMLS_ver2; - x->string = (unsigned char *)"1."; - return YXML_OK; - } - break; - case YXMLS_ver2: - if(yxml_isNum(ch)) { - x->state = YXMLS_ver3; - return YXML_OK; - } - break; - case YXMLS_ver3: - if(yxml_isNum(ch)) - return YXML_OK; - if(x->quote == ch) { - x->state = YXMLS_xmldecl4; - return YXML_OK; - } - break; - case YXMLS_xmldecl0: - if(ch == (unsigned char)'m') { - x->state = YXMLS_xmldecl1; - return yxml_piname(x, ch); - } - if(yxml_isName(ch)) { - x->state = YXMLS_pi1; - return yxml_piname(x, ch); - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi4; - return yxml_pinameend(x, ch); - } - 
if(yxml_isSP(ch)) { - x->state = YXMLS_pi2; - return yxml_pinameend(x, ch); - } - break; - case YXMLS_xmldecl1: - if(ch == (unsigned char)'l') { - x->state = YXMLS_xmldecl2; - return yxml_piname(x, ch); - } - if(yxml_isName(ch)) { - x->state = YXMLS_pi1; - return yxml_piname(x, ch); - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_pi4; - return yxml_pinameend(x, ch); - } - if(yxml_isSP(ch)) { - x->state = YXMLS_pi2; - return yxml_pinameend(x, ch); - } - break; - case YXMLS_xmldecl2: - if(yxml_isSP(ch)) { - x->state = YXMLS_xmldecl3; - return yxml_piabort(x, ch); - } - if(yxml_isName(ch)) { - x->state = YXMLS_pi1; - return yxml_piname(x, ch); - } - break; - case YXMLS_xmldecl3: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'v') { - x->state = YXMLS_string; - x->nextstate = YXMLS_ver0; - x->string = (unsigned char *)"ersion"; - return YXML_OK; - } - break; - case YXMLS_xmldecl4: - if(yxml_isSP(ch)) { - x->state = YXMLS_xmldecl5; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_xmldecl9; - return YXML_OK; - } - break; - case YXMLS_xmldecl5: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'?') { - x->state = YXMLS_xmldecl9; - return YXML_OK; - } - if(ch == (unsigned char)'e') { - x->state = YXMLS_string; - x->nextstate = YXMLS_enc0; - x->string = (unsigned char *)"ncoding"; - return YXML_OK; - } - if(ch == (unsigned char)'s') { - x->state = YXMLS_string; - x->nextstate = YXMLS_std0; - x->string = (unsigned char *)"tandalone"; - return YXML_OK; - } - break; - case YXMLS_xmldecl6: - if(yxml_isSP(ch)) { - x->state = YXMLS_xmldecl7; - return YXML_OK; - } - if(ch == (unsigned char)'?') { - x->state = YXMLS_xmldecl9; - return YXML_OK; - } - break; - case YXMLS_xmldecl7: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'?') { - x->state = YXMLS_xmldecl9; - return YXML_OK; - } - if(ch == (unsigned char)'s') { - x->state = YXMLS_string; - x->nextstate = YXMLS_std0; - x->string = (unsigned char 
*)"tandalone"; - return YXML_OK; - } - break; - case YXMLS_xmldecl8: - if(yxml_isSP(ch)) - return YXML_OK; - if(ch == (unsigned char)'?') { - x->state = YXMLS_xmldecl9; - return YXML_OK; - } - break; - case YXMLS_xmldecl9: - if(ch == (unsigned char)'>') { - x->state = YXMLS_misc1; - return YXML_OK; - } - break; - } - return YXML_ESYN; -} - - -yxml_ret_t yxml_eof(yxml_t *x) { - if(x->state != YXMLS_misc3) - return YXML_EEOF; - return YXML_OK; -} - - -/* vim: set noet sw=4 ts=4: */ diff --git a/yxml.h b/yxml.h deleted file mode 100644 index a0cc5f9..0000000 --- a/yxml.h +++ /dev/null @@ -1,162 +0,0 @@ -/* Copyright (c) 2013-2014 Yoran Heling - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef YXML_H -#define YXML_H - -#include -#include - -#if defined(_MSC_VER) && !defined(__cplusplus) && !defined(inline) -#define inline __inline -#endif - -/* Full API documentation for this library can be found in the "yxml.md" file - * in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */ - -typedef enum { - YXML_EEOF = -5, /* Unexpected EOF */ - YXML_EREF = -4, /* Invalid character or entity reference (&whatever;) */ - YXML_ECLOSE = -3, /* Close tag does not match open tag ( .. ) */ - YXML_ESTACK = -2, /* Stack overflow (too deeply nested tags or too long element/attribute name) */ - YXML_ESYN = -1, /* Syntax error (unexpected byte) */ - YXML_OK = 0, /* Character consumed, no new token present */ - YXML_ELEMSTART = 1, /* Start of an element: '' or '' */ - YXML_ATTRSTART = 4, /* Attribute: 'Name=..' */ - YXML_ATTRVAL = 5, /* Attribute value */ - YXML_ATTREND = 6, /* End of attribute '.."' */ - YXML_PISTART = 7, /* Start of a processing instruction */ - YXML_PICONTENT = 8, /* Content of a PI */ - YXML_PIEND = 9 /* End of a processing instruction */ -} yxml_ret_t; - -/* When, exactly, are tokens returned? - * - * ' ELEMSTART - * '/' ELEMSTART, '>' ELEMEND - * ' ' ELEMSTART - * '>' - * '/', '>' ELEMEND - * Attr - * '=' ATTRSTART - * "X ATTRVAL - * 'Y' ATTRVAL - * 'Z' ATTRVAL - * '"' ATTREND - * '>' - * '/', '>' ELEMEND - * - * ' ELEMEND - */ - - -typedef struct { - /* PUBLIC (read-only) */ - - /* Name of the current element, zero-length if not in any element. Changed - * after YXML_ELEMSTART. The pointer will remain valid up to and including - * the next non-YXML_ATTR* token, the pointed-to buffer will remain valid - * up to and including the YXML_ELEMEND for the corresponding element. */ - char *elem; - - /* The last read character(s) of an attribute value (YXML_ATTRVAL), element - * data (YXML_CONTENT), or processing instruction (YXML_PICONTENT). 
Changed - * after one of the respective YXML_ values is returned, and only valid - * until the next yxml_parse() call. Usually, this string only consists of - * a single byte, but multiple bytes are returned in the following cases: - * - "": The two characters "?x" - * - "": The two characters "]x" - * - "": The three characters "]]x" - * - "&#N;" and "&#xN;", where dec(n) > 127. The referenced Unicode - * character is then encoded in multiple UTF-8 bytes. - */ - char data[8]; - - /* Name of the current attribute. Changed after YXML_ATTRSTART, valid up to - * and including the next YXML_ATTREND. */ - char *attr; - - /* Name/target of the current processing instruction, zero-length if not in - * a PI. Changed after YXML_PISTART, valid up to (but excluding) - * the next YXML_PIEND. */ - char *pi; - - /* Line number, byte offset within that line, and total bytes read. These - * values refer to the position _after_ the last byte given to - * yxml_parse(). These are useful for debugging and error reporting. */ - uint64_t byte; - uint64_t total; - uint32_t line; - - - /* PRIVATE */ - int state; - unsigned char *stack; /* Stack of element names + attribute/PI name, separated by \0. Also starts with a \0. */ - size_t stacksize, stacklen; - unsigned reflen; - unsigned quote; - int nextstate; /* Used for '@' state remembering and for the "string" consuming state */ - unsigned ignore; - unsigned char *string; -} yxml_t; - - -#ifdef __cplusplus -extern "C" { -#endif - -void yxml_init(yxml_t *, void *, size_t); - - -yxml_ret_t yxml_parse(yxml_t *, int); - - -/* May be called after the last character has been given to yxml_parse(). - * Returns YXML_OK if the XML document is valid, YXML_EEOF otherwise. Using - * this function isn't really necessary, but can be used to detect documents - * that don't end correctly. 
In particular, an error is returned when the XML - * document did not contain a (complete) root element, or when the document - * ended while in a comment or processing instruction. */ -yxml_ret_t yxml_eof(yxml_t *); - -#ifdef __cplusplus -} -#endif - - -/* Returns the length of the element name (x->elem), attribute name (x->attr), - * or PI name (x->pi). This function should ONLY be used directly after the - * YXML_ELEMSTART, YXML_ATTRSTART or YXML_PISTART (respectively) tokens have - * been returned by yxml_parse(), calling this at any other time may not give - * the correct results. This function should also NOT be used on strings other - * than x->elem, x->attr or x->pi. */ -static inline size_t yxml_symlen(yxml_t *x, const char *s) { - return (x->stack + x->stacklen) - (const unsigned char*)s; -} - -#endif - -/* vim: set noet sw=4 ts=4: */