Yet again switching XML parsers. When I tried out xmlparser yesterday, it was very easy to use, so I think it will be easy to switch over to it.
master
parent
2b2af33fec
commit
1310665922
6 changed files with 539 additions and 1239 deletions
@ -0,0 +1,15 @@ |
||||
ISC License |
||||
|
||||
Copyright (c) 2011-2022 Hiltjo Posthuma <hiltjo@codemadness.org> |
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any |
||||
purpose with or without fee is hereby granted, provided that the above |
||||
copyright notice and this permission notice appear in all copies. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
||||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
@ -0,0 +1,454 @@ |
||||
#include <errno.h> |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
#include "xml.h" |
||||
|
||||
/* ASCII-only character classification done with plain arithmetic.
 * ISALPHA: true for 'A'-'Z' and 'a'-'z'.
 * ISSPACE: true for ' ' and for the five characters '\t' through '\r'.
 * The argument is parenthesized before the cast so that a compound
 * expression (for example a conditional) is converted as a whole; without
 * that the cast would bind to the first operand only and the unsigned
 * wrap-around the range checks rely on would be lost. */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
||||
|
||||
static void |
||||
xml_parseattrs(XMLParser *x) |
||||
{ |
||||
size_t namelen = 0, valuelen; |
||||
int c, endsep, endname = 0, valuestart = 0; |
||||
|
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (ISSPACE(c)) { |
||||
if (namelen) |
||||
endname = 1; |
||||
continue; |
||||
} else if (c == '?') |
||||
; /* ignore */ |
||||
else if (c == '=') { |
||||
x->name[namelen] = '\0'; |
||||
valuestart = 1; |
||||
endname = 1; |
||||
} else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { |
||||
/* attribute without value */ |
||||
x->name[namelen] = '\0'; |
||||
if (x->xmlattrstart) |
||||
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); |
||||
if (x->xmlattr) |
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0); |
||||
if (x->xmlattrend) |
||||
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); |
||||
endname = 0; |
||||
x->name[0] = c; |
||||
namelen = 1; |
||||
} else if (namelen && valuestart) { |
||||
/* attribute with value */ |
||||
if (x->xmlattrstart) |
||||
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); |
||||
|
||||
valuelen = 0; |
||||
if (c == '\'' || c == '"') { |
||||
endsep = c; |
||||
} else { |
||||
endsep = ' '; /* ISSPACE() */ |
||||
goto startvalue; |
||||
} |
||||
|
||||
while ((c = GETNEXT()) != EOF) { |
||||
startvalue: |
||||
if (c == '&') { /* entities */ |
||||
x->data[valuelen] = '\0'; |
||||
/* call data function with data before entity if there is data */ |
||||
if (valuelen && x->xmlattr) |
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); |
||||
x->data[0] = c; |
||||
valuelen = 1; |
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) |
||||
break; |
||||
if (valuelen < sizeof(x->data) - 1) |
||||
x->data[valuelen++] = c; |
||||
else { |
||||
/* entity too long for buffer, handle as normal data */ |
||||
x->data[valuelen] = '\0'; |
||||
if (x->xmlattr) |
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); |
||||
x->data[0] = c; |
||||
valuelen = 1; |
||||
break; |
||||
} |
||||
if (c == ';') { |
||||
x->data[valuelen] = '\0'; |
||||
if (x->xmlattrentity) |
||||
x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); |
||||
valuelen = 0; |
||||
break; |
||||
} |
||||
} |
||||
} else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { |
||||
if (valuelen < sizeof(x->data) - 1) { |
||||
x->data[valuelen++] = c; |
||||
} else { |
||||
x->data[valuelen] = '\0'; |
||||
if (x->xmlattr) |
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); |
||||
x->data[0] = c; |
||||
valuelen = 1; |
||||
} |
||||
} |
||||
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { |
||||
x->data[valuelen] = '\0'; |
||||
if (x->xmlattr) |
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); |
||||
if (x->xmlattrend) |
||||
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); |
||||
break; |
||||
} |
||||
} |
||||
namelen = endname = valuestart = 0; |
||||
} else if (namelen < sizeof(x->name) - 1) { |
||||
x->name[namelen++] = c; |
||||
} |
||||
if (c == '>') { |
||||
break; |
||||
} else if (c == '/') { |
||||
x->isshorttag = 1; |
||||
x->name[0] = '\0'; |
||||
namelen = 0; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
xml_parsecomment(XMLParser *x) |
||||
{ |
||||
size_t datalen = 0, i = 0; |
||||
int c; |
||||
|
||||
if (x->xmlcommentstart) |
||||
x->xmlcommentstart(x); |
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == '-' || c == '>') { |
||||
if (x->xmlcomment && datalen) { |
||||
x->data[datalen] = '\0'; |
||||
x->xmlcomment(x, x->data, datalen); |
||||
datalen = 0; |
||||
} |
||||
} |
||||
|
||||
if (c == '-') { |
||||
if (++i > 2) { |
||||
if (x->xmlcomment) |
||||
for (; i > 2; i--) |
||||
x->xmlcomment(x, "-", 1); |
||||
i = 2; |
||||
} |
||||
continue; |
||||
} else if (c == '>' && i == 2) { |
||||
if (x->xmlcommentend) |
||||
x->xmlcommentend(x); |
||||
return; |
||||
} else if (i) { |
||||
if (x->xmlcomment) { |
||||
for (; i > 0; i--) |
||||
x->xmlcomment(x, "-", 1); |
||||
} |
||||
i = 0; |
||||
} |
||||
|
||||
if (datalen < sizeof(x->data) - 1) { |
||||
x->data[datalen++] = c; |
||||
} else { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmlcomment) |
||||
x->xmlcomment(x, x->data, datalen); |
||||
x->data[0] = c; |
||||
datalen = 1; |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void |
||||
xml_parsecdata(XMLParser *x) |
||||
{ |
||||
size_t datalen = 0, i = 0; |
||||
int c; |
||||
|
||||
if (x->xmlcdatastart) |
||||
x->xmlcdatastart(x); |
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == ']' || c == '>') { |
||||
if (x->xmlcdata && datalen) { |
||||
x->data[datalen] = '\0'; |
||||
x->xmlcdata(x, x->data, datalen); |
||||
datalen = 0; |
||||
} |
||||
} |
||||
|
||||
if (c == ']') { |
||||
if (++i > 2) { |
||||
if (x->xmlcdata) |
||||
for (; i > 2; i--) |
||||
x->xmlcdata(x, "]", 1); |
||||
i = 2; |
||||
} |
||||
continue; |
||||
} else if (c == '>' && i == 2) { |
||||
if (x->xmlcdataend) |
||||
x->xmlcdataend(x); |
||||
return; |
||||
} else if (i) { |
||||
if (x->xmlcdata) |
||||
for (; i > 0; i--) |
||||
x->xmlcdata(x, "]", 1); |
||||
i = 0; |
||||
} |
||||
|
||||
if (datalen < sizeof(x->data) - 1) { |
||||
x->data[datalen++] = c; |
||||
} else { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmlcdata) |
||||
x->xmlcdata(x, x->data, datalen); |
||||
x->data[0] = c; |
||||
datalen = 1; |
||||
} |
||||
} |
||||
} |
||||
|
||||
/* Encode the Unicode code point r as UTF-8 into s.
 * s must have room for at least 4 bytes; no terminating NUL is written.
 * Returns the number of bytes stored (1-4). Returns 0 and stores nothing
 * for NUL and for values that are not encodable: negative numbers, values
 * above U+10FFFF and the surrogate range U+D800-U+DFFF. */
static int
codepointtoutf8(long r, char *s)
{
	if (r <= 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
		return 0; /* NUL byte or not a valid code point */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	}
	if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 | (r & 0x00003F));        /* 10bbbbbb */
		return 2;
	}
	if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10bbbbbb */
		s[2] = (char)(0x80 | (r & 0x00003F));         /* 10cccccc */
		return 3;
	}
	/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
	s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
	s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
	s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10cccccc */
	s[3] = (char)(0x80 | (r & 0x00003F));         /* 10dddddd */
	return 4;
}
||||
|
||||
/* Translate one of the five predefined named entities to its character.
 * e is the entity text without the leading '&' but including the trailing
 * ';' (for example "amp;"). On success the character and a terminating NUL
 * are written to buf and 1 is returned. Returns -1 when bufsiz is smaller
 * than 2 or when the name is not recognized. */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const char *const names[] = {
		"amp;", "lt;", "gt;", "apos;", "quot;"
	};
	static const char chars[] = { '&', '<', '>', '\'', '"' };
	size_t n;

	/* need room for the character plus the terminator */
	if (bufsiz < 2)
		return -1;

	for (n = 0; n < sizeof(names) / sizeof(names[0]); n++) {
		if (strcmp(e, names[n]) != 0)
			continue;
		buf[0] = chars[n];
		buf[1] = '\0';
		return 1;
	}
	return -1;
}
||||
|
||||
/* Translate a numeric character reference to a UTF-8 string.
 * e is the reference text after "&#": decimal digits, or 'x' followed by
 * hexadecimal digits, ending in ';'. On success the encoded bytes and a
 * terminating NUL are written to buf and the byte length (without the NUL)
 * is returned. Returns -1 when bufsiz is smaller than 5, when the number
 * cannot be parsed or is not followed by ';', or when the value is
 * negative, above 0x10ffff or inside the surrogate range. */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	char *stop;
	long cp;
	int base = 10, n;

	/* up to 4 UTF-8 bytes plus the terminator */
	if (bufsiz < 5)
		return -1;

	/* a leading 'x' selects hexadecimal */
	if (*e == 'x') {
		base = 16;
		e++;
	}

	errno = 0;
	cp = strtol(e, &stop, base);

	/* conversion error, no digits at all, or missing ';' */
	if (errno || stop == e || *stop != ';')
		return -1;
	/* outside the code point range */
	if (cp < 0 || cp > 0x10ffff)
		return -1;
	/* surrogates */
	if (cp >= 0xd800 && cp <= 0xdfff)
		return -1;

	n = codepointtoutf8(cp, buf);
	buf[n] = '\0';

	return n;
}
||||
|
||||
/* Convert an entity string, named ("&amp;") or numeric ("&#38;", "&#x26;"),
 * to its text and store it NUL-terminated in buf (capacity bufsiz).
 * Returns the byte length of the stored string, or -1 on failure; a string
 * that does not begin with '&' is a failure. */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	if (e[0] != '&')
		return -1;

	/* '#' after the ampersand marks a numeric reference */
	return e[1] == '#'
	    ? numericentitytostr(e + 2, buf, bufsiz)
	    : namedentitytostr(e + 1, buf, bufsiz);
}
||||
|
||||
void |
||||
xml_parse(XMLParser *x) |
||||
{ |
||||
size_t datalen, tagdatalen; |
||||
int c, isend; |
||||
|
||||
while ((c = GETNEXT()) != EOF && c != '<') |
||||
; /* skip until < */ |
||||
|
||||
while (c != EOF) { |
||||
if (c == '<') { /* parse tag */ |
||||
if ((c = GETNEXT()) == EOF) |
||||
return; |
||||
|
||||
if (c == '!') { /* CDATA and comments */ |
||||
for (tagdatalen = 0; (c = GETNEXT()) != EOF;) { |
||||
/* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */ |
||||
if (tagdatalen <= sizeof("[CDATA[") - 1) |
||||
x->data[tagdatalen++] = c; |
||||
if (c == '>') |
||||
break; |
||||
else if (c == '-' && tagdatalen == sizeof("--") - 1 && |
||||
(x->data[0] == '-')) { |
||||
xml_parsecomment(x); |
||||
break; |
||||
} else if (c == '[') { |
||||
if (tagdatalen == sizeof("[CDATA[") - 1 && |
||||
!strncmp(x->data, "[CDATA[", tagdatalen)) { |
||||
xml_parsecdata(x); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
} else { |
||||
/* normal tag (open, short open, close), processing instruction. */ |
||||
x->tag[0] = c; |
||||
x->taglen = 1; |
||||
x->isshorttag = isend = 0; |
||||
|
||||
/* treat processing instruction as shorttag, don't strip "?" prefix. */ |
||||
if (c == '?') { |
||||
x->isshorttag = 1; |
||||
} else if (c == '/') { |
||||
if ((c = GETNEXT()) == EOF) |
||||
return; |
||||
x->tag[0] = c; |
||||
isend = 1; |
||||
} |
||||
|
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == '/') |
||||
x->isshorttag = 1; /* short tag */ |
||||
else if (c == '>' || ISSPACE(c)) { |
||||
x->tag[x->taglen] = '\0'; |
||||
if (isend) { /* end tag, starts with </ */ |
||||
if (x->xmltagend) |
||||
x->xmltagend(x, x->tag, x->taglen, x->isshorttag); |
||||
x->tag[0] = '\0'; |
||||
x->taglen = 0; |
||||
} else { |
||||
/* start tag */ |
||||
if (x->xmltagstart) |
||||
x->xmltagstart(x, x->tag, x->taglen); |
||||
if (ISSPACE(c)) |
||||
xml_parseattrs(x); |
||||
if (x->xmltagstartparsed) |
||||
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); |
||||
} |
||||
/* call tagend for shortform or processing instruction */ |
||||
if (x->isshorttag) { |
||||
if (x->xmltagend) |
||||
x->xmltagend(x, x->tag, x->taglen, x->isshorttag); |
||||
x->tag[0] = '\0'; |
||||
x->taglen = 0; |
||||
} |
||||
break; |
||||
} else if (x->taglen < sizeof(x->tag) - 1) |
||||
x->tag[x->taglen++] = c; /* NOTE: tag name truncation */ |
||||
} |
||||
} |
||||
} else { |
||||
/* parse tag data */ |
||||
datalen = 0; |
||||
if (x->xmldatastart) |
||||
x->xmldatastart(x); |
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == '&') { |
||||
if (datalen) { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmldata) |
||||
x->xmldata(x, x->data, datalen); |
||||
} |
||||
x->data[0] = c; |
||||
datalen = 1; |
||||
while ((c = GETNEXT()) != EOF) { |
||||
if (c == '<') |
||||
break; |
||||
if (datalen < sizeof(x->data) - 1) |
||||
x->data[datalen++] = c; |
||||
else { |
||||
/* entity too long for buffer, handle as normal data */ |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmldata) |
||||
x->xmldata(x, x->data, datalen); |
||||
x->data[0] = c; |
||||
datalen = 1; |
||||
break; |
||||
} |
||||
if (c == ';') { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmldataentity) |
||||
x->xmldataentity(x, x->data, datalen); |
||||
datalen = 0; |
||||
break; |
||||
} |
||||
} |
||||
} else if (c != '<') { |
||||
if (datalen < sizeof(x->data) - 1) { |
||||
x->data[datalen++] = c; |
||||
} else { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmldata) |
||||
x->xmldata(x, x->data, datalen); |
||||
x->data[0] = c; |
||||
datalen = 1; |
||||
} |
||||
} |
||||
if (c == '<') { |
||||
x->data[datalen] = '\0'; |
||||
if (x->xmldata && datalen) |
||||
x->xmldata(x, x->data, datalen); |
||||
if (x->xmldataend) |
||||
x->xmldataend(x); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,49 @@ |
||||
/* Include guard. Identifiers that begin with an underscore followed by an
 * uppercase letter are reserved for the implementation, so the guard uses
 * a plain name. */
#ifndef XML_H
#define XML_H

#include <stdio.h>

/* Parser context: the callbacks to invoke plus the scratch buffers used
 * while parsing. A handler left NULL is simply not called. */
typedef struct xmlparser {
	/* handlers */

	/* attribute value chunk: (x, tag, taglen, name, namelen, value, valuelen) */
	void (*xmlattr)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	/* end of an attribute: (x, tag, taglen, name, namelen) */
	void (*xmlattrend)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	/* start of an attribute: (x, tag, taglen, name, namelen) */
	void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	/* entity inside an attribute value:
	 * (x, tag, taglen, name, namelen, entity, entitylen) */
	void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	/* CDATA section: start, text chunk (x, data, datalen), end */
	void (*xmlcdatastart)(struct xmlparser *);
	void (*xmlcdata)(struct xmlparser *, const char *, size_t);
	void (*xmlcdataend)(struct xmlparser *);
	/* comment: start, text chunk (x, data, datalen), end */
	void (*xmlcommentstart)(struct xmlparser *);
	void (*xmlcomment)(struct xmlparser *, const char *, size_t);
	void (*xmlcommentend)(struct xmlparser *);
	/* character data: text chunk (x, data, datalen), end,
	 * entity (x, entity, entitylen), start */
	void (*xmldata)(struct xmlparser *, const char *, size_t);
	void (*xmldataend)(struct xmlparser *);
	void (*xmldataentity)(struct xmlparser *, const char *, size_t);
	void (*xmldatastart)(struct xmlparser *);
	/* end of a tag: (x, tag, taglen, isshort) */
	void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
	/* tag name read, attributes not parsed yet: (x, tag, taglen) */
	void (*xmltagstart)(struct xmlparser *, const char *, size_t);
	/* start tag including attributes parsed: (x, tag, taglen, isshort) */
	void (*xmltagstartparsed)(struct xmlparser *, const char *,
	      size_t, int);

	/* Input source. Unless GETNEXT is defined before this header is
	 * included, the parser reads through this member; it returns the
	 * next character or EOF. */
#ifndef GETNEXT
	#define GETNEXT (x)->getnext
	int (*getnext)(void);
#endif

	/* current tag */
	char tag[1024];
	size_t taglen;
	/* current tag is in shortform ? <tag /> */
	int isshorttag;
	/* current attribute name */
	char name[1024];
	/* data buffer used for tag data, CDATA and attribute data */
	char data[BUFSIZ];
} XMLParser;

/* Convert the entity string e to text in buf (capacity bufsiz); returns the
 * byte length or -1 on failure. */
int xml_entitytostr(const char *e, char *buf, size_t bufsiz);
/* Parse the input and invoke the handlers set in x. */
void xml_parse(XMLParser *x);
#endif
@ -1,162 +0,0 @@ |
||||
/* Copyright (c) 2013-2014 Yoran Heling
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining |
||||
a copy of this software and associated documentation files (the |
||||
"Software"), to deal in the Software without restriction, including |
||||
without limitation the rights to use, copy, modify, merge, publish, |
||||
distribute, sublicense, and/or sell copies of the Software, and to |
||||
permit persons to whom the Software is furnished to do so, subject to |
||||
the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included |
||||
in all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
*/ |
||||
|
||||
#ifndef YXML_H |
||||
#define YXML_H |
||||
|
||||
#include <stdint.h> |
||||
#include <stddef.h> |
||||
|
||||
#if defined(_MSC_VER) && !defined(__cplusplus) && !defined(inline) |
||||
#define inline __inline |
||||
#endif |
||||
|
||||
/* Full API documentation for this library can be found in the "yxml.md" file
|
||||
* in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */
|
||||
|
||||
/* Result of feeding one byte to yxml_parse(): negative values are errors,
 * YXML_OK means nothing new to report, positive values are tokens. */
typedef enum {
	/* errors */
	YXML_EEOF      = -5, /* input ended unexpectedly */
	YXML_EREF      = -4, /* bad character or entity reference (&whatever;) */
	YXML_ECLOSE    = -3, /* closing tag differs from the open tag (<Tag> .. </OtherTag>) */
	YXML_ESTACK    = -2, /* stack overflow: nesting too deep or element/attribute name too long */
	YXML_ESYN      = -1, /* syntax error (unexpected byte) */

	/* no token */
	YXML_OK        =  0, /* byte consumed, no new token available */

	/* tokens */
	YXML_ELEMSTART =  1, /* element begins: '<Tag ..' */
	YXML_CONTENT   =  2, /* element content */
	YXML_ELEMEND   =  3, /* element ends: '.. />' or '</Tag>' */
	YXML_ATTRSTART =  4, /* attribute begins: 'Name=..' */
	YXML_ATTRVAL   =  5, /* attribute value */
	YXML_ATTREND   =  6, /* attribute ends: '.."' */
	YXML_PISTART   =  7, /* processing instruction begins */
	YXML_PICONTENT =  8, /* processing instruction content */
	YXML_PIEND     =  9  /* processing instruction ends */
} yxml_ret_t;
||||
|
||||
/* When, exactly, are tokens returned?
|
||||
* |
||||
* <TagName |
||||
* '>' ELEMSTART |
||||
* '/' ELEMSTART, '>' ELEMEND |
||||
* ' ' ELEMSTART |
||||
* '>' |
||||
* '/', '>' ELEMEND |
||||
* Attr |
||||
* '=' ATTRSTART |
||||
* "X ATTRVAL |
||||
* 'Y' ATTRVAL |
||||
* 'Z' ATTRVAL |
||||
* '"' ATTREND |
||||
* '>' |
||||
* '/', '>' ELEMEND |
||||
* |
||||
* </TagName |
||||
* '>' ELEMEND |
||||
*/ |
||||
|
||||
|
||||
/* Parser state. The first group of members may be read by the caller; the
 * second group is internal. */
typedef struct {
	/* PUBLIC (read-only) */

	/* Current element name; zero-length when outside any element. Updated
	 * after YXML_ELEMSTART. The pointer stays valid up to and including
	 * the next token that is not a YXML_ATTR* token; the buffer it points
	 * to stays valid up to and including the matching YXML_ELEMEND. */
	char *elem;

	/* Most recently read character(s) of an attribute value
	 * (YXML_ATTRVAL), element data (YXML_CONTENT) or processing
	 * instruction (YXML_PICONTENT). Updated after one of those values is
	 * returned and valid only until the next yxml_parse() call. Usually
	 * a single byte; several bytes are returned in these cases:
	 * - "<?SomePI ?x ?>": the two characters "?x"
	 * - "<![CDATA[ ]x ]]>": the two characters "]x"
	 * - "<![CDATA[ ]]x ]]>": the three characters "]]x"
	 * - "&#N;" and "&#xN;" with dec(n) > 127: the referenced Unicode
	 *   character encoded as multiple UTF-8 bytes.
	 */
	char data[8];

	/* Current attribute name. Updated after YXML_ATTRSTART and valid up
	 * to and including the next YXML_ATTREND. */
	char *attr;

	/* Name/target of the current processing instruction; zero-length when
	 * not inside one. Updated after YXML_PISTART and valid up to, but not
	 * including, the next YXML_PIEND. */
	char *pi;

	/* Position after the last byte passed to yxml_parse(): byte offset
	 * within the current line, total bytes read and line number. Meant
	 * for debugging and error reporting. */
	uint64_t byte;
	uint64_t total;
	uint32_t line;


	/* PRIVATE */
	int state;
	/* Element names followed by the attribute/PI name, each separated by
	 * \0; the stack also begins with a \0. */
	unsigned char *stack;
	size_t stacksize, stacklen;
	unsigned reflen;
	unsigned quote;
	/* Remembers the state for '@' handling and for the "string"
	 * consuming state. */
	int nextstate;
	unsigned ignore;
	unsigned char *string;
} yxml_t;
||||
|
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
void yxml_init(yxml_t *, void *, size_t); |
||||
|
||||
|
||||
yxml_ret_t yxml_parse(yxml_t *, int); |
||||
|
||||
|
||||
/* May be called after the last character has been given to yxml_parse().
|
||||
* Returns YXML_OK if the XML document is valid, YXML_EEOF otherwise. Using |
||||
* this function isn't really necessary, but can be used to detect documents |
||||
* that don't end correctly. In particular, an error is returned when the XML |
||||
* document did not contain a (complete) root element, or when the document |
||||
* ended while in a comment or processing instruction. */ |
||||
yxml_ret_t yxml_eof(yxml_t *); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
|
||||
/* Returns the length of the element name (x->elem), attribute name (x->attr),
|
||||
* or PI name (x->pi). This function should ONLY be used directly after the |
||||
* YXML_ELEMSTART, YXML_ATTRSTART or YXML_PISTART (respectively) tokens have |
||||
* been returned by yxml_parse(), calling this at any other time may not give |
||||
* the correct results. This function should also NOT be used on strings other |
||||
* than x->elem, x->attr or x->pi. */ |
||||
static inline size_t yxml_symlen(yxml_t *x, const char *s) { |
||||
return (x->stack + x->stacklen) - (const unsigned char*)s; |
||||
} |
||||
|
||||
#endif |
||||
|
||||
/* vim: set noet sw=4 ts=4: */ |
Loading…
Reference in new issue