prepare to use codemadness's xmlparser instead of yxml
Yet again switching XML parsers. When I tried out xmlparser yesterday, it was very easy to use, so I think it will be easy to switch over to it.
master
parent
2b2af33fec
commit
1310665922
|
@ -0,0 +1,15 @@
|
|||
ISC License
|
||||
|
||||
Copyright (c) 2011-2022 Hiltjo Posthuma <hiltjo@codemadness.org>
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any
|
||||
purpose with or without fee is hereby granted, provided that the above
|
||||
copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
38
fetch.c
38
fetch.c
|
@ -2,30 +2,34 @@
|
|||
#include <unistd.h>
|
||||
#include "fetch.h"
|
||||
#include "parse.h"
|
||||
#include "yxml.h"
|
||||
#include "xml.h"
|
||||
|
||||
#define XMLBUFSIZE 4096
|
||||
|
||||
/* reads feed from stdin and parses */
|
||||
int fetch_rss(void)
|
||||
{
|
||||
yxml_t parser;
|
||||
yxml_ret_t ret;
|
||||
char c, buf[XMLBUFSIZE];
|
||||
XMLParser x = { 0 };
|
||||
|
||||
yxml_init(&parser, buf, XMLBUFSIZE);
|
||||
x.xmlattr = xmlattr;
|
||||
x.xmlattrend = xmlattrend;
|
||||
x.xmlattrstart = xmlattrstart;
|
||||
x.xmlattrentity = xmlattrentity;
|
||||
x.xmlcdatastart = xmlcdatastart;
|
||||
x.xmlcdata = xmlcdata;
|
||||
x.xmlcdataend = xmlcdataend;
|
||||
x.xmlcommentstart = xmlcommentstart;
|
||||
x.xmlcomment = xmlcomment;
|
||||
x.xmlcommentend = xmlcommentend;
|
||||
x.xmldata = xmldata;
|
||||
x.xmldataend = xmldataend;
|
||||
x.xmldataentity = xmldataentity;
|
||||
x.xmldatastart = xmldatastart;
|
||||
x.xmltagend = xmltagend;
|
||||
x.xmltagstart = xmltagstart;
|
||||
x.xmltagstartparsed = xmltagstartparsed;
|
||||
|
||||
/* read from stdin 1 byte at a time */
|
||||
while (read(0, &c, 1) > 0 && c != '\0') {
|
||||
ret = yxml_parse(&parser, c);
|
||||
if (parse_item(&parser, ret) < 0) {
|
||||
fprintf(stderr, "failed to parse item(s)\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (yxml_eof(&parser) < 0)
|
||||
fprintf(stderr, "raw feed had invalid xml.\n");
|
||||
x.getnext = getchar;
|
||||
xml_parse(&x);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,454 @@
|
|||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "xml.h"
|
||||
|
||||
/*
 * ASCII-only, locale-independent character classification.
 * The argument is fully parenthesized under the (unsigned) cast so that any
 * expression (e.g. a conditional) is converted as a whole; otherwise the cast
 * would bind to the first operand only and the range check could be done in
 * signed arithmetic. NOTE: ISSPACE evaluates its argument twice.
 */
/* true for 'A'-'Z' and 'a'-'z': fold case with |32, then one unsigned range check */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
/* true for ' ' and the five characters '\t' .. '\r' */
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
|
||||
|
||||
static void
|
||||
xml_parseattrs(XMLParser *x)
|
||||
{
|
||||
size_t namelen = 0, valuelen;
|
||||
int c, endsep, endname = 0, valuestart = 0;
|
||||
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (ISSPACE(c)) {
|
||||
if (namelen)
|
||||
endname = 1;
|
||||
continue;
|
||||
} else if (c == '?')
|
||||
; /* ignore */
|
||||
else if (c == '=') {
|
||||
x->name[namelen] = '\0';
|
||||
valuestart = 1;
|
||||
endname = 1;
|
||||
} else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
|
||||
/* attribute without value */
|
||||
x->name[namelen] = '\0';
|
||||
if (x->xmlattrstart)
|
||||
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
||||
if (x->xmlattr)
|
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
|
||||
if (x->xmlattrend)
|
||||
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
||||
endname = 0;
|
||||
x->name[0] = c;
|
||||
namelen = 1;
|
||||
} else if (namelen && valuestart) {
|
||||
/* attribute with value */
|
||||
if (x->xmlattrstart)
|
||||
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
||||
|
||||
valuelen = 0;
|
||||
if (c == '\'' || c == '"') {
|
||||
endsep = c;
|
||||
} else {
|
||||
endsep = ' '; /* ISSPACE() */
|
||||
goto startvalue;
|
||||
}
|
||||
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
startvalue:
|
||||
if (c == '&') { /* entities */
|
||||
x->data[valuelen] = '\0';
|
||||
/* call data function with data before entity if there is data */
|
||||
if (valuelen && x->xmlattr)
|
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
||||
x->data[0] = c;
|
||||
valuelen = 1;
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
|
||||
break;
|
||||
if (valuelen < sizeof(x->data) - 1)
|
||||
x->data[valuelen++] = c;
|
||||
else {
|
||||
/* entity too long for buffer, handle as normal data */
|
||||
x->data[valuelen] = '\0';
|
||||
if (x->xmlattr)
|
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
||||
x->data[0] = c;
|
||||
valuelen = 1;
|
||||
break;
|
||||
}
|
||||
if (c == ';') {
|
||||
x->data[valuelen] = '\0';
|
||||
if (x->xmlattrentity)
|
||||
x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
||||
valuelen = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
|
||||
if (valuelen < sizeof(x->data) - 1) {
|
||||
x->data[valuelen++] = c;
|
||||
} else {
|
||||
x->data[valuelen] = '\0';
|
||||
if (x->xmlattr)
|
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
||||
x->data[0] = c;
|
||||
valuelen = 1;
|
||||
}
|
||||
}
|
||||
if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
|
||||
x->data[valuelen] = '\0';
|
||||
if (x->xmlattr)
|
||||
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
||||
if (x->xmlattrend)
|
||||
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
||||
break;
|
||||
}
|
||||
}
|
||||
namelen = endname = valuestart = 0;
|
||||
} else if (namelen < sizeof(x->name) - 1) {
|
||||
x->name[namelen++] = c;
|
||||
}
|
||||
if (c == '>') {
|
||||
break;
|
||||
} else if (c == '/') {
|
||||
x->isshorttag = 1;
|
||||
x->name[0] = '\0';
|
||||
namelen = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xml_parsecomment(XMLParser *x)
|
||||
{
|
||||
size_t datalen = 0, i = 0;
|
||||
int c;
|
||||
|
||||
if (x->xmlcommentstart)
|
||||
x->xmlcommentstart(x);
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == '-' || c == '>') {
|
||||
if (x->xmlcomment && datalen) {
|
||||
x->data[datalen] = '\0';
|
||||
x->xmlcomment(x, x->data, datalen);
|
||||
datalen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (c == '-') {
|
||||
if (++i > 2) {
|
||||
if (x->xmlcomment)
|
||||
for (; i > 2; i--)
|
||||
x->xmlcomment(x, "-", 1);
|
||||
i = 2;
|
||||
}
|
||||
continue;
|
||||
} else if (c == '>' && i == 2) {
|
||||
if (x->xmlcommentend)
|
||||
x->xmlcommentend(x);
|
||||
return;
|
||||
} else if (i) {
|
||||
if (x->xmlcomment) {
|
||||
for (; i > 0; i--)
|
||||
x->xmlcomment(x, "-", 1);
|
||||
}
|
||||
i = 0;
|
||||
}
|
||||
|
||||
if (datalen < sizeof(x->data) - 1) {
|
||||
x->data[datalen++] = c;
|
||||
} else {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmlcomment)
|
||||
x->xmlcomment(x, x->data, datalen);
|
||||
x->data[0] = c;
|
||||
datalen = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xml_parsecdata(XMLParser *x)
|
||||
{
|
||||
size_t datalen = 0, i = 0;
|
||||
int c;
|
||||
|
||||
if (x->xmlcdatastart)
|
||||
x->xmlcdatastart(x);
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == ']' || c == '>') {
|
||||
if (x->xmlcdata && datalen) {
|
||||
x->data[datalen] = '\0';
|
||||
x->xmlcdata(x, x->data, datalen);
|
||||
datalen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (c == ']') {
|
||||
if (++i > 2) {
|
||||
if (x->xmlcdata)
|
||||
for (; i > 2; i--)
|
||||
x->xmlcdata(x, "]", 1);
|
||||
i = 2;
|
||||
}
|
||||
continue;
|
||||
} else if (c == '>' && i == 2) {
|
||||
if (x->xmlcdataend)
|
||||
x->xmlcdataend(x);
|
||||
return;
|
||||
} else if (i) {
|
||||
if (x->xmlcdata)
|
||||
for (; i > 0; i--)
|
||||
x->xmlcdata(x, "]", 1);
|
||||
i = 0;
|
||||
}
|
||||
|
||||
if (datalen < sizeof(x->data) - 1) {
|
||||
x->data[datalen++] = c;
|
||||
} else {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmlcdata)
|
||||
x->xmlcdata(x, x->data, datalen);
|
||||
x->data[0] = c;
|
||||
datalen = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encode code point r as UTF-8 into s (not NUL-terminated).
 * Returns the number of bytes written (1-4), or 0 for r == 0, in which case
 * nothing is written. The caller must provide room for 4 bytes; r is not
 * range-checked here.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
|
||||
|
||||
/*
 * Translate one of the five predefined XML entity names into its character.
 * e is the entity text without the leading '&' but including the trailing
 * ';' (e.g. "amp;"). On success the character and a NUL terminator are
 * stored in buf and 1 is returned; -1 is returned for unknown names or when
 * bufsiz is smaller than 2.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const char *const names[] = { "amp;", "lt;", "gt;", "apos;", "quot;" };
	static const char chars[] = { '&', '<', '>', '\'', '"' };
	size_t k;

	/* need room for one character plus the terminator */
	if (bufsiz < 2)
		return -1;

	for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
		if (strcmp(e, names[k]) != 0)
			continue;
		buf[0] = chars[k];
		buf[1] = '\0';
		return 1;
	}

	return -1;
}
|
||||
|
||||
/*
 * Decode a numeric character reference into UTF-8.
 *
 * e is the reference text after "&#": decimal digits, or 'x' followed by
 * hexadecimal digits, terminated by ';'. On success the UTF-8 bytes and a
 * NUL terminator are stored in buf and the byte length is returned (0 for
 * code point 0). Returns -1 when bufsiz is smaller than 5, when the text is
 * not well-formed, or when the value is not a valid code point (above
 * 0x10ffff or a surrogate).
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	const char *digits = "0123456789";
	size_t ndigits;
	long l;
	int len, base = 10;
	char *end;

	/* buffer is too small: up to 4 UTF-8 bytes plus the terminator */
	if (bufsiz < 5)
		return -1;

	/* hex (16) or decimal (10) */
	if (*e == 'x') {
		e++;
		base = 16;
		digits = "0123456789abcdefABCDEF";
	}

	/*
	 * strtol() on its own is too lenient: it skips leading whitespace and
	 * accepts a sign and (for base 16) a "0x" prefix, so "&# 65;", "&#+65;"
	 * or "&#x0x41;" would be decoded. Require one or more plain digits
	 * directly followed by ';'.
	 */
	ndigits = strspn(e, digits);
	if (ndigits == 0 || e[ndigits] != ';')
		return -1;

	errno = 0;
	l = strtol(e, &end, base);
	/* out of range for long, or invalid code point */
	if (errno || end != e + ndigits || l < 0 || l > 0x10ffff ||
	    (l >= 0xd800 && l <= 0xdfff))
		return -1;

	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	return len;
}
|
||||
|
||||
/*
 * Convert an entity string (starting with '&', ending with ';') into the
 * text it stands for and store it NUL-terminated in buf.
 * "&#..." is treated as a numeric reference, anything else as a named
 * entity. Returns the byte length of the result, or -1 on failure.
 */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* every entity starts with & */
	if (e[0] != '&')
		return -1;

	return (e[1] == '#')
	    ? numericentitytostr(e + 2, buf, bufsiz) /* skip "&#" */
	    : namedentitytostr(e + 1, buf, bufsiz);  /* skip "&" */
}
|
||||
|
||||
void
|
||||
xml_parse(XMLParser *x)
|
||||
{
|
||||
size_t datalen, tagdatalen;
|
||||
int c, isend;
|
||||
|
||||
while ((c = GETNEXT()) != EOF && c != '<')
|
||||
; /* skip until < */
|
||||
|
||||
while (c != EOF) {
|
||||
if (c == '<') { /* parse tag */
|
||||
if ((c = GETNEXT()) == EOF)
|
||||
return;
|
||||
|
||||
if (c == '!') { /* CDATA and comments */
|
||||
for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
|
||||
/* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
|
||||
if (tagdatalen <= sizeof("[CDATA[") - 1)
|
||||
x->data[tagdatalen++] = c;
|
||||
if (c == '>')
|
||||
break;
|
||||
else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
|
||||
(x->data[0] == '-')) {
|
||||
xml_parsecomment(x);
|
||||
break;
|
||||
} else if (c == '[') {
|
||||
if (tagdatalen == sizeof("[CDATA[") - 1 &&
|
||||
!strncmp(x->data, "[CDATA[", tagdatalen)) {
|
||||
xml_parsecdata(x);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* normal tag (open, short open, close), processing instruction. */
|
||||
x->tag[0] = c;
|
||||
x->taglen = 1;
|
||||
x->isshorttag = isend = 0;
|
||||
|
||||
/* treat processing instruction as shorttag, don't strip "?" prefix. */
|
||||
if (c == '?') {
|
||||
x->isshorttag = 1;
|
||||
} else if (c == '/') {
|
||||
if ((c = GETNEXT()) == EOF)
|
||||
return;
|
||||
x->tag[0] = c;
|
||||
isend = 1;
|
||||
}
|
||||
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == '/')
|
||||
x->isshorttag = 1; /* short tag */
|
||||
else if (c == '>' || ISSPACE(c)) {
|
||||
x->tag[x->taglen] = '\0';
|
||||
if (isend) { /* end tag, starts with </ */
|
||||
if (x->xmltagend)
|
||||
x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
|
||||
x->tag[0] = '\0';
|
||||
x->taglen = 0;
|
||||
} else {
|
||||
/* start tag */
|
||||
if (x->xmltagstart)
|
||||
x->xmltagstart(x, x->tag, x->taglen);
|
||||
if (ISSPACE(c))
|
||||
xml_parseattrs(x);
|
||||
if (x->xmltagstartparsed)
|
||||
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
|
||||
}
|
||||
/* call tagend for shortform or processing instruction */
|
||||
if (x->isshorttag) {
|
||||
if (x->xmltagend)
|
||||
x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
|
||||
x->tag[0] = '\0';
|
||||
x->taglen = 0;
|
||||
}
|
||||
break;
|
||||
} else if (x->taglen < sizeof(x->tag) - 1)
|
||||
x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* parse tag data */
|
||||
datalen = 0;
|
||||
if (x->xmldatastart)
|
||||
x->xmldatastart(x);
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == '&') {
|
||||
if (datalen) {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmldata)
|
||||
x->xmldata(x, x->data, datalen);
|
||||
}
|
||||
x->data[0] = c;
|
||||
datalen = 1;
|
||||
while ((c = GETNEXT()) != EOF) {
|
||||
if (c == '<')
|
||||
break;
|
||||
if (datalen < sizeof(x->data) - 1)
|
||||
x->data[datalen++] = c;
|
||||
else {
|
||||
/* entity too long for buffer, handle as normal data */
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmldata)
|
||||
x->xmldata(x, x->data, datalen);
|
||||
x->data[0] = c;
|
||||
datalen = 1;
|
||||
break;
|
||||
}
|
||||
if (c == ';') {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmldataentity)
|
||||
x->xmldataentity(x, x->data, datalen);
|
||||
datalen = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (c != '<') {
|
||||
if (datalen < sizeof(x->data) - 1) {
|
||||
x->data[datalen++] = c;
|
||||
} else {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmldata)
|
||||
x->xmldata(x, x->data, datalen);
|
||||
x->data[0] = c;
|
||||
datalen = 1;
|
||||
}
|
||||
}
|
||||
if (c == '<') {
|
||||
x->data[datalen] = '\0';
|
||||
if (x->xmldata && datalen)
|
||||
x->xmldata(x, x->data, datalen);
|
||||
if (x->xmldataend)
|
||||
x->xmldataend(x);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
#ifndef _XML_H_
|
||||
#define _XML_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Parser context: the event handlers supplied by the user plus the scratch
 * buffers the parser fills while reading. The string arguments passed to the
 * handlers point into the tag, name and data buffers below (or at string
 * literals) and are always accompanied by their length.
 */
typedef struct xmlparser {
	/* handlers */

	/* (part of) an attribute value */
	void (*xmlattr)(struct xmlparser *x, const char *tag, size_t taglen,
	      const char *name, size_t namelen,
	      const char *value, size_t valuelen);
	/* end of an attribute */
	void (*xmlattrend)(struct xmlparser *x, const char *tag, size_t taglen,
	      const char *name, size_t namelen);
	/* start of an attribute */
	void (*xmlattrstart)(struct xmlparser *x, const char *tag, size_t taglen,
	      const char *name, size_t namelen);
	/* entity ("&...;") inside an attribute value */
	void (*xmlattrentity)(struct xmlparser *x, const char *tag, size_t taglen,
	      const char *name, size_t namelen,
	      const char *value, size_t valuelen);
	/* CDATA section: start, text chunk, end */
	void (*xmlcdatastart)(struct xmlparser *x);
	void (*xmlcdata)(struct xmlparser *x, const char *data, size_t datalen);
	void (*xmlcdataend)(struct xmlparser *x);
	/* comment: start, text chunk, end */
	void (*xmlcommentstart)(struct xmlparser *x);
	void (*xmlcomment)(struct xmlparser *x, const char *data, size_t datalen);
	void (*xmlcommentend)(struct xmlparser *x);
	/* character data between tags: text chunk, end, entity, start */
	void (*xmldata)(struct xmlparser *x, const char *data, size_t datalen);
	void (*xmldataend)(struct xmlparser *x);
	void (*xmldataentity)(struct xmlparser *x, const char *data, size_t datalen);
	void (*xmldatastart)(struct xmlparser *x);
	/* end tag, also called for short tags and processing instructions */
	void (*xmltagend)(struct xmlparser *x, const char *tag, size_t taglen,
	      int isshort);
	/* tag name of a start tag has been read */
	void (*xmltagstart)(struct xmlparser *x, const char *tag, size_t taglen);
	/* start tag has been read completely */
	void (*xmltagstartparsed)(struct xmlparser *x, const char *tag,
	      size_t taglen, int isshort);

	/*
	 * Input source. Unless GETNEXT was defined before including this header,
	 * input is read through the getnext member below; the macro expands to
	 * (x)->getnext and therefore expects a parser pointer named x in scope.
	 */
#ifndef GETNEXT
	#define GETNEXT (x)->getnext
	int (*getnext)(void);
#endif

	/* current tag */
	char tag[1024];
	size_t taglen;
	/* current tag is in shortform ? <tag /> */
	int isshorttag;
	/* current attribute name */
	char name[1024];
	/* data buffer used for tag data, CDATA and attribute data */
	char data[BUFSIZ];
} XMLParser;
|
||||
|
||||
int xml_entitytostr(const char *, char *, size_t);
|
||||
void xml_parse(XMLParser *);
|
||||
#endif
|
162
yxml.h
162
yxml.h
|
@ -1,162 +0,0 @@
|
|||
/* Copyright (c) 2013-2014 Yoran Heling
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef YXML_H
|
||||
#define YXML_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__cplusplus) && !defined(inline)
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
/* Full API documentation for this library can be found in the "yxml.md" file
|
||||
* in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */
|
||||
|
||||
/* Result of feeding one byte to yxml_parse(): negative values are errors,
 * zero means "nothing to report yet", positive values are tokens. */
typedef enum {
	/* errors */
	YXML_EEOF      = -5, /* Unexpected EOF */
	YXML_EREF      = -4, /* Invalid character or entity reference (&whatever;) */
	YXML_ECLOSE    = -3, /* Close tag does not match open tag (<Tag> .. </OtherTag>) */
	YXML_ESTACK    = -2, /* Stack overflow (too deeply nested tags or too long element/attribute name) */
	YXML_ESYN      = -1, /* Syntax error (unexpected byte) */

	/* no token */
	YXML_OK        = 0,  /* Character consumed, no new token present */

	/* tokens */
	YXML_ELEMSTART = 1,  /* Start of an element: '<Tag ..' */
	YXML_CONTENT   = 2,  /* Element content */
	YXML_ELEMEND   = 3,  /* End of an element: '.. />' or '</Tag>' */
	YXML_ATTRSTART = 4,  /* Attribute: 'Name=..' */
	YXML_ATTRVAL   = 5,  /* Attribute value */
	YXML_ATTREND   = 6,  /* End of attribute '.."' */
	YXML_PISTART   = 7,  /* Start of a processing instruction */
	YXML_PICONTENT = 8,  /* Content of a PI */
	YXML_PIEND     = 9   /* End of a processing instruction */
} yxml_ret_t;
|
||||
|
||||
/* When, exactly, are tokens returned?
|
||||
*
|
||||
* <TagName
|
||||
* '>' ELEMSTART
|
||||
* '/' ELEMSTART, '>' ELEMEND
|
||||
* ' ' ELEMSTART
|
||||
* '>'
|
||||
* '/', '>' ELEMEND
|
||||
* Attr
|
||||
* '=' ATTRSTART
|
||||
* "X ATTRVAL
|
||||
* 'Y' ATTRVAL
|
||||
* 'Z' ATTRVAL
|
||||
* '"' ATTREND
|
||||
* '>'
|
||||
* '/', '>' ELEMEND
|
||||
*
|
||||
* </TagName
|
||||
* '>' ELEMEND
|
||||
*/
|
||||
|
||||
|
||||
/* yxml parser state: a read-only public part followed by private fields. */
typedef struct {
	/* PUBLIC (read-only) */

	/* Current element name; zero-length when outside any element. Updated
	 * after YXML_ELEMSTART. The pointer stays valid up to and including the
	 * next non-YXML_ATTR* token; the buffer it points to stays valid up to
	 * and including the YXML_ELEMEND of the corresponding element. */
	char *elem;

	/* Most recently read character(s) of an attribute value (YXML_ATTRVAL),
	 * element data (YXML_CONTENT) or processing instruction (YXML_PICONTENT).
	 * Updated after one of those tokens is returned and only valid until the
	 * next yxml_parse() call. Usually a single byte; more than one byte is
	 * returned in these cases:
	 * - "<?SomePI ?x ?>": The two characters "?x"
	 * - "<![CDATA[ ]x ]]>": The two characters "]x"
	 * - "<![CDATA[ ]]x ]]>": The three characters "]]x"
	 * - "&#N;" and "&#xN;", where dec(n) > 127. The referenced Unicode
	 *   character is then encoded in multiple UTF-8 bytes.
	 */
	char data[8];

	/* Current attribute name. Updated after YXML_ATTRSTART, valid up to and
	 * including the next YXML_ATTREND. */
	char *attr;

	/* Name/target of the current processing instruction; zero-length when
	 * not in a PI. Updated after YXML_PISTART, valid up to (but excluding)
	 * the next YXML_PIEND. */
	char *pi;

	/* Position _after_ the last byte given to yxml_parse(): byte offset
	 * within the current line, total bytes read, and line number. Useful
	 * for debugging and error reporting. */
	uint64_t byte;
	uint64_t total;
	uint32_t line;

	/* PRIVATE */

	int state;
	/* Stack of element names + attribute/PI name, separated by \0. Also
	 * starts with a \0. */
	unsigned char *stack;
	size_t stacksize;
	size_t stacklen;
	unsigned reflen;
	unsigned quote;
	/* Used for '@' state remembering and for the "string" consuming state */
	int nextstate;
	unsigned ignore;
	unsigned char *string;
} yxml_t;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void yxml_init(yxml_t *, void *, size_t);
|
||||
|
||||
|
||||
yxml_ret_t yxml_parse(yxml_t *, int);
|
||||
|
||||
|
||||
/* May be called after the last character has been given to yxml_parse().
|
||||
* Returns YXML_OK if the XML document is valid, YXML_EEOF otherwise. Using
|
||||
* this function isn't really necessary, but can be used to detect documents
|
||||
* that don't end correctly. In particular, an error is returned when the XML
|
||||
* document did not contain a (complete) root element, or when the document
|
||||
* ended while in a comment or processing instruction. */
|
||||
yxml_ret_t yxml_eof(yxml_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Returns the length of the element name (x->elem), attribute name (x->attr),
|
||||
* or PI name (x->pi). This function should ONLY be used directly after the
|
||||
* YXML_ELEMSTART, YXML_ATTRSTART or YXML_PISTART (respectively) tokens have
|
||||
* been returned by yxml_parse(), calling this at any other time may not give
|
||||
* the correct results. This function should also NOT be used on strings other
|
||||
* than x->elem, x->attr or x->pi. */
|
||||
static inline size_t yxml_symlen(yxml_t *x, const char *s) {
|
||||
return (x->stack + x->stacklen) - (const unsigned char*)s;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* vim: set noet sw=4 ts=4: */
|
Loading…
Reference in New Issue