Browse Source

use perfect hash table made by gperf in sax_cb

master
Shokara 7 months ago
parent
commit
17be38e5f9
Signed by: shokara GPG Key ID: 7F5702DEF8E6DC82
  1. 2
      Makefile
  2. 1
      README
  3. 1
      TODO
  4. 85
      gperf.c
  5. 39
      gperf.h
  6. 18
      parse.c
  7. 13
      sax-elements

2
Makefile

@ -2,7 +2,7 @@ NAME=rssdl
CC=c89
CFLAGS=-O2 -std=c89 -g -I. -Wall -Wextra -pedantic -D_XOPEN_SOURCE=500
CFILES=main.c fetch.c parse.c save.c hash.c
CFILES=main.c fetch.c parse.c save.c hash.c gperf.c
LIBS=-lcurl -lmxml
all:

1
README

@ -9,6 +9,7 @@ Dependencies
- POSIX-compliant system
- curl
- mxml
- gperf # only needed when generating gperf.c/h
Usage
-----

1
TODO

@ -1,7 +1,6 @@
TODO
====
- add atom support
- check element against a hash table in sax_cb when parsing
- add unit tests via something like MinUnit/JTN002
revisit later

85
gperf.c

@ -0,0 +1,85 @@
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -cC -W elements sax-elements | indent -i8 -linux */
/* Computed positions: -k'4' */
#include <stdio.h>
#include <string.h>
#include "gperf.h"
/*
 * Perfect-hash function for the element keywords.
 *
 * The result is the length (converted to unsigned int) plus a weight chosen
 * by the fourth character, str[3].  When the converted length is exactly 3
 * the string is not inspected and the length alone is returned.
 *
 * The weights are the non-default entries of gperf's association table;
 * every other byte value weighs 23.  The character literals rely on the
 * ASCII codes that gperf.h checks at compile time.
 *
 * str: keyword candidate; str[3] must be readable unless the converted
 *      length is 3.
 * len: length of str in characters.
 *
 * Returns the hash value for str.
 */
static unsigned int hash(register const char *str, register size_t len)
{
	unsigned int hval = (unsigned int)len;

	/* Too short to have a fourth character: the length is the hash. */
	if (hval == 3)
		return hval;

	switch ((unsigned char)str[3]) {
	case 'c':
	case 'k':
	case 'r':
	case 't':
		return hval;
	case 'l':
	case 'm':
		return hval + 5;
	case 'D':
		return hval + 9;
	case 'd':
	case 'n':
		return hval + 10;
	case 'a':
		return hval + 15;
	default:
		return hval + 23;
	}
}
/*
 * Look up str in the fixed set of element keywords.
 *
 * str: candidate name; only the first len characters are compared.
 * len: number of characters of str to consider.
 *
 * Returns a pointer to the matching entry of the internal keyword table,
 * or a null pointer when str is not one of the keywords.
 */
const char *in_word_set(register const char *str, register size_t len)
{
	/* Slot index == hash value; "" marks an unused slot. */
	static const char *const elements[] = {
		"",			/*  0 */
		"",			/*  1 */
		"",			/*  2 */
		"rss",			/*  3 */
		"link",			/*  4 */
		"entry",		/*  5 */
		"",			/*  6 */
		"content",		/*  7 */
		"",			/*  8 */
		"item",			/*  9 */
		"title",		/* 10 */
		"description",		/* 11 */
		"summary",		/* 12 */
		"",			/* 13 */
		"feed",			/* 14 */
		"content:encoded",	/* 15 */
		"pubDate",		/* 16 */
		"channel",		/* 17 */
		"",			/* 18 */
		"",			/* 19 */
		"",			/* 20 */
		"",			/* 21 */
		"updated"		/* 22 */
	};
	const char *candidate;
	unsigned int slot;

	/* No keyword has a length outside this range. */
	if (len > MAX_WORD_LENGTH || len < MIN_WORD_LENGTH)
		return NULL;

	slot = hash(str, len);
	if (slot > MAX_HASH_VALUE)
		return NULL;

	candidate = elements[slot];

	/* Cheap first-character test before the full comparison. */
	if (*str != *candidate)
		return NULL;
	if (strncmp(str + 1, candidate + 1, len - 1) != 0)
		return NULL;

	/* The table entry must end exactly where the input does. */
	return candidate[len] == '\0' ? candidate : NULL;
}

39
gperf.h

@ -0,0 +1,39 @@
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -cC -W elements sax-elements | indent -i8 -linux */
/* Computed positions: -k'4' */
/*
 * Compile-time guard: refuse to build unless the execution character set
 * assigns the ISO-646 (ASCII) codes checked below.  The generated hash in
 * gperf.c indexes its association table by raw character value, so the
 * table is only meaningful when characters have these codes.
 */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
#endif
/*
 * size_t is used in the prototype below; include its definition here so the
 * header does not depend on what the including file pulled in beforehand.
 */
#include <stddef.h>

/* Number of keywords in the lookup table. */
#define TOTAL_KEYWORDS 13
/* Shortest and longest keyword lengths; other lengths are rejected early. */
#define MIN_WORD_LENGTH 3
#define MAX_WORD_LENGTH 15
/* Smallest and largest hash value produced for a keyword. */
#define MIN_HASH_VALUE 3
#define MAX_HASH_VALUE 22
/* maximum key range = 20, duplicates = 0 */

/*
 * Look up the first len characters of str in the keyword set.
 * Returns a pointer to the matching keyword, or a null pointer if str is
 * not a keyword.
 */
const char *in_word_set(register const char *str, register size_t len);

18
parse.c

@ -1,5 +1,6 @@
#include <string.h>
#include <mxml.h>
#include "gperf.h"
#include "parse.h"
mxml_node_t *tree;
@ -10,21 +11,8 @@ void sax_cb(mxml_node_t *curnode, mxml_sax_event_t event, void *data)
{
if (event == MXML_SAX_ELEMENT_OPEN) {
const char *element = mxmlGetElement(curnode);
/* TODO use a hash table here */
if (!strcmp(element, "rss") ||
!strcmp(element, "feed") ||
!strcmp(element, "updated") ||
!strcmp(element, "channel") ||
!strcmp(element, "item") ||
!strcmp(element, "entry") ||
!strcmp(element, "title") ||
!strcmp(element, "link") ||
!strcmp(element, "updated") ||
!strcmp(element, "pubDate") ||
!strcmp(element, "description") ||
!strcmp(element, "summary") ||
!strcmp(element, "content") ||
!strcmp(element, "content:encoded"))
size_t len = strlen(element);
if (in_word_set(element, len))
mxmlRetain(curnode);
} else if (event == MXML_SAX_DIRECTIVE) {
mxmlRetain(curnode);

13
sax-elements

@ -0,0 +1,13 @@
channel
content
content:encoded
description
entry
feed
item
link
pubDate
rss
summary
title
updated
Loading…
Cancel
Save