Browse Source

shared: add simplistic XML parser for usage in the D-Bus policy language compat parser

keep-around/ba91431154ad7bac82ddf0a540ec1b40db62d782
Lennart Poettering 9 years ago
parent
commit
08bcebf36e
  1. 1
      .gitignore
  2. 13
      Makefile.am
  3. 216
      src/shared/xml.c
  4. 34
      src/shared/xml.h
  5. 83
      src/test/test-xml.c

1
.gitignore

@ -186,6 +186,7 @@
/test-utf8
/test-util
/test-watchdog
/test-xml
/timedatectl
/udevadm
/undefined

13
Makefile.am

@ -779,7 +779,9 @@ libsystemd_shared_la_SOURCES = \
src/shared/syscall-list.c \
src/shared/syscall-list.h \
src/shared/audit.c \
src/shared/audit.h
src/shared/audit.h \
src/shared/xml.c \
src/shared/xml.h
nodist_libsystemd_shared_la_SOURCES = \
src/shared/errno-from-name.h \
@ -1156,7 +1158,8 @@ tests += \
test-hashmap \
test-list \
test-tables \
test-device-nodes
test-device-nodes \
test-xml
EXTRA_DIST += \
test/sched_idle_bad.service \
@ -1258,6 +1261,12 @@ test_hashmap_SOURCES = \
test_hashmap_LDADD = \
libsystemd-core.la
test_xml_SOURCES = \
src/test/test-xml.c
test_xml_LDADD = \
libsystemd-shared.la
test_list_SOURCES = \
src/test/test-list.c

216
src/shared/xml.c

@ -0,0 +1,216 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2013 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <string.h>
#include "util.h"
#include "xml.h"
/* Tokenizer states. The current one is carried between xml_tokenize()
 * calls inside the caller's opaque state pointer. */
enum {
        /* Outside of any tag. NOTE(review): presumably has to be 0 so
         * that a state pointer initialized to NULL starts here --
         * confirm against PTR_TO_INT() */
        STATE_TEXT = 0,

        /* Inside a tag: after the tag name or after an attribute value */
        STATE_TAG = 1,

        /* Right after an attribute name, where an optional "=value" may follow */
        STATE_ATTRIBUTE = 2,
};
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications.
 *
 * xml_tokenize() returns the next token of the NUL-terminated string
 * at *p:
 *
 *   p      in/out cursor into the document; moved past the returned
 *          token whenever a token (other than XML_END) is returned.
 *   name   receives the token text (tag name, attribute name,
 *          attribute value or text run) as a string allocated with
 *          strndup(), so the caller has to free it. Set to NULL for
 *          XML_TAG_CLOSE_EMPTY. Not touched for XML_END or on error.
 *   state  in/out tokenizer state, opaque to the caller. It is decoded
 *          with PTR_TO_INT(); start with *state == NULL (presumably
 *          this maps to STATE_TEXT -- confirm against PTR_TO_INT()).
 *
 * Returns one of the XML_xxx token types, or a negative value on
 * failure: -EINVAL if the input is malformed or ends in the middle of
 * a tag, -ENOMEM if the token text could not be allocated.
 *
 * Comments, processing instructions and <!...> declarations are
 * skipped without producing a token. */
int xml_tokenize(const char **p, char **name, void **state) {
        const char *c, *e, *b;
        char *ret;
        int t;

        assert(p);
        assert(*p);
        assert(name);
        assert(state);

        t = PTR_TO_INT(*state);
        c = *p;

        for (;;) {
                if (*c == 0) {
                        /* Running out of input is only a clean end
                         * between tags. Inside a tag it means the
                         * document is truncated, which every other
                         * unterminated construct below reports as
                         * -EINVAL, too. */
                        if (t != STATE_TEXT)
                                return -EINVAL;

                        return XML_END;
                }

                switch (t) {

                case STATE_TEXT: {
                        int x;

                        e = strchrnul(c, '<');
                        if (e > c) {
                                /* More text... */

                                ret = strndup(c, e - c);
                                if (!ret)
                                        return -ENOMEM;

                                *name = ret;
                                *p = e;
                                *state = INT_TO_PTR(STATE_TEXT);

                                return XML_TEXT;
                        }

                        /* e == c and *c != 0, hence we are looking at a '<' */
                        assert(*e == '<');
                        b = c + 1;

                        if (startswith(b, "!--")) {
                                /* A comment */
                                e = strstr(b + 3, "-->");
                                if (!e)
                                        return -EINVAL;

                                c = e + 3;
                                continue;
                        }

                        if (*b == '?') {
                                /* Processing instruction */

                                e = strstr(b + 1, "?>");
                                if (!e)
                                        return -EINVAL;

                                c = e + 2;
                                continue;
                        }

                        if (*b == '!') {
                                /* DTD */

                                e = strchr(b + 1, '>');
                                if (!e)
                                        return -EINVAL;

                                c = e + 1;
                                continue;
                        }

                        if (*b == '/') {
                                /* A closing tag */
                                x = XML_TAG_CLOSE;
                                b++;
                        } else
                                x = XML_TAG_OPEN;

                        /* The tag name runs up to the first whitespace, '/' or '>' */
                        e = strpbrk(b, WHITESPACE "/>");
                        if (!e)
                                return -EINVAL;

                        ret = strndup(b, e - b);
                        if (!ret)
                                return -ENOMEM;

                        *name = ret;
                        *p = e;
                        *state = INT_TO_PTR(STATE_TAG);

                        return x;
                }

                case STATE_TAG:

                        b = c + strspn(c, WHITESPACE);
                        if (*b == 0)
                                return -EINVAL;

                        e = b + strcspn(b, WHITESPACE "=/>");
                        if (e > b) {
                                /* An attribute */

                                ret = strndup(b, e - b);
                                if (!ret)
                                        return -ENOMEM;

                                *name = ret;
                                *p = e;
                                *state = INT_TO_PTR(STATE_ATTRIBUTE);

                                return XML_ATTRIBUTE_NAME;
                        }

                        if (startswith(b, "/>")) {
                                /* An empty tag */

                                *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
                                *p = b + 2;
                                *state = INT_TO_PTR(STATE_TEXT);

                                return XML_TAG_CLOSE_EMPTY;
                        }

                        if (*b != '>')
                                return -EINVAL;

                        /* End of the tag, carry on with the text that follows it */
                        c = b + 1;
                        t = STATE_TEXT;
                        continue;

                case STATE_ATTRIBUTE:

                        if (*c == '=') {
                                c++;

                                if (*c == '\'' || *c == '\"') {
                                        /* Tag with a quoted value */

                                        e = strchr(c+1, *c);
                                        if (!e)
                                                return -EINVAL;

                                        ret = strndup(c+1, e - c - 1);
                                        if (!ret)
                                                return -ENOMEM;

                                        *name = ret;
                                        *p = e + 1;
                                        *state = INT_TO_PTR(STATE_TAG);

                                        return XML_ATTRIBUTE_VALUE;
                                }

                                /* Tag with a value without quotes */

                                b = strpbrk(c, WHITESPACE ">");
                                if (!b)
                                        /* The value runs into the end
                                         * of the input, i.e. the tag is
                                         * never closed. Returning an
                                         * empty value without moving
                                         * the cursor would make us read
                                         * the same text again as an
                                         * attribute name. */
                                        return -EINVAL;

                                ret = strndup(c, b - c);
                                if (!ret)
                                        return -ENOMEM;

                                *name = ret;
                                *p = b;
                                *state = INT_TO_PTR(STATE_TAG);

                                return XML_ATTRIBUTE_VALUE;
                        }

                        /* An attribute without a value, look for the next one */
                        t = STATE_TAG;
                        continue;

                default:
                        /* Must be checked in here: the loop is never
                         * left other than via return, so a check
                         * placed after it could never trigger and a
                         * bogus state would make us spin forever. */
                        assert_not_reached("Bad state");
                }
        }
}

34
src/shared/xml.h

@ -0,0 +1,34 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2013 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
/* Token types returned by xml_tokenize(). They are all non-negative;
 * negative return values of xml_tokenize() are errors. */
enum {
        XML_END = 0,             /* end of the input string reached */
        XML_TEXT = 1,            /* a run of text outside of tags */
        XML_TAG_OPEN = 2,        /* "<name", the start of an opening tag */
        XML_TAG_CLOSE = 3,       /* "</name", a closing tag */
        XML_TAG_CLOSE_EMPTY = 4, /* "/>", the end of an empty tag; comes with a NULL name */
        XML_ATTRIBUTE_NAME = 5,  /* name of an attribute inside a tag */
        XML_ATTRIBUTE_VALUE = 6, /* value of the preceding attribute, without its quotes */
};
/* Reads the next token from the NUL-terminated string at *p and
 * returns its type (one of the XML_xxx values above), or a negative
 * errno-style value on failure (-EINVAL for malformed input, -ENOMEM
 * if allocating the token text failed).
 *
 * When a token is returned, *p is advanced past it and *name is set
 * to a newly allocated copy of its text, which the caller must free.
 * For XML_TAG_CLOSE_EMPTY *name is set to NULL; for XML_END neither
 * *p nor *name is changed.
 *
 * *state keeps the tokenizer state between calls and is opaque to the
 * caller; initialize it to NULL before the first call and pass the
 * same variable for every subsequent call on the same document. */
int xml_tokenize(const char **p, char **name, void **state);

83
src/test/test-xml.c

@ -0,0 +1,83 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2013 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <stdarg.h>
#include "xml.h"
#include "util.h"
static void test_one(const char *data, ...) {
void *state = NULL;
va_list ap;
va_start(ap, data);
for (;;) {
_cleanup_free_ char *name = NULL;
int t, tt;
const char *nn;
t = xml_tokenize(&data, &name, &state);
assert_se(t >= 0);
tt = va_arg(ap, int);
assert_se(tt >= 0);
assert_se(t == tt);
if (t == XML_END)
break;
nn = va_arg(ap, const char *);
assert_se(streq_ptr(nn, name));
}
va_end(ap);
}
/* Runs the tokenizer over a couple of small documents. Each
 * test_one() call lists the expected (type, text) pairs, terminated
 * by XML_END. */
int main(int argc, char *argv[]) {

        /* An empty document yields nothing but the end marker */
        test_one("", XML_END);

        /* A plain open/close pair */
        test_one("<foo></foo>",
                 XML_TAG_OPEN, "foo",
                 XML_TAG_CLOSE, "foo",
                 XML_END);

        /* Unquoted and quoted attribute values, plus an empty tag.
         * The empty tag is reported without a name. test_one() reads
         * that argument with va_arg(ap, const char *), hence pass the
         * NULL with an explicit pointer type: a bare NULL is not
         * guaranteed by C to have pointer type when passed through
         * "...". */
        test_one("<foo waldo=piep meh=\"huhu\"/>",
                 XML_TAG_OPEN, "foo",
                 XML_ATTRIBUTE_NAME, "waldo",
                 XML_ATTRIBUTE_VALUE, "piep",
                 XML_ATTRIBUTE_NAME, "meh",
                 XML_ATTRIBUTE_VALUE, "huhu",
                 XML_TAG_CLOSE_EMPTY, (const char *) NULL,
                 XML_END);

        /* The processing instruction and the comment are skipped, so
         * the single spaces after each of them show up as two
         * separate text tokens */
        test_one("xxxx\n"
                 "<foo><?xml foo?> <!-- zzzz --> </foo>",
                 XML_TEXT, "xxxx\n",
                 XML_TAG_OPEN, "foo",
                 XML_TEXT, " ",
                 XML_TEXT, " ",
                 XML_TAG_CLOSE, "foo",
                 XML_END);

        return 0;
}
Loading…
Cancel
Save