You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
395 lines
9.1 KiB
395 lines
9.1 KiB
2 months ago
|
/*
|
||
|
* UCW Library -- A simple XML parser
|
||
|
*
|
||
|
* (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
|
||
|
*
|
||
|
* This software may be freely distributed and used according to the terms
|
||
|
* of the GNU Lesser General Public License.
|
||
|
*/
|
||
|
|
||
|
#include <ucw/lib.h>
|
||
|
#include <ucw-xml/xml.h>
|
||
|
#include <ucw-xml/dtd.h>
|
||
|
#include <ucw/getopt.h>
|
||
|
#include <ucw/fastbuf.h>
|
||
|
#include <ucw/gary.h>
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <fcntl.h>
|
||
|
|
||
|
/*
 * Identifiers returned by the option parser for long-only options.
 * They start at 0x100, i.e. above any single-byte option character,
 * and are matched by the switch in main().
 */
enum {
  WANT_FIRST = 0x100,		/* Base value; not bound to any option */
  WANT_HIDE_ERRORS,		/* --hide-errors */
  WANT_IGNORE_COMMENTS,		/* --ignore-comments */
  WANT_IGNORE_PIS,		/* --ignore-pis */
  WANT_REPORT_BLOCKS,		/* --report-blocks */
  WANT_REPORT_IGNORABLE,	/* --report-ignorable */
  WANT_FILE_ENTITIES,		/* --file-entities */
  WANT_QNAMES,			/* --qnames */
};
|
||
|
|
||
|
static char *shortopts = "spdtn" CF_SHORT_OPTS;
|
||
|
static struct option longopts[] = {
|
||
|
CF_LONG_OPTS
|
||
|
{ "sax", 0, 0, 's' },
|
||
|
{ "pull", 0, 0, 'p' },
|
||
|
{ "dom", 0, 0, 't' },
|
||
|
{ "dtd", 0, 0, 'd' },
|
||
|
{ "namespaces", 0, 0, 'n' },
|
||
|
{ "hide-errors", 0, 0, WANT_HIDE_ERRORS },
|
||
|
{ "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS },
|
||
|
{ "ignore-pis", 0, 0, WANT_IGNORE_PIS },
|
||
|
{ "report-blocks", 0, 0, WANT_REPORT_BLOCKS },
|
||
|
{ "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE },
|
||
|
{ "file-entities", 0, 0, WANT_FILE_ENTITIES },
|
||
|
{ "qnames", 0, 0, WANT_QNAMES },
|
||
|
{ NULL, 0, 0, 0 }
|
||
|
};
|
||
|
|
||
|
static void NONRET
|
||
|
usage(void)
|
||
|
{
|
||
|
fputs("\
|
||
|
Usage: xml-test [options] < input.xml\n\
|
||
|
\n\
|
||
|
Options:\n"
|
||
|
CF_USAGE
|
||
|
"\
|
||
|
-p, --pull Test PULL interface\n\
|
||
|
-s, --sax Test SAX interface\n\
|
||
|
-t, --dom Test DOM interface\n\
|
||
|
-d, --dtd Enable parsing of DTD\n\
|
||
|
-n, --namespaces Resolve namespaces\n\
|
||
|
--hide-errors Hide warnings and error messages\n\
|
||
|
--ignore-comments Ignore comments\n\
|
||
|
--ignore-pis Ignore processing instructions\n\
|
||
|
--report-blocks Report blocks or characters and CDATA sections\n\
|
||
|
--report-ignorable Report ignorable whitespace\n\
|
||
|
--file-entities Resolve file external entities (not fully normative)\n\
|
||
|
--qnames Display qualified names including namespace prefixes\n\
|
||
|
\n", stderr);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/*
 * Command-line switches.  Each counter is incremented by main() once per
 * occurrence of the corresponding option and is only ever tested for
 * being non-zero.
 */
static uint want_sax;			/* -s: install the SAX handlers */
static uint want_pull;			/* -p: iterate with xml_next() instead of xml_parse() */
static uint want_dom;			/* -t: set XML_ALLOC_ALL and dump the tree after parsing */
static uint want_ns;			/* -n: call xml_ns_enable() and print namespace IDs */
static uint want_parse_dtd;		/* -d: set XML_PARSE_DTD */
static uint want_hide_errors;		/* --hide-errors: do not install h_error */
static uint want_ignore_comments;	/* --ignore-comments: clear the comment report/alloc flags */
static uint want_ignore_pis;		/* --ignore-pis: clear the PI report/alloc flags */
static uint want_report_blocks;		/* --report-blocks: install h_block and h_cdata */
static uint want_report_ignorable;	/* --report-ignorable: install h_ignorable */
static uint want_file_entities;		/* --file-entities: install h_resolve_entity */
static uint want_qnames;		/* --qnames: print qualified names in show_node() */

/* Output stream used by all reporting functions; main() opens it on file descriptor 1. */
static struct fastbuf *out;
|
||
|
|
||
|
static char *
|
||
|
node_type(struct xml_node *node)
|
||
|
{
|
||
|
switch (node->type)
|
||
|
{
|
||
|
case XML_NODE_ELEM: return "element";
|
||
|
case XML_NODE_COMMENT: return "comment";
|
||
|
case XML_NODE_PI: return "pi";
|
||
|
case XML_NODE_CHARS: return "chars";
|
||
|
default: return "unknown";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
show_node(struct xml_context *ctx, struct xml_node *node)
|
||
|
{
|
||
|
switch (node->type)
|
||
|
{
|
||
|
case XML_NODE_ELEM:
|
||
|
if (want_ns)
|
||
|
bprintf(out, " (ns%u)<%s>", node->ns, (want_qnames ? xml_node_qname(ctx, node) : node->name));
|
||
|
else
|
||
|
bprintf(out, " <%s>", node->name);
|
||
|
XML_ATTR_FOR_EACH(a, node)
|
||
|
if (want_ns)
|
||
|
bprintf(out, " (ns%u)%s='%s'", a->ns, (want_qnames ? xml_attr_qname(ctx, a) : a->name), a->val);
|
||
|
else
|
||
|
bprintf(out, " %s='%s'", a->name, a->val);
|
||
|
bputc(out, '\n');
|
||
|
break;
|
||
|
case XML_NODE_COMMENT:
|
||
|
bprintf(out, " text='%s'\n", node->text);
|
||
|
break;
|
||
|
case XML_NODE_PI:
|
||
|
bprintf(out, " target=%s text='%s'\n", node->name, node->text);
|
||
|
break;
|
||
|
case XML_NODE_CHARS:
|
||
|
bprintf(out, " text='%s'\n", node->text);
|
||
|
break;
|
||
|
default:
|
||
|
bputc(out, '\n');
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
show_tree(struct xml_context *ctx, struct xml_node *node, uint level)
|
||
|
{
|
||
|
if (!node)
|
||
|
return;
|
||
|
bputs(out, "DOM: ");
|
||
|
for (uint i = 0; i < level; i++)
|
||
|
bputs(out, " ");
|
||
|
bputs(out, node_type(node));
|
||
|
show_node(ctx, node);
|
||
|
if (node->type == XML_NODE_ELEM)
|
||
|
XML_NODE_FOR_EACH(son, node)
|
||
|
show_tree(ctx, son, level + 1);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_error(struct xml_context *ctx)
|
||
|
{
|
||
|
bprintf(out, "SAX: %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_document_start(struct xml_context *ctx UNUSED)
|
||
|
{
|
||
|
bputs(out, "SAX: document_start\n");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_document_end(struct xml_context *ctx UNUSED)
|
||
|
{
|
||
|
bputs(out, "SAX: document_end\n");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_xml_decl(struct xml_context *ctx)
|
||
|
{
|
||
|
bprintf(out, "SAX: xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_doctype_decl(struct xml_context *ctx)
|
||
|
{
|
||
|
bprintf(out, "SAX: doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n",
|
||
|
ctx->doctype, ctx->public_id ? : "", ctx->system_id ? : "",
|
||
|
!!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET));
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_comment(struct xml_context *ctx)
|
||
|
{
|
||
|
bputs(out, "SAX: comment");
|
||
|
show_node(ctx, ctx->node);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_pi(struct xml_context *ctx)
|
||
|
{
|
||
|
bputs(out, "SAX: pi");
|
||
|
show_node(ctx, ctx->node);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_stag(struct xml_context *ctx)
|
||
|
{
|
||
|
bputs(out, "SAX: stag");
|
||
|
show_node(ctx, ctx->node);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_etag(struct xml_context *ctx)
|
||
|
{
|
||
|
bprintf(out, "SAX: etag </%s>\n", ctx->node->name);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_chars(struct xml_context *ctx)
|
||
|
{
|
||
|
bputs(out, "SAX: chars");
|
||
|
show_node(ctx, ctx->node);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_block(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
|
||
|
{
|
||
|
bprintf(out, "SAX: block text='%s'\n", text);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_cdata(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
|
||
|
{
|
||
|
bprintf(out, "SAX: cdata text='%s'\n", text);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_ignorable(struct xml_context *ctx UNUSED, char *text, uint len UNUSED)
|
||
|
{
|
||
|
bprintf(out, "SAX: ignorable text='%s'\n", text);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_dtd_start(struct xml_context *ctx UNUSED)
|
||
|
{
|
||
|
bputs(out, "SAX: dtd_start\n");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_dtd_end(struct xml_context *ctx UNUSED)
|
||
|
{
|
||
|
bputs(out, "SAX: dtd_end\n");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e)
|
||
|
{
|
||
|
xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096));
|
||
|
}
|
||
|
|
||
|
int
|
||
|
main(int argc, char **argv)
|
||
|
{
|
||
|
int opt;
|
||
|
cf_def_file = NULL;
|
||
|
log_init(argv[0]);
|
||
|
while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
|
||
|
switch (opt)
|
||
|
{
|
||
|
case 's':
|
||
|
want_sax++;
|
||
|
break;
|
||
|
case 'p':
|
||
|
want_pull++;
|
||
|
break;
|
||
|
case 't':
|
||
|
want_dom++;
|
||
|
break;
|
||
|
case 'd':
|
||
|
want_parse_dtd++;
|
||
|
break;
|
||
|
case 'n':
|
||
|
want_ns++;
|
||
|
break;
|
||
|
case WANT_HIDE_ERRORS:
|
||
|
want_hide_errors++;
|
||
|
break;
|
||
|
case WANT_IGNORE_COMMENTS:
|
||
|
want_ignore_comments++;
|
||
|
break;
|
||
|
case WANT_IGNORE_PIS:
|
||
|
want_ignore_pis++;
|
||
|
break;
|
||
|
case WANT_REPORT_BLOCKS:
|
||
|
want_report_blocks++;
|
||
|
break;
|
||
|
case WANT_REPORT_IGNORABLE:
|
||
|
want_report_ignorable++;
|
||
|
break;
|
||
|
case WANT_FILE_ENTITIES:
|
||
|
want_file_entities++;
|
||
|
break;
|
||
|
case WANT_QNAMES:
|
||
|
want_qnames++;
|
||
|
break;
|
||
|
default:
|
||
|
usage();
|
||
|
}
|
||
|
if (optind != argc)
|
||
|
usage();
|
||
|
|
||
|
out = bfdopen_shared(1, 4096);
|
||
|
struct xml_context ctx;
|
||
|
xml_init(&ctx);
|
||
|
if (!want_hide_errors)
|
||
|
ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error;
|
||
|
if (want_sax)
|
||
|
{
|
||
|
ctx.h_document_start = h_document_start;
|
||
|
ctx.h_document_end = h_document_end;
|
||
|
ctx.h_xml_decl = h_xml_decl;
|
||
|
ctx.h_doctype_decl = h_doctype_decl;
|
||
|
ctx.h_comment = h_comment;
|
||
|
ctx.h_pi = h_pi;
|
||
|
ctx.h_stag = h_stag;
|
||
|
ctx.h_etag = h_etag;
|
||
|
ctx.h_chars = h_chars;
|
||
|
if (want_report_blocks)
|
||
|
{
|
||
|
ctx.h_block = h_block;
|
||
|
ctx.h_cdata = h_cdata;
|
||
|
}
|
||
|
if (want_report_ignorable)
|
||
|
ctx.h_ignorable = h_ignorable;
|
||
|
ctx.h_dtd_start = h_dtd_start;
|
||
|
ctx.h_dtd_end = h_dtd_end;
|
||
|
}
|
||
|
if (want_dom)
|
||
|
ctx.flags |= XML_ALLOC_ALL;
|
||
|
if (want_parse_dtd)
|
||
|
ctx.flags |= XML_PARSE_DTD;
|
||
|
if (want_ignore_comments)
|
||
|
ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS);
|
||
|
if (want_ignore_pis)
|
||
|
ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS);
|
||
|
if (want_file_entities)
|
||
|
ctx.h_resolve_entity = h_resolve_entity;
|
||
|
if (want_ns)
|
||
|
xml_ns_enable(&ctx);
|
||
|
xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096));
|
||
|
bputs(out, "PULL: start\n");
|
||
|
if (want_pull)
|
||
|
{
|
||
|
ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI;
|
||
|
uint state;
|
||
|
while (state = xml_next(&ctx))
|
||
|
switch (state)
|
||
|
{
|
||
|
case XML_STATE_CHARS:
|
||
|
bputs(out, "PULL: chars");
|
||
|
show_node(&ctx, ctx.node);
|
||
|
break;
|
||
|
case XML_STATE_STAG:
|
||
|
bputs(out, "PULL: stag");
|
||
|
show_node(&ctx, ctx.node);
|
||
|
break;
|
||
|
case XML_STATE_ETAG:
|
||
|
bprintf(out, "PULL: etag </%s>\n", ctx.node->name);
|
||
|
break;
|
||
|
case XML_STATE_COMMENT:
|
||
|
bputs(out, "PULL: comment");
|
||
|
show_node(&ctx, ctx.node);
|
||
|
break;
|
||
|
case XML_STATE_PI:
|
||
|
bputs(out, "PULL: pi");
|
||
|
show_node(&ctx, ctx.node);
|
||
|
break;
|
||
|
default:
|
||
|
bputs(out, "PULL: unknown\n");
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
xml_parse(&ctx);
|
||
|
if (ctx.err_code)
|
||
|
bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg);
|
||
|
else
|
||
|
{
|
||
|
bputs(out, "PULL: eof\n");
|
||
|
if (want_dom)
|
||
|
show_tree(&ctx, ctx.dom, 0);
|
||
|
}
|
||
|
|
||
|
if (want_ns)
|
||
|
{
|
||
|
bputs(out, "Known namespaces:\n");
|
||
|
for (uns i=0; i < GARY_SIZE(ctx.ns_by_id); i++)
|
||
|
bprintf(out, "%u\t%s\n", i, ctx.ns_by_id[i]);
|
||
|
}
|
||
|
|
||
|
xml_cleanup(&ctx);
|
||
|
bclose(out);
|
||
|
return 0;
|
||
|
}
|