From 24071a313343becd6e3574c21bc7f1a77e4bb9ae Mon Sep 17 00:00:00 2001 From: mfranz Date: Sun, 29 Jun 2008 13:08:24 +0000 Subject: [PATCH] Erik HOFMAN: faster and better xmlgrep implementation --- utils/xmlgrep/Makefile.am | 3 +- utils/xmlgrep/xml.c | 733 ++++++++++++++++++++++++++++++++++++++ utils/xmlgrep/xml.h | 191 ++++++++++ utils/xmlgrep/xmlgrep.c | 292 +++++++++++++++ utils/xmlgrep/xmlgrep.cxx | 249 ------------- 5 files changed, 1217 insertions(+), 251 deletions(-) create mode 100644 utils/xmlgrep/xml.c create mode 100644 utils/xmlgrep/xml.h create mode 100644 utils/xmlgrep/xmlgrep.c delete mode 100644 utils/xmlgrep/xmlgrep.cxx diff --git a/utils/xmlgrep/Makefile.am b/utils/xmlgrep/Makefile.am index 4c0bbe2eb..cd314d4d8 100644 --- a/utils/xmlgrep/Makefile.am +++ b/utils/xmlgrep/Makefile.am @@ -1,4 +1,3 @@ noinst_PROGRAMS = xmlgrep -xmlgrep_SOURCES = xmlgrep.cxx -xmlgrep_LDADD = -lsgstructure -lsgprops -lsgmisc -lsgdebug -lsgxml +xmlgrep_SOURCES = xmlgrep.c xml.c xml.h diff --git a/utils/xmlgrep/xml.c b/utils/xmlgrep/xml.c new file mode 100644 index 000000000..b464724e0 --- /dev/null +++ b/utils/xmlgrep/xml.c @@ -0,0 +1,733 @@ +/* Copyright (c) 2007,2008 by Adalin B.V. + * Copyright (c) 2007,2008 by Erik Hofman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of (any of) the copyrightholder(s) nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include + +typedef struct +{ + HANDLE m; + void *p; +} SIMPLE_UNMMAP; + +static SIMPLE_UNMMAP un; + +/* + * map 'filename' and return a pointer to it. + */ +void *simple_mmap(int, unsigned int, SIMPLE_UNMMAP *); +void simple_unmmap(SIMPLE_UNMMAP *); + +#define mmap(a,b,c,d,e,f) simple_mmap((e), (b), &un) +#define munmap(a,b) simple_unmmap(&un) + +#else /* !WIN32 */ +# include +# include +#endif + +#include +#include +#include +#include + +#include +#include +#include + + +struct _xml_id +{ + char *start; + size_t len; + int fd; +}; + +static char *__xmlCopyNode(char *, size_t, const char *); +static char *__xmlGetNode(char *, size_t, const char *, size_t *); +static char *__xmlFindNextElement(char *, size_t, const char *); +static void *memncasemem(const void *, size_t, const void *, size_t); + +void * +xmlOpen(char *fn) +{ + struct _xml_id *id = 0; + + if (fn) + { + int fd = open(fn, O_RDONLY); + if (fd > 0) + { + id = malloc(sizeof(struct _xml_id)); + if (id) + { + struct stat statbuf; + + fstat(fd, &statbuf); + + id->fd = fd; + id->len = statbuf.st_size; + id->start = mmap(0, id->len, PROT_READ, MAP_PRIVATE, fd, 0L); + } + } + } + + return (void*)id; +} + +void +xmlClose(void *id) +{ + if (id) + { + struct _xml_id *xid = (struct _xml_id *)id; + munmap(xid->start, xid->len); + close(xid->fd); + free(id); + id = 0; + } +} + +void * +xmlCopyNode(void *id, char *path) +{ + struct _xml_id *xsid = 0; + + if (id && path) + { + struct _xml_id *xid = (struct _xml_id *)id; + char *ptr, *p; + size_t rlen; + + ptr = __xmlGetNode(xid->start, xid->len, path, &rlen); + if (ptr) + { + xsid = malloc(sizeof(struct _xml_id) + rlen); + if (xsid) + { + p = (char *)xsid + sizeof(struct _xml_id); + + xsid->len = rlen; + xsid->start = p; + xsid->fd = 0; + + memcpy(xsid->start, ptr, rlen); + } + } + } + + return (void *)xsid; +} + +void * +xmlGetNode(void *id, char *path) +{ + struct _xml_id *xsid = 0; + + if (id && path) + { + struct _xml_id *xid = (struct _xml_id *)id; + size_t rlen; + char *ptr; + + ptr = __xmlGetNode(xid->start, xid->len, path, &rlen); + if (ptr) + { + xsid = malloc(sizeof(struct _xml_id)); + xsid->len = rlen; + xsid->start = ptr; + xsid->fd = 0; + } + } + + return (void *)xsid; +} + +void * +xmlGetNextElement(const void *pid, void *id, char *path) +{ + struct _xml_id *xpid = (struct _xml_id *)pid; + struct _xml_id *xsid = 0; + + if (id && path) + { + struct _xml_id *xid = (struct _xml_id *)id; + size_t rlen, nlen; + char *ptr; + + if (xid->len < xpid->len) xid->start += xid->len; + nlen = xpid->len - (xid->start - xpid->start); + + ptr = __xmlGetNode(xid->start, nlen, path, &rlen); + if (ptr) + { + xid->len = rlen; + xid->start = ptr; + xsid = xid; + } + } + + return (void *)xsid; +} + +int +xmlCompareString(const void *id, const char *s) +{ + struct _xml_id *xid = (struct _xml_id *)id; + int ret = -1; + + if (xid && xid->len && s && (strlen(s) > 0)) + { + ret = strncasecmp(xid->start, s, xid->len); + } + + return ret; +} + +int +xmlCompareNodeString(const void *id, const char *path, const char *s) +{ + struct _xml_id *xid = (struct _xml_id *)id; + int ret = -1; + + if (xid && xid->len && path && s && (strlen(s) > 0)) + { + size_t rlen; + char *str; + + str = __xmlGetNode(xid->start, xid->len, path, &rlen); + if (str) ret = strncasecmp(str, s, rlen); + } + + return ret; +} + +char * +xmlGetNodeString(void *id, const char *path) +{ + struct _xml_id *xid = (struct _xml_id *)id; + char *str = 0; + + if (xid && xid->len && path) + { + str = __xmlCopyNode(xid->start, xid->len, path); + if (str) + { + char *ps, *pe, *pem; + int slen; + + slen = strlen(str); + ps = str; + pe = pem = ps+slen; + + while ((psps) && isspace(*pe)) pe--; + + if (pestr) && slen) memmove(str, ps, slen+1); + else if (!slen) *str = 0; + } + } + + return str; +} + +char * +xmlGetString(void *id) +{ + struct _xml_id *xid = (struct _xml_id *)id; + char *str = 0; + + if (xid && xid->len) + { + str = malloc(xid->len+1); + if (str) + { + char *ps, *pe, *pem; + int slen; + + slen = xid->len; + memcpy(str, xid->start, slen); + *(str+slen) = 0; + + ps = str; + pe = pem = ps+slen; + + while ((psps) && isspace(*pe)) pe--; + + if (pestr) && slen) memmove(str, ps, slen+1); + else if (!slen) *str = 0; + } + } + + return str; +} + +unsigned int +xmlCopyString(void *id, const char *path, char *buffer, unsigned int buflen) +{ + struct _xml_id *xid = (struct _xml_id *)id; + unsigned int rlen = 0; + + if (xid && xid->len && path && buffer && buflen) + { + char *str; + + *buffer = 0; + str = __xmlGetNode(xid->start, xid->len, path, &rlen); + if (str) + { + char *ps, *pe; + + ps = str; + pe = ps+rlen-1; + + while ((psps) && isspace(*pe)) pe--; + + rlen = (pe-ps)+1; + if (rlen >= buflen) rlen = buflen-1; + + memcpy(buffer, ps, rlen); + str = buffer + rlen; + *str = 0; + } + } + + return rlen; +} + +long int +xmlGetNodeInt(void *id, const char *path) +{ + struct _xml_id *xid = (struct _xml_id *)id; + long int li = 0; + + if (path && xid && xid->len) + { + unsigned int rlen; + char *str; + + str = __xmlGetNode(xid->start, xid->len, path, &rlen); + if (str) li = strtol(str, (char **)NULL, 10); + } + + return li; +} + +long int +xmlGetInt(void *id) +{ + struct _xml_id *xid = (struct _xml_id *)id; + long int li = 0; + + if (xid && xid->len) + li = strtol(xid->start, (char **)NULL, 10); + + return li; +} + +double +xmlGetNodeDouble(void *id, const char *path) +{ + struct _xml_id *xid = (struct _xml_id *)id; + double d = 0.0; + + if (path && xid && xid->len) + { + unsigned int rlen; + char *str; + + str = __xmlGetNode(xid->start, xid->len, path, &rlen); + + if (str) d = strtod(str, (char **)NULL); + } + + return d; +} + +double +xmlGetDouble(void *id) +{ + struct _xml_id *xid = (struct _xml_id *)id; + double d = 0.0; + + if (xid && xid->len) + d = strtod(xid->start, (char **)NULL); + + return d; +} + + +unsigned int +xmlGetNumElements(void *id, const char *path) +{ + struct _xml_id *xid = (struct _xml_id *)id; + unsigned ret = 0; + + if (xid && xid->len && path) + { + unsigned int clen; + char *p, *pathname; + char *pname, *nname; + + pathname = (char *)path; + if (*path == '/') pathname++; + + nname = strrchr(pathname, '/'); + if (nname) + { + unsigned int plen = nname-pathname; + + pname = calloc(1, plen+1); + memcpy(pname, path, plen); + + p = __xmlGetNode(xid->start, xid->len, pname, &clen); + free(pname); + } + else + { + nname = (char *)pathname; + p = (char *)xid->start; + clen = xid->len; + } + + while ((p = __xmlFindNextElement(p, clen, nname)) != 0) + ret++; + } + + return ret; +} + +void * +xmlMarkId(void *id) +{ + struct _xml_id *xmid = 0; + + if (id) + { + xmid = malloc(sizeof(struct _xml_id)); + if (xmid) + { + memcpy(xmid, id, sizeof(struct _xml_id)); + xmid->fd = 0; + } + } + + return (void *)xmid; +} + +/* -------------------------------------------------------------------------- */ + +char * +__xmlCopyNode(char *start, size_t len, const char *path) +{ + char *p, *ret = 0; + size_t rlen; + + p = __xmlGetNode(start, len, path, &rlen); + if (p && rlen) + { + ret = calloc(1, rlen+1); + memcpy(ret, p, rlen); + } + + return ret; +} + +char * +__xmlGetNode(char *start, size_t len, const char *path, size_t *rlen) +{ + char *ret = 0; + + if (len) + { + char last_node = 0; + char *ptr, *name; + int plen; + + name = (char *)path; + if (*name == '/') name++; /* skip the leading '/' character */ + + ptr = strchr(name, '/'); + if (!ptr) + { + last_node = 1; + ptr = name+strlen(name); + } + plen = ptr - name; + + if (plen) + { + char *p, *cur; + + cur = start; + do + { + if ((p = memncasemem(cur, len, name, plen)) != 0) + { + len -= (p + plen) - cur; + cur = p + plen; + } + } + while (p && (*(p-1) != '<')); + + if (p) + { + p = cur; + while ((*cur++ != '>') && (cur<(p+len))); + len -= cur - p; + + if (last_node) + { + char *rptr = cur; + do + { + if ((p = memncasemem(cur, len, name, plen)) != 0) + { + len -= (p + plen) - cur; + cur = p + plen; + if (*(p-2) == '<' && *(p-1) == '/' + && *(p+plen) == '>') break; + } + } + while (p); + + if (p) + { + *rlen = p-rptr-2; + ret = rptr; + } + } + else + ret = __xmlGetNode(cur, len, ptr+1, rlen); + } + } + } + + return ret; +} + +char * +__xmlFindNextElement(char *start, size_t len, const char *name) +{ + char *ret = 0; + + if (start && len && name) + { + unsigned int plen; + + plen = strlen(name); + if (plen) + { + char *p, *cur; + + cur = start; + do + { + if ((p = memncasemem(cur, len, name, plen)) != 0) + { + len -= (p + plen) - cur; + cur = p + plen; + } + } + while (p && (*(p-1) != '<')); + + if (p) + { + char *rptr = cur; + + p = cur; + while ((*cur++ != '>') && (cur<(p+len))); + len -= cur - p; + + do + { + if ((p = memncasemem(cur, len, name, plen)) != 0) + { + len -= (p + plen) - cur; + cur = p + plen; + if (*(p-2) == '<' && *(p-1) == '/' && *(p+plen) == '>') + break; + } + } + while (p); + + ret = rptr; + } + } + } + + return ret; +} + + +#define CASECMP(a,b) ( ((a) & 0xdf) == ((b) & 0xdf) ) +#define NOCASECMP(a,b) ( ((a)^(b)) & 0xdf ) + +void * +memncasemem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen) +{ + void *rptr = 0; + + if (haystack && needle && (needlelen > 0) && (haystacklen >= needlelen)) + { + const char *ne = (const char *)needle + needlelen; + const char *he = (const char *)haystack + haystacklen; + const char *hne = he - needlelen; + char *ns, *hs = (char *)haystack; + + do + { + rptr = 0; + ns = (char *)needle; + while((hs <= hne) && NOCASECMP(*hs,*ns)) + hs++; + + if (hs < hne) + { + rptr = hs; + while((hs < he) && (ns < ne) && !NOCASECMP(*hs,*ns)) + { + hs++; + ns++; + } + } + else break; + } + while (ns < ne); + } + + return rptr; +} + +#if 0 +const unsigned char * +boyermoore_horspool_memmem(const unsigned char* haystack, size_t hlen, + const unsigned char* needle, size_t nlen) +{ + size_t scan = 0; + size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called: + * bad character shift */ + + /* Sanity checks on the parameters */ + if (nlen <= 0 || !haystack || !needle) + return NULL; + + /* ---- Preprocess ---- */ + /* Initialize the table to default value */ + /* When a character is encountered that does not occur + * in the needle, we can safely skip ahead for the whole + * length of the needle. + */ + for (scan = 0; scan <= UCHAR_MAX; scan = scan + 1) + bad_char_skip[scan] = nlen; + + /* C arrays have the first byte at [0], therefore: + * [nlen - 1] is the last byte of the array. */ + size_t last = nlen - 1; + + /* Then populate it with the analysis of the needle */ + for (scan = 0; scan < last; scan = scan + 1) + bad_char_skip[needle[scan]] = last - scan; + + /* ---- Do the matching ---- */ + + /* Search the haystack, while the needle can still be within it. */ + while (hlen >= nlen) + { + /* scan from the end of the needle */ + for (scan = last; haystack[scan] == needle[scan]; scan = scan - 1) + if (scan == 0) /* If the first byte matches, we've found it. */ + return haystack; + + /* otherwise, we need to skip some bytes and start again. + Note that here we are getting the skip value based on the last byte + of needle, no matter where we didn't match. So if needle is: "abcd" + then we are skipping based on 'd' and that value will be 4, and + for "abcdd" we again skip on 'd' but the value will be only 1. + The alternative of pretending that the mismatched character was + the last character is slower in the normal case (Eg. finding + "abcd" in "...azcd..." gives 4 by using 'd' but only + 4-2==2 using 'z'. */ + hlen -= bad_char_skip[haystack[last]]; + haystack += bad_char_skip[haystack[last]]; + } + + return NULL; +} +#endif + +#ifdef WIN32 +/* Source: + * https://mollyrocket.com/forums/viewtopic.php?p=2529 + */ + +void * +simple_mmap(int fd, unsigned int length, SIMPLE_UNMMAP *un) +{ + HANDLE f; + HANDLE m; + void *p; + + f = (HANDLE)_get_osfhandle(fd); + if (!f) return NULL; + + m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, NULL); + if (!m) return NULL; + + p = MapViewOfFile(m, FILE_MAP_READ, 0,0,0); + if (!p) + { + CloseHandle(m); + return NULL; + } + + if (n) *n = GetFileSize(f, NULL); + + if (un) + { + un->m = m; + un->p = p; + } + + return p; +} + +void +simple_unmmap(SIMPLE_UNMMAP *un) +{ + UnmapViewOfFile(un->p); + CloseHandle(un->m); +} +#endif diff --git a/utils/xmlgrep/xml.h b/utils/xmlgrep/xml.h new file mode 100644 index 000000000..5308ae900 --- /dev/null +++ b/utils/xmlgrep/xml.h @@ -0,0 +1,191 @@ +/* Copyright (c) 2007, 2008 by Adalin B.V. + * Copyright (c) 2007, 2008 by Erik Hofman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of (any of) the copyrightholder(s) nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __XML_CONFIG +#define __XML_CONFIG 1 + +/** + * Open an XML file for processing + * + * @param fname path to the file + * @return XML-id which is used for further processing + */ +void *xmlOpen(const char *); + +/** + * Close the XML file after which no further processing is possible + * + * @param xid XML-id + */ +void xmlClose(const void *); + + +/** + * Locate a subsection of the xml tree for further processing. + * This adds processing speed since the reuired nodes will only be searched + * in the subsection. + * + * The memory allocated for the XML-subsection-id has to be freed by the + * calling program. + * + * @param xid XML-id + * @param node path to the node containing the subsection + * @return XML-subsection-id for further processing + */ +void *xmlGetNode(const void *, const char *); + +/** + * Copy a subsection of the xml tree for further processing. + * This is useful when it's required to process a section of the XML code + * after the file has been closed. The drawback is the added memory + * requirements. + * + * The memory allocated for the XML-subsection-id has to be freed by the + * calling program. + * + * @param xid XML-id + * @param node path to the node containing the subsection + * @return XML-subsection-id for further processing + */ +void *xmlCopyNode(void *, const char *); + +/** + * Get the number of elements with the same name from a specified xml path + * + * @param xid XML-id + * @param path path to the xml node + * @return the number count of the nodename + */ +unsigned int xmlGetNumElements(void *, const char *); + +/** + * Get the next occurrence of element in the parent node + * + * @param pid XML-id of the parent node of this node + * @param xid XML-id + * @param element name of the element to search for + * @return XML-subsection-id for further processing + */ +void *xmlGetNextElement(const void *, void *, const char *); + +/** + * Compare the value of this element to a reference string. + * Comparing is done in a case insensitive way. + * + * @param xid XML-id + * @param s the string to compare to. + * @return an integer less than, equal to, ro greater than zero if the value + * of the node is found, respectively, to be less than, to match, or be greater + * than s + */ +int xmlCompareString(const void *, const char *); + + +/** + * Get a string of characters from a specified xml path + * This function has the advantage of not allocating its own return buffer, + * keeping the memory management to an absolute minimum but the disadvantage + * is that it's unreliable in multithread environments. + * + * @param xid XML-id + * @param path path to the xml node + * @param buffer the buffer to copy the string to + * @param buflen length of the destination buffer + * @return the length of the string + */ +unsigned int xmlCopyNodeString(void *, const char *, char *, const unsigned int); + +/** + * Get a string of characters from the current node + * The returned string has to be freed by the calling program. + * + * @param xid XML-id + * @return a newly alocated string containing the contents of the node. + */ +char *xmlGetString(void *); + +/** + * Get a string of characters from a specified xml path + * The returned string has to be freed by the calling program. + * + * @param xid XML-id + * @param path path to the xml node + * @return a newly alocated string containing the contents of the node. + */ +char *xmlGetNodeString(void *, const char *); + +/** + * Compare the value of a node to a reference string. + * Comparing is done in a case insensitive way. + * + * @param xid XML-id + * @param path path to the xml node to compare to + * @param s the string to compare to. + * @return an integer less than, equal to, ro greater than zero if the value + * of the node is found, respectively, to be less than, to match, or be greater + * than s + */ +int xmlCompareNodeString(const void *, const char *, const char *); + +/** + * Get the integer value from the current node + * + * @param xid XML-id + * @return the contents of the node converted to an integer value. + */ +long int xmlGetInt(void *); + +/** + * Get an integer value from a specified xml path + * + * @param xid XML-id + * @param path path to the xml node + * @return the contents of the node converted to an integer value. + */ +long int xmlGetNodeInt(void *, const char *); + +/** + * Get the double value from the curent node + * + * @param xid XML-id + * @return the contents of the node converted to a double value. + */ +double xmlGetDouble(void *); + +/** + * Get a double value from a specified xml path + * + * @param xid XML-id + * @param path path to the xml node + * @return the contents of the node converted to a double value. + */ +double xmlGetNodeDouble(void *, const char *); + +void *xmlMarkId(void *); + +#endif /* __XML_CONFIG */ + diff --git a/utils/xmlgrep/xmlgrep.c b/utils/xmlgrep/xmlgrep.c new file mode 100644 index 000000000..5f8e92cd0 --- /dev/null +++ b/utils/xmlgrep/xmlgrep.c @@ -0,0 +1,292 @@ +#include +#include +#include + +#include "xml.h" + +static const char *_static_root = "/"; +static unsigned int _fcount = 0; +static char **_filenames = 0; +static char *_element = 0; +static char *_value = 0; +static char *_root = 0; +static char *_print = 0; +static int print_filenames = 0; + +static void free_and_exit(int i); + +#define SHOW_NOVAL(opt) \ +{ \ + printf("option '%s' requires a value\n\n", (opt)); \ + free_and_exit(-1); \ +} + +void +show_help () +{ + printf("usage: xmlgrep [options] [file ...]\n\n"); + printf("Options:\n"); + printf("\t-h\t\tshow this help message\n"); + printf("\t-e \t\tshow sections that contain this element\n"); + printf("\t-p \t\tprint this element as the output\n"); + printf("\t-r \tspecify the XML search root\n"); + printf("\t-v \tshow sections where one of the elements has this "); + printf("value\n\n"); + printf(" To print the contents of the 'type' element of the XML section "); + printf("that begins\n at '/printer/output' one would use the following "); + printf("syntax:\n\n\txmlgrep -r /printer/output -p type sample.xml\n\n"); + printf(" To filter out sections that contain the 'driver' element with "); + printf("'generic' as\n it's value one would issue the following command:"); + printf("\n\n\txmlgrep -r /printer/output -e driver -v generic sample.xml"); + printf("\n\n"); + free_and_exit(0); +} + +void +free_and_exit(int i) +{ + if (_root != _static_root) free(_root); + if (_element) free(_element); + if (_value) free(_value); + if (_print) free(_print); + if (_filenames) + { + for (i=0; i < _fcount; i++) + { + if (_filenames[i]) + { + if (print_filenames) printf("%s\n", _filenames[i]); + free(_filenames[i]); + } + } + free(_filenames); + } + + exit(i); +} + +int +parse_option(char **args, int n, int max) +{ + char *opt, *arg = 0; + int sz; + + opt = args[n]; + if (opt[0] == '-' && opt[1] == '-') + opt++; + + if ((arg = strchr(opt, '=')) != NULL) + { + *arg++ = 0; + } + else if (++n < max) + { + arg = args[n]; +#if 0 + if (arg && arg[0] == '-') + arg = 0; +#endif + } + + sz = strlen(opt); + if (strncmp(opt, "-help", sz) == 0) + { + show_help(); + } + else if (strncmp(opt, "-root", sz) == 0) + { + if (arg == 0) SHOW_NOVAL(opt); + _root = strdup(arg); + return 2; + } + else if (strncmp(opt, "-element", sz) == 0) + { + if (arg == 0) SHOW_NOVAL(opt); + _element = strdup(arg); + return 2; + } + else if (strncmp(opt, "-value", sz) == 0) + { + if (arg == 0) SHOW_NOVAL(opt); + _value = strdup(arg); + return 2; + } + else if (strncmp(opt, "-print", sz) == 0) + { + if (arg == 0) SHOW_NOVAL(opt); + _print = strdup(arg); + return 2; + } + else if (strncmp(opt, "-list-filenames", sz) == 0) + { /* undocumented test argument */ + print_filenames = 1; + return 1; + } + else if (opt[0] == '-') + { + printf("Unknown option %s\n", opt); + free_and_exit(-1); + } + else + { + int pos = _fcount++; + if (_filenames == 0) + { + _filenames = (char **)malloc(sizeof(char*)); + } + else + { + char **ptr = (char **)realloc(_filenames, _fcount*sizeof(char*)); + if (ptr == 0) + { + printf("Out of memory.\n\n"); + free_and_exit(-1); + } + _filenames = ptr; + } + + _filenames[pos] = strdup(opt); + } + + return 1; +} + +void walk_the_tree(size_t num, void *xid, char *tree) +{ + unsigned int q, no_elements; + char *elem, *next; + + elem = tree; + if (*elem == '/') elem++; + + next = strchr(elem, '/'); + if (!next) /* last node from the tree */ + { + void *elem_id = xmlMarkId(xid); + + no_elements = xmlGetNumElements(xid, elem); + for (q=0; q%s\n", + _filenames[num], _print, value, _print); + free(value); + } + } + } + free(xmid); + } + else if (node_id && _value) + { + if (_element) + { + unsigned int i, no_nodes; + void *xmid; + + xmid = xmlMarkId(node_id); + + no_nodes = xmlGetNumElements(node_id, _element); + for (i=0; i%s\n", + _filenames[num], _element, _value, _element); + } + } + free(xmid); + } + else + { + } + } + else if (node_id && _element) + { + } + } + free(elem_id); + } + else /* walk the rest of the tree */ + { + void *xmid; + + xmid = xmlMarkId(xid); + if (xmid) + { + *next++ = 0; + + no_elements = xmlGetNumElements(xid, elem); + for (q=0; q -#include -#include - -#include -#include -#include - - -unsigned int _fcount = 0; -char **_filenames = 0; -char *_element = 0; -char *_value = 0; -char *_root = 0; -char *_print = 0; - -int print_filenames = 0; - -#define DEBUG 0 - -void free_and_exit(int i); - - -#define SHOW_NOVAL(opt) \ -{ \ - printf("option '%s' requires a value\n\n", (opt)); \ - free_and_exit(-1); \ -} - -void show_help () -{ - printf("usage: xmlgrep [options] [file ...]\n\n"); - printf("Options:\n"); - printf("\t-h\t\tshow this help message\n"); - printf("\t-e \t\tshow sections that contain this element\n"); - printf("\t-p \t\tprint this element as the output\n"); - printf("\t-r \tspecify the XML search root\n"); - printf("\t-v \tshow sections where on of the elements has this value \n"); - printf("\n"); - printf(" To print the contents of the 'type' element of the XML section "); - printf("that begins\n at '/printer/output' one would use the following "); - printf("syntax:\n\n\txmlgrep -r /printer/output -p type sample.xml\n\n"); - printf(" To filter out sections that contain the 'driver' element with "); - printf("'generic' as\n it's value one would issue the following command:\n"); - printf("\n\txmlgrep -r /printer/output -e driver -v generic -p type "); - printf("sample.xml\n\n"); - free_and_exit(0); -} - -void free_and_exit(int i) -{ - if (_root) free(_root); - if (_value) free(_value); - if (_element) free(_element); - if (_filenames) - { - for (i=0; i < _fcount; i++) { - if (_filenames[i]) { - if (print_filenames) printf("%s\n", _filenames[i]); - free(_filenames[i]); - } - } - free(_filenames); - } - - exit(i); -} - -int parse_option(char **args, int n, int max) { - char *opt, *arg = 0; - int sz; - - opt = args[n]; - if (opt[0] == '-' && opt[1] == '-') - opt++; - - if ((arg = strchr(opt, '=')) != NULL) - *arg++ = 0; - - else if (++n < max) - { - arg = args[n]; - if (arg && arg[0] == '-') - arg = 0; - } - -#if DEBUG - fprintf(stderr, "processing '%s'='%s'\n", opt, arg ? arg : "NULL"); -#endif - - sz = strlen(opt); - if (strncmp(opt, "-help", sz) == 0) { - show_help(); - } - - else if (strncmp(opt, "-root", sz) == 0) { - if (arg == 0) SHOW_NOVAL(opt); - _root = strdup(arg); -#if DEBUG - fprintf(stderr, "\troot=%s\n", _root); -#endif - return 2; - } - - else if (strncmp(opt, "-element", sz) == 0) { - if (arg == 0) SHOW_NOVAL(opt); - _element = strdup(arg); -#if DEBUG - fprintf(stderr, "\telement=%s\n", _element); -#endif - return 2; - } - - else if (strncmp(opt, "-value", sz) == 0) { - if (arg == 0) SHOW_NOVAL(opt); - _value = strdup(arg); -#if DEBUG - fprintf(stderr, "\tvalue=%s\n", _value); -#endif - return 2; - } - - else if (strncmp(opt, "-print", sz) == 0) { - if (arg == 0) SHOW_NOVAL(opt); - _print = strdup(arg); -#if DEBUG - fprintf(stderr, "\tprint=%s\n", _print); -#endif - return 2; - } - - - /* undocumented test argument */ - else if (strncmp(opt, "-list-filenames", sz) == 0) { - print_filenames = 1; - return 1; - } - - else if (opt[0] == '-') { - printf("Unknown option %s\n", opt); - free_and_exit(-1); - } - - else { - int pos = _fcount++; - if (_filenames == 0) - _filenames = (char **)malloc(sizeof(char*)); - else { - char **ptr = (char **)realloc(_filenames, _fcount*sizeof(char*)); - if (ptr == 0) { - printf("Out of memory.\n\n"); - free_and_exit(-1); - } - _filenames = ptr; - } - - _filenames[pos] = strdup(opt); -#if DEBUG - fprintf(stderr, "\tadding filenames[%i]='%s'\n", pos, _filenames[pos]); -#endif - } - - return 1; -} - -void grep_file(unsigned num) -{ - SGPropertyNode root, *path; - -#if DEBUG - fprintf(stderr, "Reading filenames[%i]: %s ... ", num, _filenames[num]); -#endif - try { - readProperties(_filenames[num], &root); - } catch (const sg_exception &e) { - fprintf(stderr, "Error reading file '%s'\n", _filenames[num]); - // free_and_exit(-1); - return; - } -#if DEBUG - fprintf(stderr, "done.\n"); -#endif - - if ((path = root.getNode(_root, false)) != NULL) - { - SGPropertyNode *elem; - - if (_element && _value) - { - if ((elem = path->getNode(_element, false)) != NULL) - { - if (strcmp(elem->getStringValue(), _value) == NULL) - { - SGPropertyNode *print = path->getNode(_print, false); - if (print) - { - printf("%s: <%s>%s\n", _filenames[num], - _print, print->getStringValue(), _print); - } - } - } - } - else if (_element) - { - } - else if (_value) - { - } - } -#if DEBUG - else - fprintf(stderr," No root node specified.\n"); -#endif -} - -inline void grep_files() -{ -#if DEBUG - fprintf(stderr, "Reading files ...\n"); -#endif - for (int i=0; i<_fcount; i++) - grep_file(i); -} - -int -main (int argc, char **argv) -{ - int i; - - if (argc == 1) - show_help(); - - for (i=1; i