From 65aca6607e096d97ae670bc3fbef327c947d6ad5 Mon Sep 17 00:00:00 2001 From: James Turner Date: Thu, 3 May 2012 17:29:16 +0100 Subject: [PATCH] Update to latest expat for improved BOM / encoding handling. (Related to http://code.google.com/p/flightgear-bugs/issues/detail?id=635) --- CMakeLists.txt | 10 +- simgear/xml/CMakeLists.txt | 2 + simgear/xml/Expat.COPYING | 22 + simgear/xml/ascii.h | 92 + simgear/xml/expat_config_cmake.in | 41 + simgear/xml/internal.h | 73 + simgear/xml/xmlparse.c | 6466 +++++++++++++++++++++-------- simgear/xml/xmlrole.c | 1213 +++--- simgear/xml/xmlrole.h | 71 +- simgear/xml/xmltok.c | 884 ++-- simgear/xml/xmltok.h | 313 +- simgear/xml/xmltok_impl.c | 1095 ++--- simgear/xml/xmltok_ns.c | 95 +- 13 files changed, 7095 insertions(+), 3282 deletions(-) create mode 100644 simgear/xml/Expat.COPYING create mode 100644 simgear/xml/ascii.h create mode 100644 simgear/xml/expat_config_cmake.in create mode 100644 simgear/xml/internal.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b3c0e10..11fb905e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,6 +155,8 @@ check_function_exists(ftime HAVE_FTIME) check_function_exists(timegm HAVE_TIMEGM) check_function_exists(rint HAVE_RINT) check_function_exists(mkdtemp HAVE_MKDTEMP) +check_function_exists(bcopy HAVE_BCOPY) +check_function_exists(mmap HAVE_MMAP) if(HAVE_UNISTD_H) set(CMAKE_REQUIRED_INCLUDES ${CMAKE_INCLUDE_PATH}) @@ -225,11 +227,12 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set (WARNING_FLAGS "-Wall -Wno-overloaded-virtual") endif() -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNING_FLAGS} ${MSVC_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNING_FLAGS} ${MSVC_FLAGS} -DHAVE_EXPAT_CONFIG_H") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WARNING_FLAGS} ${MSVC_FLAGS} ${BOOST_CXX_FLAGS}") include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}/simgear) +include_directories(${PROJECT_BINARY_DIR}/simgear/xml) include_directories(${OPENSCENEGRAPH_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIR} @@ -245,6 +248,11 @@ configure_file ( "${PROJECT_BINARY_DIR}/simgear/simgear_config.h" ) +configure_file ( + "${PROJECT_SOURCE_DIR}/simgear/xml/expat_config_cmake.in" + "${PROJECT_BINARY_DIR}/simgear/xml/expat_config.h" +) + if(ENABLE_TESTS) # enable CTest / make test target diff --git a/simgear/xml/CMakeLists.txt b/simgear/xml/CMakeLists.txt index 0365e862..81dc8ff3 100644 --- a/simgear/xml/CMakeLists.txt +++ b/simgear/xml/CMakeLists.txt @@ -23,6 +23,8 @@ set(SOURCES xmlrole.c xmltok.c easyxml.cxx + internal.h + ascii.h ) simgear_component(xml xml "${SOURCES}" "${HEADERS}") diff --git a/simgear/xml/Expat.COPYING b/simgear/xml/Expat.COPYING new file mode 100644 index 00000000..dcb45064 --- /dev/null +++ b/simgear/xml/Expat.COPYING @@ -0,0 +1,22 @@ +Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + and Clark Cooper +Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/simgear/xml/ascii.h b/simgear/xml/ascii.h new file mode 100644 index 00000000..d10530b0 --- /dev/null +++ b/simgear/xml/ascii.h @@ -0,0 +1,92 @@ +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. +*/ + +#define ASCII_A 0x41 +#define ASCII_B 0x42 +#define ASCII_C 0x43 +#define ASCII_D 0x44 +#define ASCII_E 0x45 +#define ASCII_F 0x46 +#define ASCII_G 0x47 +#define ASCII_H 0x48 +#define ASCII_I 0x49 +#define ASCII_J 0x4A +#define ASCII_K 0x4B +#define ASCII_L 0x4C +#define ASCII_M 0x4D +#define ASCII_N 0x4E +#define ASCII_O 0x4F +#define ASCII_P 0x50 +#define ASCII_Q 0x51 +#define ASCII_R 0x52 +#define ASCII_S 0x53 +#define ASCII_T 0x54 +#define ASCII_U 0x55 +#define ASCII_V 0x56 +#define ASCII_W 0x57 +#define ASCII_X 0x58 +#define ASCII_Y 0x59 +#define ASCII_Z 0x5A + +#define ASCII_a 0x61 +#define ASCII_b 0x62 +#define ASCII_c 0x63 +#define ASCII_d 0x64 +#define ASCII_e 0x65 +#define ASCII_f 0x66 +#define ASCII_g 0x67 +#define ASCII_h 0x68 +#define ASCII_i 0x69 +#define ASCII_j 0x6A +#define ASCII_k 0x6B +#define ASCII_l 0x6C +#define ASCII_m 0x6D +#define ASCII_n 0x6E +#define ASCII_o 0x6F +#define ASCII_p 0x70 +#define ASCII_q 0x71 +#define ASCII_r 0x72 +#define ASCII_s 0x73 +#define ASCII_t 0x74 +#define ASCII_u 0x75 +#define ASCII_v 0x76 +#define ASCII_w 0x77 +#define ASCII_x 0x78 +#define ASCII_y 0x79 +#define ASCII_z 0x7A + +#define ASCII_0 0x30 +#define ASCII_1 0x31 +#define ASCII_2 0x32 +#define ASCII_3 0x33 +#define ASCII_4 0x34 +#define ASCII_5 0x35 +#define ASCII_6 0x36 +#define ASCII_7 0x37 +#define ASCII_8 0x38 +#define ASCII_9 0x39 + +#define ASCII_TAB 0x09 +#define ASCII_SPACE 0x20 +#define ASCII_EXCL 0x21 +#define ASCII_QUOT 0x22 +#define ASCII_AMP 0x26 +#define ASCII_APOS 0x27 +#define ASCII_MINUS 0x2D +#define ASCII_PERIOD 0x2E +#define ASCII_COLON 0x3A +#define ASCII_SEMI 0x3B +#define ASCII_LT 0x3C +#define ASCII_EQUALS 0x3D +#define ASCII_GT 0x3E +#define ASCII_LSQB 0x5B +#define ASCII_RSQB 0x5D +#define ASCII_UNDERSCORE 0x5F +#define ASCII_LPAREN 0x28 +#define ASCII_RPAREN 0x29 +#define ASCII_FF 0x0C +#define ASCII_SLASH 0x2F +#define ASCII_HASH 0x23 +#define ASCII_PIPE 0x7C +#define ASCII_COMMA 0x2C diff --git a/simgear/xml/expat_config_cmake.in b/simgear/xml/expat_config_cmake.in new file mode 100644 index 00000000..5cd864bb --- /dev/null +++ b/simgear/xml/expat_config_cmake.in @@ -0,0 +1,41 @@ + +#ifndef EXPAT_CONFIG_H +#define EXPAT_CONFIG_H + +#cmakedefine HAVE_WINDOWS_H + +#ifdef HAVE_WINDOWS_H + #define WIN32_LEAN_AND_MEAN + #include + #undef WIN32_LEAN_AND_MEAN +#endif + +/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ +#define BYTEORDER 1234 + +/* Define to 1 if you have the `bcopy' function. */ +#cmakedefine HAVE_BCOPY + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE + +/* Define to 1 if you have a working `mmap' system call. */ +#cmakedefine HAVE_MMAP + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_UNISTD_H + +/* whether byteorder is bigendian */ +#undef WORDS_BIGENDIAN + +/* Define to specify how much context to retain around the current parse + point. */ +#define XML_CONTEXT_BYTES 1024 + +/* Define to make parameter entity parsing functionality available. */ +#define XML_DTD + +/* Define to make XML Namespaces functionality available. */ +#define XML_NS + +#endif /* ifndef EXPAT_CONFIG_H */ diff --git a/simgear/xml/internal.h b/simgear/xml/internal.h new file mode 100644 index 00000000..dd545483 --- /dev/null +++ b/simgear/xml/internal.h @@ -0,0 +1,73 @@ +/* internal.h + + Internal definitions used by Expat. This is not needed to compile + client code. + + The following calling convention macros are defined for frequently + called functions: + + FASTCALL - Used for those internal functions that have a simple + body and a low number of arguments and local variables. + + PTRCALL - Used for functions called though function pointers. + + PTRFASTCALL - Like PTRCALL, but for low number of arguments. + + inline - Used for selected internal functions for which inlining + may improve performance on some platforms. + + Note: Use of these macros is based on judgement, not hard rules, + and therefore subject to change. +*/ + +#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__) +/* We'll use this version by default only where we know it helps. + + regparm() generates warnings on Solaris boxes. See SF bug #692878. + + Instability reported with egcs on a RedHat Linux 7.3. + Let's comment out: + #define FASTCALL __attribute__((stdcall, regparm(3))) + and let's try this: +*/ +#define FASTCALL __attribute__((regparm(3))) +#define PTRFASTCALL __attribute__((regparm(3))) +#endif + +/* Using __fastcall seems to have an unexpected negative effect under + MS VC++, especially for function pointers, so we won't use it for + now on that platform. It may be reconsidered for a future release + if it can be made more effective. + Likely reason: __fastcall on Windows is like stdcall, therefore + the compiler cannot perform stack optimizations for call clusters. +*/ + +/* Make sure all of these are defined if they aren't already. */ + +#ifndef FASTCALL +#define FASTCALL +#endif + +#ifndef PTRCALL +#define PTRCALL +#endif + +#ifndef PTRFASTCALL +#define PTRFASTCALL +#endif + +#ifndef XML_MIN_SIZE +#if !defined(__cplusplus) && !defined(inline) +#ifdef __GNUC__ +#define inline __inline +#endif /* __GNUC__ */ +#endif +#endif /* XML_MIN_SIZE */ + +#ifdef __cplusplus +#define inline inline +#else +#ifndef inline +#define inline +#endif +#endif diff --git a/simgear/xml/xmlparse.c b/simgear/xml/xmlparse.c index 294b73b4..822dc170 100644 --- a/simgear/xml/xmlparse.c +++ b/simgear/xml/xmlparse.c @@ -1,35 +1,29 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ -#include "xmldef.h" -#include "xmlparse.h" +#include +#include /* memset(), memcpy() */ +#include +#include /* UINT_MAX */ +#include /* time() */ + +#define XML_BUILDING_EXPAT 1 + +#ifdef COMPILED_FROM_DSP +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" +#elif defined(__amigaos__) +#include "amigaconfig.h" +#elif defined(__WATCOMC__) +#include "watcomconfig.h" +#elif defined(HAVE_EXPAT_CONFIG_H) +#include "expat_config.h" +#endif /* ndef COMPILED_FROM_DSP */ + +#include "ascii.h" +#include "expat.h" #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX @@ -37,7 +31,8 @@ your version of this file under either the MPL or the GPL. #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS #define XmlEncode XmlUtf16Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) +/* Using pointer subtraction to convert to integer type. */ +#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1)) typedef unsigned short ICHAR; #else #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX @@ -60,23 +55,87 @@ typedef char ICHAR; #endif +#ifdef XML_UNICODE #ifdef XML_UNICODE_WCHAR_T -#define XML_T(x) L ## x +#define XML_T(x) (const wchar_t)x +#define XML_L(x) L ## x +#else +#define XML_T(x) (const unsigned short)x +#define XML_L(x) x +#endif + #else + #define XML_T(x) x +#define XML_L(x) x + #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) +/* Handle the case where memmove() doesn't exist. */ +#ifndef HAVE_MEMMOVE +#ifdef HAVE_BCOPY +#define memmove(d,s,l) bcopy((s),(d),(l)) +#else +#error memmove does not exist on this platform, nor is a substitute available +#endif /* HAVE_BCOPY */ +#endif /* HAVE_MEMMOVE */ + +#include "internal.h" #include "xmltok.h" #include "xmlrole.h" -#include "hashtable.h" + +typedef const XML_Char *KEY; + +typedef struct { + KEY name; +} NAMED; + +typedef struct { + NAMED **v; + unsigned char power; + size_t size; + size_t used; + const XML_Memory_Handling_Suite *mem; +} HASH_TABLE; + +/* Basic character hash algorithm, taken from Python's string hash: + h = h * 1000003 ^ character, the constant being a prime number. + +*/ +#ifdef XML_UNICODE +#define CHAR_HASH(h, c) \ + (((h) * 0xF4243) ^ (unsigned short)(c)) +#else +#define CHAR_HASH(h, c) \ + (((h) * 0xF4243) ^ (unsigned char)(c)) +#endif + +/* For probing (after a collision) we need a step size relative prime + to the hash table size, which is a power of 2. We use double-hashing, + since we can calculate a second hash value cheaply by taking those bits + of the first hash value that were discarded (masked out) when the table + index was calculated: index = hash & mask, where mask = table->size - 1. + We limit the maximum step size to table->size / 4 (mask >> 2) and make + it odd, since odd numbers are always relative prime to a power of 2. +*/ +#define SECOND_HASH(hash, mask, power) \ + ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) +#define PROBE_STEP(hash, mask, power) \ + ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) + +typedef struct { + NAMED **p; + NAMED **end; +} HASH_TABLE_ITER; #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 +#define INIT_ATTS_VERSION 0xFFFFFFFF #define INIT_BLOCK_SIZE 1024 #define INIT_BUFFER_SIZE 1024 @@ -100,30 +159,61 @@ typedef struct prefix { typedef struct { const XML_Char *str; const XML_Char *localPart; + const XML_Char *prefix; + int strLen; int uriLen; + int prefixLen; } TAG_NAME; +/* TAG represents an open element. + The name of the element is stored in both the document and API + encodings. The memory buffer 'buf' is a separately-allocated + memory area which stores the name. During the XML_Parse()/ + XMLParseBuffer() when the element is open, the memory for the 'raw' + version of the name (in the document encoding) is shared with the + document buffer. If the element is open across calls to + XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to + contain the 'raw' name as well. + + A parser re-uses these structures, maintaining a list of allocated + TAG objects in a free list. +*/ typedef struct tag { - struct tag *parent; - const char *rawName; + struct tag *parent; /* parent of this element */ + const char *rawName; /* tagName in the original encoding */ int rawNameLength; - TAG_NAME name; - char *buf; - char *bufEnd; + TAG_NAME name; /* tagName in the API encoding */ + char *buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; - int textLen; + int textLen; /* length in XML_Chars */ + int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; const XML_Char *notation; - char open; + XML_Bool open; + XML_Bool is_param; + XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; +typedef struct { + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; +} CONTENT_SCAFFOLD; + +#define INIT_SCAFFOLD_ELEMENTS 32 + typedef struct block { struct block *next; int size; @@ -136,26 +226,34 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; + const XML_Memory_Handling_Suite *mem; } STRING_POOL; /* The XML_Char before the name is used to determine whether -an attribute has been specified. */ + an attribute has been specified. */ typedef struct attribute_id { XML_Char *name; PREFIX *prefix; - char maybeTokenized; - char xmlns; + XML_Bool maybeTokenized; + XML_Bool xmlns; } ATTRIBUTE_ID; typedef struct { const ATTRIBUTE_ID *id; - char isCdata; + XML_Bool isCdata; const XML_Char *value; } DEFAULT_ATTRIBUTE; +typedef struct { + unsigned long version; + unsigned long hash; + const XML_Char *uriName; +} NS_ATT; + typedef struct { const XML_Char *name; PREFIX *prefix; + const ATTRIBUTE_ID *idAtt; int nDefaultAtts; int allocDefaultAtts; DEFAULT_ATTRIBUTE *defaultAtts; @@ -167,10 +265,27 @@ typedef struct { HASH_TABLE attributeIds; HASH_TABLE prefixes; STRING_POOL pool; - int complete; - int standalone; - const XML_Char *base; + STRING_POOL entityValuePool; + /* false once a parameter entity reference has been skipped */ + XML_Bool keepProcessing; + /* true once an internal or external PE reference has been encountered; + this includes the reference to an external subset */ + XML_Bool hasParamEntityRefs; + XML_Bool standalone; +#ifdef XML_DTD + /* indicates if external PE has been read */ + XML_Bool paramEntityRead; + HASH_TABLE paramEntities; +#endif /* XML_DTD */ PREFIX defaultPrefix; + /* === scaffolding for building content model === */ + XML_Bool in_eldecl; + CONTENT_SCAFFOLD *scaffold; + unsigned contentStringLen; + unsigned scaffSize; + unsigned scaffCount; + int scaffLevel; + int *scaffIndex; } DTD; typedef struct open_internal_entity { @@ -178,74 +293,156 @@ typedef struct open_internal_entity { const char *internalEventEndPtr; struct open_internal_entity *next; ENTITY *entity; + int startTagLevel; + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; -typedef enum XML_Error Processor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr); +typedef enum XML_Error PTRCALL Processor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; static Processor contentProcessor; static Processor cdataSectionProcessor; +#ifdef XML_DTD +static Processor ignoreSectionProcessor; +static Processor externalParEntProcessor; +static Processor externalParEntInitProcessor; +static Processor entityValueProcessor; +static Processor entityValueInitProcessor; +#endif /* XML_DTD */ static Processor epilogProcessor; +static Processor errorProcessor; static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; +static Processor internalEntityProcessor; static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); +processXmlDecl(XML_Parser parser, int isGeneralTextEntity, + const char *s, const char *next); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error +doProlog(XML_Parser parser, const ENCODING *enc, const char *s, + const char *end, int tok, const char *next, const char **nextPtr, + XML_Bool haveMore); +static enum XML_Error +processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); +static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr); + const char *start, const char *end, const char **endPtr, + XML_Bool haveMore); +static enum XML_Error +doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore); +#ifdef XML_DTD +static enum XML_Error +doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, + const char *end, const char **nextPtr, XML_Bool haveMore); +#endif /* XML_DTD */ + static enum XML_Error -doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr); -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); +storeAtts(XML_Parser parser, const ENCODING *, const char *s, + TAG_NAME *tagNamePtr, BINDING **bindingsPtr); +static enum XML_Error +addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + const XML_Char *uri, BINDING **bindingsPtr); static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue); +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, + XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); +storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, + const char *, const char *, STRING_POOL *); static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); +appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, + const char *, const char *, STRING_POOL *); static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); +static int +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error -storeEntityValue(XML_Parser parser, const char *start, const char *end); +storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); +static int +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); +static int +reportComment(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); +static void +reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); + +static const XML_Char * getContext(XML_Parser parser); +static XML_Bool +setContext(XML_Parser parser, const XML_Char *context); + +static void FASTCALL normalizePublicId(XML_Char *s); + +static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms); +/* do not call if parentParser != NULL */ +static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); +static void +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +dtdCopy(XML_Parser oldParser, + DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); +static NAMED * +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); +static void FASTCALL +hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL hashTableClear(HASH_TABLE *); +static void FASTCALL hashTableDestroy(HASH_TABLE *); +static void FASTCALL +hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *); + +static void FASTCALL +poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolClear(STRING_POOL *); +static void FASTCALL poolDestroy(STRING_POOL *); +static XML_Char * +poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Char * +poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); +static const XML_Char * FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char * +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); +static const XML_Char * FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s); + +static int FASTCALL nextScaffoldPart(XML_Parser parser); +static XML_Content * build_model(XML_Parser parser); +static ELEMENT_TYPE * +getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); + +static unsigned long generate_hash_secret_salt(void); +static XML_Bool startParsing(XML_Parser parser); + +static XML_Parser +parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, + DTD *dtd); + static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); - -static const XML_Char *getContext(XML_Parser parser); -static int setContext(XML_Parser parser, const XML_Char *context); -static void normalizePublicId(XML_Char *s); -static int dtdInit(DTD *); -static void dtdDestroy(DTD *); -static int dtdCopy(DTD *newDtd, const DTD *oldDtd); -static void poolInit(STRING_POOL *); -static void poolClear(STRING_POOL *); -static void poolDestroy(STRING_POOL *); -static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static int poolGrow(STRING_POOL *pool); -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); +parserInit(XML_Parser parser, const XML_Char *encodingName); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -259,18 +456,20 @@ static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int ? 0 \ : ((*((pool)->ptr)++ = c), 1)) -typedef struct { - /* The first member must be userData so that the XML_GetUserData macro works. */ +struct XML_ParserStruct { + /* The first member must be userData so that the XML_GetUserData + macro works. */ void *m_userData; void *m_handlerArg; char *m_buffer; + const XML_Memory_Handling_Suite m_mem; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; /* allocated end of buffer */ const char *m_bufferLim; - long m_parseEndByteIndex; + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; @@ -282,22 +481,31 @@ typedef struct { XML_StartCdataSectionHandler m_startCdataSectionHandler; XML_EndCdataSectionHandler m_endCdataSectionHandler; XML_DefaultHandler m_defaultHandler; + XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; + XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; XML_NotationDeclHandler m_notationDeclHandler; XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; - void *m_externalEntityRefHandlerArg; + XML_Parser m_externalEntityRefHandlerArg; + XML_SkippedEntityHandler m_skippedEntityHandler; XML_UnknownEncodingHandler m_unknownEncodingHandler; + XML_ElementDeclHandler m_elementDeclHandler; + XML_AttlistDeclHandler m_attlistDeclHandler; + XML_EntityDeclHandler m_entityDeclHandler; + XML_XmlDeclHandler m_xmlDeclHandler; const ENCODING *m_encoding; INIT_ENCODING m_initEncoding; + const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; - int m_ns; + XML_Bool m_ns; + XML_Bool m_ns_triplets; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; - void (*m_unknownEncodingRelease)(void *); + void (XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; @@ -305,249 +513,545 @@ typedef struct { const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; - int m_defaultExpandInternalEntities; + OPEN_INTERNAL_ENTITY *m_freeInternalEntities; + XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; + const XML_Char *m_doctypeName; + const XML_Char *m_doctypeSysid; + const XML_Char *m_doctypePubid; + const XML_Char *m_declAttributeType; const XML_Char *m_declNotationName; const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; - char m_declAttributeIsCdata; - DTD m_dtd; + XML_Bool m_declAttributeIsCdata; + XML_Bool m_declAttributeIsId; + DTD *m_dtd; + const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; BINDING *m_inheritedBindings; BINDING *m_freeBindingList; int m_attsSize; int m_nSpecifiedAtts; + int m_idAttIndex; ATTRIBUTE *m_atts; + NS_ATT *m_nsAtts; + unsigned long m_nsAttsVersion; + unsigned char m_nsAttsPower; +#ifdef XML_ATTR_INFO + XML_AttrInfo *m_attInfo; +#endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; - unsigned m_groupSize; - int m_hadExternalDoctype; + unsigned int m_groupSize; XML_Char m_namespaceSeparator; -} Parser; - -#define userData (((Parser *)parser)->m_userData) -#define handlerArg (((Parser *)parser)->m_handlerArg) -#define startElementHandler (((Parser *)parser)->m_startElementHandler) -#define endElementHandler (((Parser *)parser)->m_endElementHandler) -#define characterDataHandler (((Parser *)parser)->m_characterDataHandler) -#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler) -#define commentHandler (((Parser *)parser)->m_commentHandler) -#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler) -#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler) -#define defaultHandler (((Parser *)parser)->m_defaultHandler) -#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) -#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) -#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) -#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler) -#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler) -#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) -#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) -#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) -#define encoding (((Parser *)parser)->m_encoding) -#define initEncoding (((Parser *)parser)->m_initEncoding) -#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem) -#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData) + XML_Parser m_parentParser; + XML_ParsingStatus m_parsingStatus; +#ifdef XML_DTD + XML_Bool m_isParamEntity; + XML_Bool m_useForeignDTD; + enum XML_ParamEntityParsing m_paramEntityParsing; +#endif + unsigned long m_hash_secret_salt; +}; + +#define MALLOC(s) (parser->m_mem.malloc_fcn((s))) +#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s))) +#define FREE(p) (parser->m_mem.free_fcn((p))) + +#define userData (parser->m_userData) +#define handlerArg (parser->m_handlerArg) +#define startElementHandler (parser->m_startElementHandler) +#define endElementHandler (parser->m_endElementHandler) +#define characterDataHandler (parser->m_characterDataHandler) +#define processingInstructionHandler \ + (parser->m_processingInstructionHandler) +#define commentHandler (parser->m_commentHandler) +#define startCdataSectionHandler \ + (parser->m_startCdataSectionHandler) +#define endCdataSectionHandler (parser->m_endCdataSectionHandler) +#define defaultHandler (parser->m_defaultHandler) +#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler) +#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler) +#define unparsedEntityDeclHandler \ + (parser->m_unparsedEntityDeclHandler) +#define notationDeclHandler (parser->m_notationDeclHandler) +#define startNamespaceDeclHandler \ + (parser->m_startNamespaceDeclHandler) +#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler) +#define notStandaloneHandler (parser->m_notStandaloneHandler) +#define externalEntityRefHandler \ + (parser->m_externalEntityRefHandler) +#define externalEntityRefHandlerArg \ + (parser->m_externalEntityRefHandlerArg) +#define internalEntityRefHandler \ + (parser->m_internalEntityRefHandler) +#define skippedEntityHandler (parser->m_skippedEntityHandler) +#define unknownEncodingHandler (parser->m_unknownEncodingHandler) +#define elementDeclHandler (parser->m_elementDeclHandler) +#define attlistDeclHandler (parser->m_attlistDeclHandler) +#define entityDeclHandler (parser->m_entityDeclHandler) +#define xmlDeclHandler (parser->m_xmlDeclHandler) +#define encoding (parser->m_encoding) +#define initEncoding (parser->m_initEncoding) +#define internalEncoding (parser->m_internalEncoding) +#define unknownEncodingMem (parser->m_unknownEncodingMem) +#define unknownEncodingData (parser->m_unknownEncodingData) #define unknownEncodingHandlerData \ - (((Parser *)parser)->m_unknownEncodingHandlerData) -#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease) -#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName) -#define ns (((Parser *)parser)->m_ns) -#define prologState (((Parser *)parser)->m_prologState) -#define processor (((Parser *)parser)->m_processor) -#define errorCode (((Parser *)parser)->m_errorCode) -#define eventPtr (((Parser *)parser)->m_eventPtr) -#define eventEndPtr (((Parser *)parser)->m_eventEndPtr) -#define positionPtr (((Parser *)parser)->m_positionPtr) -#define position (((Parser *)parser)->m_position) -#define openInternalEntities (((Parser *)parser)->m_openInternalEntities) -#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities) -#define tagLevel (((Parser *)parser)->m_tagLevel) -#define buffer (((Parser *)parser)->m_buffer) -#define bufferPtr (((Parser *)parser)->m_bufferPtr) -#define bufferEnd (((Parser *)parser)->m_bufferEnd) -#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex) -#define parseEndPtr (((Parser *)parser)->m_parseEndPtr) -#define bufferLim (((Parser *)parser)->m_bufferLim) -#define dataBuf (((Parser *)parser)->m_dataBuf) -#define dataBufEnd (((Parser *)parser)->m_dataBufEnd) -#define dtd (((Parser *)parser)->m_dtd) -#define declEntity (((Parser *)parser)->m_declEntity) -#define declNotationName (((Parser *)parser)->m_declNotationName) -#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) -#define declElementType (((Parser *)parser)->m_declElementType) -#define declAttributeId (((Parser *)parser)->m_declAttributeId) -#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata) -#define freeTagList (((Parser *)parser)->m_freeTagList) -#define freeBindingList (((Parser *)parser)->m_freeBindingList) -#define inheritedBindings (((Parser *)parser)->m_inheritedBindings) -#define tagStack (((Parser *)parser)->m_tagStack) -#define atts (((Parser *)parser)->m_atts) -#define attsSize (((Parser *)parser)->m_attsSize) -#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts) -#define tempPool (((Parser *)parser)->m_tempPool) -#define temp2Pool (((Parser *)parser)->m_temp2Pool) -#define groupConnector (((Parser *)parser)->m_groupConnector) -#define groupSize (((Parser *)parser)->m_groupSize) -#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) -#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) - -#ifdef _MSC_VER -#ifdef _DEBUG -Parser *asParser(XML_Parser parser) + (parser->m_unknownEncodingHandlerData) +#define unknownEncodingRelease (parser->m_unknownEncodingRelease) +#define protocolEncodingName (parser->m_protocolEncodingName) +#define ns (parser->m_ns) +#define ns_triplets (parser->m_ns_triplets) +#define prologState (parser->m_prologState) +#define processor (parser->m_processor) +#define errorCode (parser->m_errorCode) +#define eventPtr (parser->m_eventPtr) +#define eventEndPtr (parser->m_eventEndPtr) +#define positionPtr (parser->m_positionPtr) +#define position (parser->m_position) +#define openInternalEntities (parser->m_openInternalEntities) +#define freeInternalEntities (parser->m_freeInternalEntities) +#define defaultExpandInternalEntities \ + (parser->m_defaultExpandInternalEntities) +#define tagLevel (parser->m_tagLevel) +#define buffer (parser->m_buffer) +#define bufferPtr (parser->m_bufferPtr) +#define bufferEnd (parser->m_bufferEnd) +#define parseEndByteIndex (parser->m_parseEndByteIndex) +#define parseEndPtr (parser->m_parseEndPtr) +#define bufferLim (parser->m_bufferLim) +#define dataBuf (parser->m_dataBuf) +#define dataBufEnd (parser->m_dataBufEnd) +#define _dtd (parser->m_dtd) +#define curBase (parser->m_curBase) +#define declEntity (parser->m_declEntity) +#define doctypeName (parser->m_doctypeName) +#define doctypeSysid (parser->m_doctypeSysid) +#define doctypePubid (parser->m_doctypePubid) +#define declAttributeType (parser->m_declAttributeType) +#define declNotationName (parser->m_declNotationName) +#define declNotationPublicId (parser->m_declNotationPublicId) +#define declElementType (parser->m_declElementType) +#define declAttributeId (parser->m_declAttributeId) +#define declAttributeIsCdata (parser->m_declAttributeIsCdata) +#define declAttributeIsId (parser->m_declAttributeIsId) +#define freeTagList (parser->m_freeTagList) +#define freeBindingList (parser->m_freeBindingList) +#define inheritedBindings (parser->m_inheritedBindings) +#define tagStack (parser->m_tagStack) +#define atts (parser->m_atts) +#define attsSize (parser->m_attsSize) +#define nSpecifiedAtts (parser->m_nSpecifiedAtts) +#define idAttIndex (parser->m_idAttIndex) +#define nsAtts (parser->m_nsAtts) +#define nsAttsVersion (parser->m_nsAttsVersion) +#define nsAttsPower (parser->m_nsAttsPower) +#define attInfo (parser->m_attInfo) +#define tempPool (parser->m_tempPool) +#define temp2Pool (parser->m_temp2Pool) +#define groupConnector (parser->m_groupConnector) +#define groupSize (parser->m_groupSize) +#define namespaceSeparator (parser->m_namespaceSeparator) +#define parentParser (parser->m_parentParser) +#define ps_parsing (parser->m_parsingStatus.parsing) +#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer) +#ifdef XML_DTD +#define isParamEntity (parser->m_isParamEntity) +#define useForeignDTD (parser->m_useForeignDTD) +#define paramEntityParsing (parser->m_paramEntityParsing) +#endif /* XML_DTD */ +#define hash_secret_salt (parser->m_hash_secret_salt) + +XML_Parser XMLCALL +XML_ParserCreate(const XML_Char *encodingName) { - return parser; + return XML_ParserCreate_MM(encodingName, NULL, NULL); +} + +XML_Parser XMLCALL +XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) +{ + XML_Char tmp[2]; + *tmp = nsSep; + return XML_ParserCreate_MM(encodingName, NULL, tmp); +} + +static const XML_Char implicitContext[] = { + ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, + ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, + ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, + ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0' +}; + +static unsigned long +generate_hash_secret_salt(void) +{ + unsigned int seed = time(NULL) % UINT_MAX; + srand(seed); + return rand(); +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) +{ + /* hash functions must be initialized before setContext() is called */ + if (hash_secret_salt == 0) + hash_secret_salt = generate_hash_secret_salt(); + if (ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + return setContext(parser, implicitContext); + } + return XML_TRUE; +} + +XML_Parser XMLCALL +XML_ParserCreate_MM(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep) +{ + return parserCreate(encodingName, memsuite, nameSep, NULL); } -#endif -#endif -XML_Parser XML_ParserCreate(const XML_Char *encodingName) +static XML_Parser +parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, + DTD *dtd) { - XML_Parser parser = malloc(sizeof(Parser)); + XML_Parser parser; + + if (memsuite) { + XML_Memory_Handling_Suite *mtemp; + parser = (XML_Parser) + memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = memsuite->malloc_fcn; + mtemp->realloc_fcn = memsuite->realloc_fcn; + mtemp->free_fcn = memsuite->free_fcn; + } + } + else { + XML_Memory_Handling_Suite *mtemp; + parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = malloc; + mtemp->realloc_fcn = realloc; + mtemp->free_fcn = free; + } + } + if (!parser) return parser; + + buffer = NULL; + bufferLim = NULL; + + attsSize = INIT_ATTS_SIZE; + atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE)); + if (atts == NULL) { + FREE(parser); + return NULL; + } +#ifdef XML_ATTR_INFO + attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo)); + if (attInfo == NULL) { + FREE(atts); + FREE(parser); + return NULL; + } +#endif + dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + if (dataBuf == NULL) { + FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif + FREE(parser); + return NULL; + } + dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; + + if (dtd) + _dtd = dtd; + else { + _dtd = dtdCreate(&parser->m_mem); + if (_dtd == NULL) { + FREE(dataBuf); + FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif + FREE(parser); + return NULL; + } + } + + freeBindingList = NULL; + freeTagList = NULL; + freeInternalEntities = NULL; + + groupSize = 0; + groupConnector = NULL; + + unknownEncodingHandler = NULL; + unknownEncodingHandlerData = NULL; + + namespaceSeparator = ASCII_EXCL; + ns = XML_FALSE; + ns_triplets = XML_FALSE; + + nsAtts = NULL; + nsAttsVersion = 0; + nsAttsPower = 0; + + poolInit(&tempPool, &(parser->m_mem)); + poolInit(&temp2Pool, &(parser->m_mem)); + parserInit(parser, encodingName); + + if (encodingName && !protocolEncodingName) { + XML_ParserFree(parser); + return NULL; + } + + if (nameSep) { + ns = XML_TRUE; + internalEncoding = XmlGetInternalEncodingNS(); + namespaceSeparator = *nameSep; + } + else { + internalEncoding = XmlGetInternalEncoding(); + } + + return parser; +} + +static void +parserInit(XML_Parser parser, const XML_Char *encodingName) +{ processor = prologInitProcessor; XmlPrologStateInit(&prologState); - userData = 0; - handlerArg = 0; - startElementHandler = 0; - endElementHandler = 0; - characterDataHandler = 0; - processingInstructionHandler = 0; - commentHandler = 0; - startCdataSectionHandler = 0; - endCdataSectionHandler = 0; - defaultHandler = 0; - unparsedEntityDeclHandler = 0; - notationDeclHandler = 0; - startNamespaceDeclHandler = 0; - endNamespaceDeclHandler = 0; - notStandaloneHandler = 0; - externalEntityRefHandler = 0; + protocolEncodingName = (encodingName != NULL + ? poolCopyString(&tempPool, encodingName) + : NULL); + curBase = NULL; + XmlInitEncoding(&initEncoding, &encoding, 0); + userData = NULL; + handlerArg = NULL; + startElementHandler = NULL; + endElementHandler = NULL; + characterDataHandler = NULL; + processingInstructionHandler = NULL; + commentHandler = NULL; + startCdataSectionHandler = NULL; + endCdataSectionHandler = NULL; + defaultHandler = NULL; + startDoctypeDeclHandler = NULL; + endDoctypeDeclHandler = NULL; + unparsedEntityDeclHandler = NULL; + notationDeclHandler = NULL; + startNamespaceDeclHandler = NULL; + endNamespaceDeclHandler = NULL; + notStandaloneHandler = NULL; + externalEntityRefHandler = NULL; externalEntityRefHandlerArg = parser; - unknownEncodingHandler = 0; - buffer = 0; - bufferPtr = 0; - bufferEnd = 0; + skippedEntityHandler = NULL; + elementDeclHandler = NULL; + attlistDeclHandler = NULL; + entityDeclHandler = NULL; + xmlDeclHandler = NULL; + bufferPtr = buffer; + bufferEnd = buffer; parseEndByteIndex = 0; - parseEndPtr = 0; - bufferLim = 0; - declElementType = 0; - declAttributeId = 0; - declEntity = 0; - declNotationName = 0; - declNotationPublicId = 0; + parseEndPtr = NULL; + declElementType = NULL; + declAttributeId = NULL; + declEntity = NULL; + doctypeName = NULL; + doctypeSysid = NULL; + doctypePubid = NULL; + declAttributeType = NULL; + declNotationName = NULL; + declNotationPublicId = NULL; + declAttributeIsCdata = XML_FALSE; + declAttributeIsId = XML_FALSE; memset(&position, 0, sizeof(POSITION)); errorCode = XML_ERROR_NONE; - eventPtr = 0; - eventEndPtr = 0; - positionPtr = 0; - openInternalEntities = 0; + eventPtr = NULL; + eventEndPtr = NULL; + positionPtr = NULL; + openInternalEntities = NULL; + defaultExpandInternalEntities = XML_TRUE; tagLevel = 0; - tagStack = 0; - freeTagList = 0; - freeBindingList = 0; - inheritedBindings = 0; - attsSize = INIT_ATTS_SIZE; - atts = malloc(attsSize * sizeof(ATTRIBUTE)); + tagStack = NULL; + inheritedBindings = NULL; nSpecifiedAtts = 0; - dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); - groupSize = 0; - groupConnector = 0; - hadExternalDoctype = 0; - unknownEncodingMem = 0; - unknownEncodingRelease = 0; - unknownEncodingData = 0; - unknownEncodingHandlerData = 0; - namespaceSeparator = '!'; - ns = 0; - poolInit(&tempPool); - poolInit(&temp2Pool); - protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; - if (!dtdInit(&dtd) || !atts || !dataBuf - || (encodingName && !protocolEncodingName)) { - XML_ParserFree(parser); - return 0; - } - dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; - XmlInitEncoding(&initEncoding, &encoding, 0); - return parser; + unknownEncodingMem = NULL; + unknownEncodingRelease = NULL; + unknownEncodingData = NULL; + parentParser = NULL; + ps_parsing = XML_INITIALIZED; +#ifdef XML_DTD + isParamEntity = XML_FALSE; + useForeignDTD = XML_FALSE; + paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif + hash_secret_salt = 0; } -XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) -{ - static - const XML_Char implicitContext[] = { - XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), - XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), - XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), - XML_T('.'), XML_T('w'), XML_T('3'), - XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), - XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), - XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), - XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), - XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), - XML_T('\0') - }; +/* moves list of bindings to freeBindingList */ +static void FASTCALL +moveToFreeBindingList(XML_Parser parser, BINDING *bindings) +{ + while (bindings) { + BINDING *b = bindings; + bindings = bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + } +} - XML_Parser parser = XML_ParserCreate(encodingName); - if (parser) { - XmlInitEncodingNS(&initEncoding, &encoding, 0); - ns = 1; - namespaceSeparator = nsSep; +XML_Bool XMLCALL +XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) +{ + TAG *tStk; + OPEN_INTERNAL_ENTITY *openEntityList; + if (parentParser) + return XML_FALSE; + /* move tagStack to freeTagList */ + tStk = tagStack; + while (tStk) { + TAG *tag = tStk; + tStk = tStk->parent; + tag->parent = freeTagList; + moveToFreeBindingList(parser, tag->bindings); + tag->bindings = NULL; + freeTagList = tag; } - if (!setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return 0; + /* move openInternalEntities to freeInternalEntities */ + openEntityList = openInternalEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; } - return parser; + moveToFreeBindingList(parser, inheritedBindings); + FREE(unknownEncodingMem); + if (unknownEncodingRelease) + unknownEncodingRelease(unknownEncodingData); + poolClear(&tempPool); + poolClear(&temp2Pool); + parserInit(parser, encodingName); + dtdReset(_dtd, &parser->m_mem); + return XML_TRUE; } -int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) +enum XML_Status XMLCALL +XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { - if (!encodingName) - protocolEncodingName = 0; + /* Block after XML_Parse()/XML_ParseBuffer() has been called. + XXX There's no way for the caller to determine which of the + XXX possible error cases caused the XML_STATUS_ERROR return. + */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return XML_STATUS_ERROR; + if (encodingName == NULL) + protocolEncodingName = NULL; else { protocolEncodingName = poolCopyString(&tempPool, encodingName); if (!protocolEncodingName) - return 0; + return XML_STATUS_ERROR; } - return 1; + return XML_STATUS_OK; } -XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, - const XML_Char *context, - const XML_Char *encodingName) +XML_Parser XMLCALL +XML_ExternalEntityParserCreate(XML_Parser oldParser, + const XML_Char *context, + const XML_Char *encodingName) { XML_Parser parser = oldParser; - DTD *oldDtd = &dtd; + DTD *newDtd = NULL; + DTD *oldDtd = _dtd; XML_StartElementHandler oldStartElementHandler = startElementHandler; XML_EndElementHandler oldEndElementHandler = endElementHandler; XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler + = processingInstructionHandler; XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler + = startCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler + = endCdataSectionHandler; XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler + = unparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler + = startNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler + = endNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler + = externalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler + = unknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; + ELEMENT_TYPE * oldDeclElementType = declElementType; + void *oldUserData = userData; void *oldHandlerArg = handlerArg; - int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; - - parser = (ns - ? XML_ParserCreateNS(encodingName, namespaceSeparator) - : XML_ParserCreate(encodingName)); + XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; +#ifdef XML_DTD + enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; + int oldInEntityValue = prologState.inEntityValue; +#endif + XML_Bool oldns_triplets = ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + unsigned long oldhash_secret_salt = hash_secret_salt; + +#ifdef XML_DTD + if (!context) + newDtd = oldDtd; +#endif /* XML_DTD */ + + /* Note that the magical uses of the pre-processor to make field + access look more like C++ require that `parser' be overwritten + here. This makes this function more painful to follow than it + would be otherwise. + */ + if (ns) { + XML_Char tmp[2]; + *tmp = namespaceSeparator; + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + } + else { + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); + } + if (!parser) - return 0; + return NULL; + startElementHandler = oldStartElementHandler; endElementHandler = oldEndElementHandler; characterDataHandler = oldCharacterDataHandler; @@ -556,11 +1060,19 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, startCdataSectionHandler = oldStartCdataSectionHandler; endCdataSectionHandler = oldEndCdataSectionHandler; defaultHandler = oldDefaultHandler; + unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; + notationDeclHandler = oldNotationDeclHandler; startNamespaceDeclHandler = oldStartNamespaceDeclHandler; endNamespaceDeclHandler = oldEndNamespaceDeclHandler; notStandaloneHandler = oldNotStandaloneHandler; externalEntityRefHandler = oldExternalEntityRefHandler; + skippedEntityHandler = oldSkippedEntityHandler; unknownEncodingHandler = oldUnknownEncodingHandler; + elementDeclHandler = oldElementDeclHandler; + attlistDeclHandler = oldAttlistDeclHandler; + entityDeclHandler = oldEntityDeclHandler; + xmlDeclHandler = oldXmlDeclHandler; + declElementType = oldDeclElementType; userData = oldUserData; if (oldUserData == oldHandlerArg) handlerArg = userData; @@ -569,64 +1081,147 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, if (oldExternalEntityRefHandlerArg != oldParser) externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) { - XML_ParserFree(parser); - return 0; + ns_triplets = oldns_triplets; + hash_secret_salt = oldhash_secret_salt; + parentParser = oldParser; +#ifdef XML_DTD + paramEntityParsing = oldParamEntityParsing; + prologState.inEntityValue = oldInEntityValue; + if (context) { +#endif /* XML_DTD */ + if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem) + || !setContext(parser, context)) { + XML_ParserFree(parser); + return NULL; + } + processor = externalEntityInitProcessor; +#ifdef XML_DTD } - processor = externalEntityInitProcessor; + else { + /* The DTD instance referenced by _dtd is shared between the document's + root parser and external PE parsers, therefore one does not need to + call setContext. In addition, one also *must* not call setContext, + because this would overwrite existing prefix->binding pointers in + _dtd with ones that get destroyed with the external PE parser. + This would leave those prefixes with dangling pointers. + */ + isParamEntity = XML_TRUE; + XmlPrologStateInitExternalEntity(&prologState); + processor = externalParEntInitProcessor; + } +#endif /* XML_DTD */ return parser; } -static -void destroyBindings(BINDING *bindings) +static void FASTCALL +destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; if (!b) break; bindings = b->nextTagBinding; - free(b->uri); - free(b); + FREE(b->uri); + FREE(b); } } -void XML_ParserFree(XML_Parser parser) +void XMLCALL +XML_ParserFree(XML_Parser parser) { + TAG *tagList; + OPEN_INTERNAL_ENTITY *entityList; + if (parser == NULL) + return; + /* free tagStack and freeTagList */ + tagList = tagStack; for (;;) { TAG *p; - if (tagStack == 0) { - if (freeTagList == 0) - break; - tagStack = freeTagList; - freeTagList = 0; - } - p = tagStack; - tagStack = tagStack->parent; - free(p->buf); - destroyBindings(p->bindings); - free(p); - } - destroyBindings(freeBindingList); - destroyBindings(inheritedBindings); + if (tagList == NULL) { + if (freeTagList == NULL) + break; + tagList = freeTagList; + freeTagList = NULL; + } + p = tagList; + tagList = tagList->parent; + FREE(p->buf); + destroyBindings(p->bindings, parser); + FREE(p); + } + /* free openInternalEntities and freeInternalEntities */ + entityList = openInternalEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (freeInternalEntities == NULL) + break; + entityList = freeInternalEntities; + freeInternalEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(openEntity); + } + + destroyBindings(freeBindingList, parser); + destroyBindings(inheritedBindings, parser); poolDestroy(&tempPool); poolDestroy(&temp2Pool); - dtdDestroy(&dtd); - free((void *)atts); - free(groupConnector); - free(buffer); - free(dataBuf); - free(unknownEncodingMem); +#ifdef XML_DTD + /* external parameter entity parsers share the DTD structure + parser->m_dtd with the root parser, so we must not destroy it + */ + if (!isParamEntity && _dtd) +#else + if (_dtd) +#endif /* XML_DTD */ + dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); + FREE((void *)atts); +#ifdef XML_ATTR_INFO + FREE((void *)attInfo); +#endif + FREE(groupConnector); + FREE(buffer); + FREE(dataBuf); + FREE(nsAtts); + FREE(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); - free(parser); + FREE(parser); } -void XML_UseParserAsHandlerArg(XML_Parser parser) +void XMLCALL +XML_UseParserAsHandlerArg(XML_Parser parser) { handlerArg = parser; } -void XML_SetUserData(XML_Parser parser, void *p) +enum XML_Error XMLCALL +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) +{ +#ifdef XML_DTD + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; + useForeignDTD = useDTD; + return XML_ERROR_NONE; +#else + return XML_ERROR_FEATURE_REQUIRES_XML_DTD; +#endif +} + +void XMLCALL +XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return; + ns_triplets = do_nst ? XML_TRUE : XML_FALSE; +} + +void XMLCALL +XML_SetUserData(XML_Parser parser, void *p) { if (handlerArg == userData) handlerArg = userData = p; @@ -634,329 +1229,893 @@ void XML_SetUserData(XML_Parser parser, void *p) userData = p; } -int XML_SetBase(XML_Parser parser, const XML_Char *p) +enum XML_Status XMLCALL +XML_SetBase(XML_Parser parser, const XML_Char *p) { if (p) { - p = poolCopyString(&dtd.pool, p); + p = poolCopyString(&_dtd->pool, p); if (!p) - return 0; - dtd.base = p; + return XML_STATUS_ERROR; + curBase = p; } else - dtd.base = 0; - return 1; + curBase = NULL; + return XML_STATUS_OK; } -const XML_Char *XML_GetBase(XML_Parser parser) +const XML_Char * XMLCALL +XML_GetBase(XML_Parser parser) { - return dtd.base; + return curBase; } -int XML_GetSpecifiedAttributeCount(XML_Parser parser) +int XMLCALL +XML_GetSpecifiedAttributeCount(XML_Parser parser) { return nSpecifiedAtts; } -void XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) +int XMLCALL +XML_GetIdAttributeIndex(XML_Parser parser) +{ + return idAttIndex; +} + +#ifdef XML_ATTR_INFO +const XML_AttrInfo * XMLCALL +XML_GetAttributeInfo(XML_Parser parser) +{ + return attInfo; +} +#endif + +void XMLCALL +XML_SetElementHandler(XML_Parser parser, + XML_StartElementHandler start, + XML_EndElementHandler end) { startElementHandler = start; endElementHandler = end; } -void XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler) +void XMLCALL +XML_SetStartElementHandler(XML_Parser parser, + XML_StartElementHandler start) { + startElementHandler = start; +} + +void XMLCALL +XML_SetEndElementHandler(XML_Parser parser, + XML_EndElementHandler end) { + endElementHandler = end; +} + +void XMLCALL +XML_SetCharacterDataHandler(XML_Parser parser, + XML_CharacterDataHandler handler) { characterDataHandler = handler; } -void XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler) +void XMLCALL +XML_SetProcessingInstructionHandler(XML_Parser parser, + XML_ProcessingInstructionHandler handler) { processingInstructionHandler = handler; } -void XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler) +void XMLCALL +XML_SetCommentHandler(XML_Parser parser, + XML_CommentHandler handler) { commentHandler = handler; } -void XML_SetCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end) +void XMLCALL +XML_SetCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start, + XML_EndCdataSectionHandler end) { startCdataSectionHandler = start; endCdataSectionHandler = end; } -void XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler) +void XMLCALL +XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start) { + startCdataSectionHandler = start; +} + +void XMLCALL +XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end) { + endCdataSectionHandler = end; +} + +void XMLCALL +XML_SetDefaultHandler(XML_Parser parser, + XML_DefaultHandler handler) { defaultHandler = handler; - defaultExpandInternalEntities = 0; + defaultExpandInternalEntities = XML_FALSE; } -void XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler) +void XMLCALL +XML_SetDefaultHandlerExpand(XML_Parser parser, + XML_DefaultHandler handler) { defaultHandler = handler; - defaultExpandInternalEntities = 1; + defaultExpandInternalEntities = XML_TRUE; +} + +void XMLCALL +XML_SetDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) +{ + startDoctypeDeclHandler = start; + endDoctypeDeclHandler = end; +} + +void XMLCALL +XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start) { + startDoctypeDeclHandler = start; +} + +void XMLCALL +XML_SetEndDoctypeDeclHandler(XML_Parser parser, + XML_EndDoctypeDeclHandler end) { + endDoctypeDeclHandler = end; } -void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler) +void XMLCALL +XML_SetUnparsedEntityDeclHandler(XML_Parser parser, + XML_UnparsedEntityDeclHandler handler) { unparsedEntityDeclHandler = handler; } -void XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler) +void XMLCALL +XML_SetNotationDeclHandler(XML_Parser parser, + XML_NotationDeclHandler handler) { notationDeclHandler = handler; } -void XML_SetNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end) +void XMLCALL +XML_SetNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end) { startNamespaceDeclHandler = start; endNamespaceDeclHandler = end; } -void XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler) +void XMLCALL +XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start) { + startNamespaceDeclHandler = start; +} + +void XMLCALL +XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end) { + endNamespaceDeclHandler = end; +} + +void XMLCALL +XML_SetNotStandaloneHandler(XML_Parser parser, + XML_NotStandaloneHandler handler) { notStandaloneHandler = handler; } -void XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler) +void XMLCALL +XML_SetExternalEntityRefHandler(XML_Parser parser, + XML_ExternalEntityRefHandler handler) { externalEntityRefHandler = handler; } -void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) +void XMLCALL +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { if (arg) - externalEntityRefHandlerArg = arg; + externalEntityRefHandlerArg = (XML_Parser)arg; else externalEntityRefHandlerArg = parser; } -void XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *data) +void XMLCALL +XML_SetSkippedEntityHandler(XML_Parser parser, + XML_SkippedEntityHandler handler) +{ + skippedEntityHandler = handler; +} + +void XMLCALL +XML_SetUnknownEncodingHandler(XML_Parser parser, + XML_UnknownEncodingHandler handler, + void *data) { unknownEncodingHandler = handler; unknownEncodingHandlerData = data; } -int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) +void XMLCALL +XML_SetElementDeclHandler(XML_Parser parser, + XML_ElementDeclHandler eldecl) +{ + elementDeclHandler = eldecl; +} + +void XMLCALL +XML_SetAttlistDeclHandler(XML_Parser parser, + XML_AttlistDeclHandler attdecl) +{ + attlistDeclHandler = attdecl; +} + +void XMLCALL +XML_SetEntityDeclHandler(XML_Parser parser, + XML_EntityDeclHandler handler) +{ + entityDeclHandler = handler; +} + +void XMLCALL +XML_SetXmlDeclHandler(XML_Parser parser, + XML_XmlDeclHandler handler) { + xmlDeclHandler = handler; +} + +int XMLCALL +XML_SetParamEntityParsing(XML_Parser parser, + enum XML_ParamEntityParsing peParsing) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return 0; +#ifdef XML_DTD + paramEntityParsing = peParsing; + return 1; +#else + return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; +#endif +} + +int XMLCALL +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return 0; + hash_secret_salt = hash_salt; + return 1; +} + +enum XML_Status XMLCALL +XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + default: + ps_parsing = XML_PARSING; + } + if (len == 0) { + ps_finalBuffer = (XML_Bool)isFinal; if (!isFinal) - return 1; + return XML_STATUS_OK; positionPtr = bufferPtr; - errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0); - if (errorCode == XML_ERROR_NONE) - return 1; + parseEndPtr = bufferEnd; + + /* If data are left over from last buffer, and we now know that these + data are the final chunk of input, then we have to check them again + to detect errors based on that fact. + */ + errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + + if (errorCode == XML_ERROR_NONE) { + switch (ps_parsing) { + case XML_SUSPENDED: + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return XML_STATUS_SUSPENDED; + case XML_INITIALIZED: + case XML_PARSING: + ps_parsing = XML_FINISHED; + /* fall through */ + default: + return XML_STATUS_OK; + } + } eventEndPtr = eventPtr; - return 0; + processor = errorProcessor; + return XML_STATUS_ERROR; } +#ifndef XML_CONTEXT_BYTES else if (bufferPtr == bufferEnd) { const char *end; int nLeftOver; + enum XML_Error result; parseEndByteIndex += len; positionPtr = s; - if (isFinal) { - errorCode = processor(parser, s, parseEndPtr = s + len, 0); - if (errorCode == XML_ERROR_NONE) - return 1; - eventEndPtr = eventPtr; - return 0; - } + ps_finalBuffer = (XML_Bool)isFinal; + errorCode = processor(parser, s, parseEndPtr = s + len, &end); + if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; - return 0; + processor = errorProcessor; + return XML_STATUS_ERROR; } + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + ps_parsing = XML_FINISHED; + return XML_STATUS_OK; + } + /* fall through */ + default: + result = XML_STATUS_OK; + } + } + XmlUpdatePosition(encoding, positionPtr, end, &position); nLeftOver = s + len - end; if (nLeftOver) { - if (buffer == 0 || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2); - if (!buffer) { - errorCode = XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = 0; - return 0; - } - bufferLim = buffer + len * 2; + if (buffer == NULL || nLeftOver > bufferLim - buffer) { + /* FIXME avoid integer overflow */ + char *temp; + temp = (buffer == NULL + ? (char *)MALLOC(len * 2) + : (char *)REALLOC(buffer, len * 2)); + if (temp == NULL) { + errorCode = XML_ERROR_NO_MEMORY; + eventPtr = eventEndPtr = NULL; + processor = errorProcessor; + return XML_STATUS_ERROR; + } + buffer = temp; + bufferLim = buffer + len * 2; } memcpy(buffer, end, nLeftOver); - bufferPtr = buffer; - bufferEnd = buffer + nLeftOver; } - return 1; + bufferPtr = buffer; + bufferEnd = buffer + nLeftOver; + positionPtr = bufferPtr; + parseEndPtr = bufferEnd; + eventPtr = bufferPtr; + eventEndPtr = bufferPtr; + return result; } +#endif /* not defined XML_CONTEXT_BYTES */ else { - memcpy(XML_GetBuffer(parser, len), s, len); - return XML_ParseBuffer(parser, len, isFinal); + void *buff = XML_GetBuffer(parser, len); + if (buff == NULL) + return XML_STATUS_ERROR; + else { + memcpy(buff, s, len); + return XML_ParseBuffer(parser, len, isFinal); + } } } -int XML_ParseBuffer(XML_Parser parser, int len, int isFinal) +enum XML_Status XMLCALL +XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { - const char *start = bufferPtr; + const char *start; + enum XML_Status result = XML_STATUS_OK; + + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + default: + ps_parsing = XML_PARSING; + } + + start = bufferPtr; positionPtr = start; bufferEnd += len; + parseEndPtr = bufferEnd; parseEndByteIndex += len; - errorCode = processor(parser, start, parseEndPtr = bufferEnd, - isFinal ? (const char **)0 : &bufferPtr); - if (errorCode == XML_ERROR_NONE) { - if (!isFinal) - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - return 1; - } - else { + ps_finalBuffer = (XML_Bool)isFinal; + + errorCode = processor(parser, start, parseEndPtr, &bufferPtr); + + if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; - return 0; + processor = errorProcessor; + return XML_STATUS_ERROR; } -} - -void *XML_GetBuffer(XML_Parser parser, int len) + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + ps_parsing = XML_FINISHED; + return result; + } + default: ; /* should not happen */ + } + } + + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return result; +} + +void * XMLCALL +XML_GetBuffer(XML_Parser parser, int len) { + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return NULL; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return NULL; + default: ; + } + if (len > bufferLim - bufferEnd) { /* FIXME avoid integer overflow */ - int neededSize = len + (bufferEnd - bufferPtr); + int neededSize = len + (int)(bufferEnd - bufferPtr); +#ifdef XML_CONTEXT_BYTES + int keep = (int)(bufferPtr - buffer); + + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + neededSize += keep; +#endif /* defined XML_CONTEXT_BYTES */ if (neededSize <= bufferLim - buffer) { +#ifdef XML_CONTEXT_BYTES + if (keep < bufferPtr - buffer) { + int offset = (int)(bufferPtr - buffer) - keep; + memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); + bufferEnd -= offset; + bufferPtr -= offset; + } +#else memmove(buffer, bufferPtr, bufferEnd - bufferPtr); bufferEnd = buffer + (bufferEnd - bufferPtr); bufferPtr = buffer; +#endif /* not defined XML_CONTEXT_BYTES */ } else { char *newBuf; - int bufferSize = bufferLim - bufferPtr; + int bufferSize = (int)(bufferLim - bufferPtr); if (bufferSize == 0) - bufferSize = INIT_BUFFER_SIZE; + bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; + bufferSize *= 2; } while (bufferSize < neededSize); - newBuf = malloc(bufferSize); + newBuf = (char *)MALLOC(bufferSize); if (newBuf == 0) { - errorCode = XML_ERROR_NO_MEMORY; - return 0; + errorCode = XML_ERROR_NO_MEMORY; + return NULL; } bufferLim = newBuf + bufferSize; +#ifdef XML_CONTEXT_BYTES if (bufferPtr) { - memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - free(buffer); + int keep = (int)(bufferPtr - buffer); + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); + FREE(buffer); + buffer = newBuf; + bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; + bufferPtr = buffer + keep; + } + else { + bufferEnd = newBuf + (bufferEnd - bufferPtr); + bufferPtr = buffer = newBuf; + } +#else + if (bufferPtr) { + memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); + FREE(buffer); } bufferEnd = newBuf + (bufferEnd - bufferPtr); bufferPtr = buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ } + eventPtr = eventEndPtr = NULL; + positionPtr = NULL; } return bufferEnd; } -enum XML_Error XML_GetErrorCode(XML_Parser parser) +enum XML_Status XMLCALL +XML_StopParser(XML_Parser parser, XML_Bool resumable) +{ + switch (ps_parsing) { + case XML_SUSPENDED: + if (resumable) { + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + } + ps_parsing = XML_FINISHED; + break; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + default: + if (resumable) { +#ifdef XML_DTD + if (isParamEntity) { + errorCode = XML_ERROR_SUSPEND_PE; + return XML_STATUS_ERROR; + } +#endif + ps_parsing = XML_SUSPENDED; + } + else + ps_parsing = XML_FINISHED; + } + return XML_STATUS_OK; +} + +enum XML_Status XMLCALL +XML_ResumeParser(XML_Parser parser) +{ + enum XML_Status result = XML_STATUS_OK; + + if (ps_parsing != XML_SUSPENDED) { + errorCode = XML_ERROR_NOT_SUSPENDED; + return XML_STATUS_ERROR; + } + ps_parsing = XML_PARSING; + + errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + + if (errorCode != XML_ERROR_NONE) { + eventEndPtr = eventPtr; + processor = errorProcessor; + return XML_STATUS_ERROR; + } + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (ps_finalBuffer) { + ps_parsing = XML_FINISHED; + return result; + } + default: ; + } + } + + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return result; +} + +void XMLCALL +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) +{ + assert(status != NULL); + *status = parser->m_parsingStatus; +} + +enum XML_Error XMLCALL +XML_GetErrorCode(XML_Parser parser) { return errorCode; } -long XML_GetCurrentByteIndex(XML_Parser parser) +XML_Index XMLCALL +XML_GetCurrentByteIndex(XML_Parser parser) { if (eventPtr) return parseEndByteIndex - (parseEndPtr - eventPtr); return -1; } -int XML_GetCurrentByteCount(XML_Parser parser) +int XMLCALL +XML_GetCurrentByteCount(XML_Parser parser) { if (eventEndPtr && eventPtr) - return eventEndPtr - eventPtr; + return (int)(eventEndPtr - eventPtr); return 0; } -int XML_GetCurrentLineNumber(XML_Parser parser) +const char * XMLCALL +XML_GetInputContext(XML_Parser parser, int *offset, int *size) +{ +#ifdef XML_CONTEXT_BYTES + if (eventPtr && buffer) { + *offset = (int)(eventPtr - buffer); + *size = (int)(bufferEnd - buffer); + return buffer; + } +#endif /* defined XML_CONTEXT_BYTES */ + return (char *) 0; +} + +XML_Size XMLCALL +XML_GetCurrentLineNumber(XML_Parser parser) { - if (eventPtr) { + if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; } return position.lineNumber + 1; } -int XML_GetCurrentColumnNumber(XML_Parser parser) +XML_Size XMLCALL +XML_GetCurrentColumnNumber(XML_Parser parser) { - if (eventPtr) { + if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; } return position.columnNumber; } -void XML_DefaultCurrent(XML_Parser parser) +void XMLCALL +XML_FreeContentModel(XML_Parser parser, XML_Content *model) +{ + FREE(model); +} + +void * XMLCALL +XML_MemMalloc(XML_Parser parser, size_t size) +{ + return MALLOC(size); +} + +void * XMLCALL +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) +{ + return REALLOC(ptr, size); +} + +void XMLCALL +XML_MemFree(XML_Parser parser, void *ptr) +{ + FREE(ptr); +} + +void XMLCALL +XML_DefaultCurrent(XML_Parser parser) { if (defaultHandler) { if (openInternalEntities) reportDefault(parser, - ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(), - openInternalEntities->internalEventPtr, - openInternalEntities->internalEventEndPtr); + internalEncoding, + openInternalEntities->internalEventPtr, + openInternalEntities->internalEventEndPtr); else reportDefault(parser, encoding, eventPtr, eventEndPtr); } } -const XML_LChar *XML_ErrorString(int code) +const XML_LChar * XMLCALL +XML_ErrorString(enum XML_Error code) { - static const XML_LChar *message[] = { + static const XML_LChar* const message[] = { 0, - XML_T("out of memory"), - XML_T("syntax error"), - XML_T("no element found"), - XML_T("not well-formed"), - XML_T("unclosed token"), - XML_T("unclosed token"), - XML_T("mismatched tag"), - XML_T("duplicate attribute"), - XML_T("junk after document element"), - XML_T("illegal parameter entity reference"), - XML_T("undefined entity"), - XML_T("recursive entity reference"), - XML_T("asynchronous entity"), - XML_T("reference to invalid character number"), - XML_T("reference to binary entity"), - XML_T("reference to external entity in attribute"), - XML_T("xml processing instruction not at start of external entity"), - XML_T("unknown encoding"), - XML_T("encoding specified in XML declaration is incorrect"), - XML_T("unclosed CDATA section"), - XML_T("error in processing external entity reference"), - XML_T("document is not standalone") + XML_L("out of memory"), + XML_L("syntax error"), + XML_L("no element found"), + XML_L("not well-formed (invalid token)"), + XML_L("unclosed token"), + XML_L("partial character"), + XML_L("mismatched tag"), + XML_L("duplicate attribute"), + XML_L("junk after document element"), + XML_L("illegal parameter entity reference"), + XML_L("undefined entity"), + XML_L("recursive entity reference"), + XML_L("asynchronous entity"), + XML_L("reference to invalid character number"), + XML_L("reference to binary entity"), + XML_L("reference to external entity in attribute"), + XML_L("XML or text declaration not at start of entity"), + XML_L("unknown encoding"), + XML_L("encoding specified in XML declaration is incorrect"), + XML_L("unclosed CDATA section"), + XML_L("error in processing external entity reference"), + XML_L("document is not standalone"), + XML_L("unexpected parser state - please send a bug report"), + XML_L("entity declared in parameter entity"), + XML_L("requested feature requires XML_DTD support in Expat"), + XML_L("cannot change setting once parsing has begun"), + XML_L("unbound prefix"), + XML_L("must not undeclare prefix"), + XML_L("incomplete markup in parameter entity"), + XML_L("XML declaration not well-formed"), + XML_L("text declaration not well-formed"), + XML_L("illegal character(s) in public id"), + XML_L("parser suspended"), + XML_L("parser not suspended"), + XML_L("parsing aborted"), + XML_L("parsing finished"), + XML_L("cannot suspend in external parameter entity"), + XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"), + XML_L("reserved prefix (xmlns) must not be declared or undeclared"), + XML_L("prefix must not be bound to one of the reserved namespace names") }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; - return 0; + return NULL; +} + +const XML_LChar * XMLCALL +XML_ExpatVersion(void) { + + /* V1 is used to string-ize the version number. However, it would + string-ize the actual version macro *names* unless we get them + substituted before being passed to V1. CPP is defined to expand + a macro, then rescan for more expansions. Thus, we use V2 to expand + the version macros, then CPP will expand the resulting V1() macro + with the correct numerals. */ + /* ### I'm assuming cpp is portable in this respect... */ + +#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c) +#define V2(a,b,c) XML_L("expat_")V1(a,b,c) + + return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); + +#undef V1 +#undef V2 } -static -enum XML_Error contentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +XML_Expat_Version XMLCALL +XML_ExpatVersionInfo(void) { - return doContent(parser, 0, encoding, start, end, endPtr); + XML_Expat_Version version; + + version.major = XML_MAJOR_VERSION; + version.minor = XML_MINOR_VERSION; + version.micro = XML_MICRO_VERSION; + + return version; } -static -enum XML_Error externalEntityInitProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +const XML_Feature * XMLCALL +XML_GetFeatureList(void) +{ + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, +#ifdef XML_UNICODE + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, +#endif +#ifdef XML_UNICODE_WCHAR_T + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, +#endif +#ifdef XML_DTD + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, +#endif +#ifdef XML_CONTEXT_BYTES + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, +#endif +#ifdef XML_MIN_SIZE + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, +#endif +#ifdef XML_NS + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, +#endif +#ifdef XML_LARGE_SIZE + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, +#endif +#ifdef XML_ATTR_INFO + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif + {XML_FEATURE_END, NULL, 0} + }; + + return features; +} + +/* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more + permanent location, since the parse buffer is about to be discarded. +*/ +static XML_Bool +storeRawNames(XML_Parser parser) +{ + TAG *tag = tagStack; + while (tag) { + int bufSize; + int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + char *rawNameBuf = tag->buf + nameLen; + /* Stop if already stored. Since tagStack is a stack, we can stop + at the first entry that has already been copied; everything + below it in the stack is already been accounted for in a + previous call to this function. + */ + if (tag->rawName == rawNameBuf) + break; + /* For re-use purposes we need to ensure that the + size of tag->buf is a multiple of sizeof(XML_Char). + */ + bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + if (bufSize > tag->bufEnd - tag->buf) { + char *temp = (char *)REALLOC(tag->buf, bufSize); + if (temp == NULL) + return XML_FALSE; + /* if tag->name.str points to tag->buf (only when namespace + processing is off) then we have to update it + */ + if (tag->name.str == (XML_Char *)tag->buf) + tag->name.str = (XML_Char *)temp; + /* if tag->name.localPart is set (when namespace processing is on) + then update it as well, since it will always point into tag->buf + */ + if (tag->name.localPart) + tag->name.localPart = (XML_Char *)temp + (tag->name.localPart - + (XML_Char *)tag->buf); + tag->buf = temp; + tag->bufEnd = temp + bufSize; + rawNameBuf = temp + nameLen; + } + memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); + tag->rawName = rawNameBuf; + tag = tag->parent; + } + return XML_TRUE; +} + +static enum XML_Error PTRCALL +contentProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + enum XML_Error result = doContent(parser, 0, encoding, start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result == XML_ERROR_NONE) { + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } + return result; +} + +static enum XML_Error PTRCALL +externalEntityInitProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) @@ -965,27 +2124,36 @@ enum XML_Error externalEntityInitProcessor(XML_Parser parser, return externalEntityInitProcessor2(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor2(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityInitProcessor2(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - const char *next; + const char *next = start; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: + /* If we are at the end of the buffer, this would cause the next stage, + i.e. externalEntityInitProcessor3, to pass control directly to + doContent (by detecting XML_TOK_NONE) without processing any xml text + declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. + */ + if (next == end && !ps_finalBuffer) { + *endPtr = next; + return XML_ERROR_NONE; + } start = next; break; case XML_TOK_PARTIAL: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } @@ -996,61 +2164,81 @@ enum XML_Error externalEntityInitProcessor2(XML_Parser parser, return externalEntityInitProcessor3(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor3(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityInitProcessor3(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - const char *next; - int tok = XmlContentTok(encoding, start, end, &next); + int tok; + const char *next = start; /* XmlContentTok doesn't always set the last arg */ + eventPtr = start; + tok = XmlContentTok(encoding, start, end, &next); + eventEndPtr = next; + switch (tok) { case XML_TOK_XML_DECL: { - enum XML_Error result = processXmlDecl(parser, 1, start, next); + enum XML_Error result; + result = processXmlDecl(parser, 1, start, next); if (result != XML_ERROR_NONE) - return result; - start = next; + return result; + switch (ps_parsing) { + case XML_SUSPENDED: + *endPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + start = next; + } } break; case XML_TOK_PARTIAL: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } processor = externalEntityContentProcessor; tagLevel = 1; - return doContent(parser, 1, encoding, start, end, endPtr); + return externalEntityContentProcessor(parser, start, end, endPtr); } -static -enum XML_Error externalEntityContentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityContentProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - return doContent(parser, 1, encoding, start, end, endPtr); + enum XML_Error result = doContent(parser, 1, encoding, start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result == XML_ERROR_NONE) { + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } + return result; } static enum XML_Error doContent(XML_Parser parser, - int startTagLevel, - const ENCODING *enc, - const char *s, - const char *end, - const char **nextPtr) + int startTagLevel, + const ENCODING *enc, + const char *s, + const char *end, + const char **nextPtr, + XML_Bool haveMore) { - const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); + /* save one level of indirection */ + DTD * const dtd = _dtd; + const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -1062,513 +2250,581 @@ doContent(XML_Parser parser, eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; + for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } *eventEndPP = end; if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, end); + reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_NO_ELEMENTS; if (tagLevel != startTagLevel) - return XML_ERROR_ASYNC_ENTITY; + return XML_ERROR_ASYNC_ENTITY; + *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } if (startTagLevel > 0) { - if (tagLevel != startTagLevel) - return XML_ERROR_ASYNC_ENTITY; - return XML_ERROR_NONE; + if (tagLevel != startTagLevel) + return XML_ERROR_ASYNC_ENTITY; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_ENTITY_REF: { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (characterDataHandler) - characterDataHandler(handlerArg, &ch, 1); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&dtd.pool); - if (!entity) { - if (dtd.complete || dtd.standalone) - return XML_ERROR_UNDEFINED_ENTITY; - if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->notation) - return XML_ERROR_BINARY_ENTITY_REF; - if (entity) { - if (entity->textPtr) { - enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; - if (defaultHandler && !defaultExpandInternalEntities) { - reportDefault(parser, enc, s, next); - break; - } - entity->open = 1; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = 0; - openEntity.internalEventEndPtr = 0; - result = doContent(parser, - tagLevel, - internalEnc, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen), - 0); - entity->open = 0; - openInternalEntities = openEntity.next; - if (result) - return result; - } - else if (externalEntityRefHandler) { - const XML_Char *context; - entity->open = 1; - context = getContext(parser); - entity->open = 0; - if (!context) - return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - context, - dtd.base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - poolDiscard(&tempPool); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; - } - case XML_TOK_START_TAG_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; + const XML_Char *name; + ENTITY *entity; + XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (characterDataHandler) + characterDataHandler(handlerArg, &ch, 1); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + name = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity or default handler. + */ + if (!dtd->hasParamEntityRefs || dtd->standalone) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->notation) + return XML_ERROR_BINARY_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + if (!defaultExpandInternalEntities) { + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, entity->name, 0); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + result = processInternalEntity(parser, entity, XML_FALSE); + if (result != XML_ERROR_NONE) + return result; + } + else if (externalEntityRefHandler) { + const XML_Char *context; + entity->open = XML_TRUE; + context = getContext(parser); + entity->open = XML_FALSE; + if (!context) + return XML_ERROR_NO_MEMORY; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + context, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + poolDiscard(&tempPool); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; } - /* fall through */ case XML_TOK_START_TAG_NO_ATTS: + /* fall through */ + case XML_TOK_START_TAG_WITH_ATTS: { - TAG *tag; - if (freeTagList) { - tag = freeTagList; - freeTagList = freeTagList->parent; - } - else { - tag = malloc(sizeof(TAG)); - if (!tag) - return XML_ERROR_NO_MEMORY; - tag->buf = malloc(INIT_TAG_BUF_SIZE); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; - } - tag->bindings = 0; - tag->parent = tagStack; - tagStack = tag; - tag->name.localPart = 0; - tag->rawName = s + enc->minBytesPerChar; - tag->rawNameLength = XmlNameLength(enc, tag->rawName); - if (nextPtr) { - /* Need to guarantee that: - tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */ - if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) { - int bufSize = tag->rawNameLength * 4; - bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); - tag->buf = realloc(tag->buf, bufSize); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; - } - memcpy(tag->buf, tag->rawName, tag->rawNameLength); - tag->rawName = tag->buf; - } - ++tagLevel; - if (startElementHandler) { - enum XML_Error result; - XML_Char *toPtr; - for (;;) { - const char *rawNameEnd = tag->rawName + tag->rawNameLength; - const char *fromPtr = tag->rawName; - int bufSize; - if (nextPtr) - toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char))); - else - toPtr = (XML_Char *)tag->buf; - tag->name.str = toPtr; - XmlConvert(enc, - &fromPtr, rawNameEnd, - (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - if (fromPtr == rawNameEnd) - break; - bufSize = (tag->bufEnd - tag->buf) << 1; - tag->buf = realloc(tag->buf, bufSize); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; - if (nextPtr) - tag->rawName = tag->buf; - } - *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); - if (result) - return result; - startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts); - poolClear(&tempPool); - } - else { - tag->name.str = 0; - if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; - } - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; + TAG *tag; + enum XML_Error result; + XML_Char *toPtr; + if (freeTagList) { + tag = freeTagList; + freeTagList = freeTagList->parent; + } + else { + tag = (TAG *)MALLOC(sizeof(TAG)); + if (!tag) + return XML_ERROR_NO_MEMORY; + tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE); + if (!tag->buf) { + FREE(tag); + return XML_ERROR_NO_MEMORY; + } + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + } + tag->bindings = NULL; + tag->parent = tagStack; + tagStack = tag; + tag->name.localPart = NULL; + tag->name.prefix = NULL; + tag->rawName = s + enc->minBytesPerChar; + tag->rawNameLength = XmlNameLength(enc, tag->rawName); + ++tagLevel; + { + const char *rawNameEnd = tag->rawName + tag->rawNameLength; + const char *fromPtr = tag->rawName; + toPtr = (XML_Char *)tag->buf; + for (;;) { + int bufSize; + int convLen; + XmlConvert(enc, + &fromPtr, rawNameEnd, + (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); + convLen = (int)(toPtr - (XML_Char *)tag->buf); + if (fromPtr == rawNameEnd) { + tag->name.strLen = convLen; + break; + } + bufSize = (int)(tag->bufEnd - tag->buf) << 1; + { + char *temp = (char *)REALLOC(tag->buf, bufSize); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + tag->buf = temp; + tag->bufEnd = temp + bufSize; + toPtr = (XML_Char *)temp + convLen; + } + } + } + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); + result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); + if (result) + return result; + if (startElementHandler) + startElementHandler(handlerArg, tag->name.str, + (const XML_Char **)atts); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&tempPool); + break; } - /* fall through */ case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - if (startElementHandler || endElementHandler) { - const char *rawName = s + enc->minBytesPerChar; - enum XML_Error result; - BINDING *bindings = 0; - TAG_NAME name; - name.str = poolStoreString(&tempPool, enc, rawName, - rawName + XmlNameLength(enc, rawName)); - if (!name.str) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); - if (result) - return result; - poolFinish(&tempPool); - if (startElementHandler) - startElementHandler(handlerArg, name.str, (const XML_Char **)atts); - if (endElementHandler) { - if (startElementHandler) - *eventPP = *eventEndPP; - endElementHandler(handlerArg, name.str); - } - poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + /* fall through */ + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: + { + const char *rawName = s + enc->minBytesPerChar; + enum XML_Error result; + BINDING *bindings = NULL; + XML_Bool noElmHandlers = XML_TRUE; + TAG_NAME name; + name.str = poolStoreString(&tempPool, enc, rawName, + rawName + XmlNameLength(enc, rawName)); + if (!name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + result = storeAtts(parser, enc, s, &name, &bindings); + if (result) + return result; + poolFinish(&tempPool); + if (startElementHandler) { + startElementHandler(handlerArg, name.str, (const XML_Char **)atts); + noElmHandlers = XML_FALSE; + } + if (endElementHandler) { + if (startElementHandler) + *eventPP = *eventEndPP; + endElementHandler(handlerArg, name.str); + noElmHandlers = XML_FALSE; + } + if (noElmHandlers && defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&tempPool); + while (bindings) { + BINDING *b = bindings; + if (endNamespaceDeclHandler) + endNamespaceDeclHandler(handlerArg, b->prefix->name); + bindings = bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } } - else if (defaultHandler) - reportDefault(parser, enc, s, next); if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); + return epilogProcessor(parser, next, end, nextPtr); break; case XML_TOK_END_TAG: if (tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { - int len; - const char *rawName; - TAG *tag = tagStack; - tagStack = tag->parent; - tag->parent = freeTagList; - freeTagList = tag; - rawName = s + enc->minBytesPerChar*2; - len = XmlNameLength(enc, rawName); - if (len != tag->rawNameLength - || memcmp(tag->rawName, rawName, len) != 0) { - *eventPP = rawName; - return XML_ERROR_TAG_MISMATCH; - } - --tagLevel; - if (endElementHandler && tag->name.str) { - if (tag->name.localPart) { - XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen; - const XML_Char *from = tag->name.localPart; - while ((*to++ = *from++) != 0) - ; - } - endElementHandler(handlerArg, tag->name.str); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - while (tag->bindings) { - BINDING *b = tag->bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - tag->bindings = tag->bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); + int len; + const char *rawName; + TAG *tag = tagStack; + tagStack = tag->parent; + tag->parent = freeTagList; + freeTagList = tag; + rawName = s + enc->minBytesPerChar*2; + len = XmlNameLength(enc, rawName); + if (len != tag->rawNameLength + || memcmp(tag->rawName, rawName, len) != 0) { + *eventPP = rawName; + return XML_ERROR_TAG_MISMATCH; + } + --tagLevel; + if (endElementHandler) { + const XML_Char *localPart; + const XML_Char *prefix; + XML_Char *uri; + localPart = tag->name.localPart; + if (ns && localPart) { + /* localPart and prefix may have been overwritten in + tag->name.str, since this points to the binding->uri + buffer which gets re-used; so we have to add them again + */ + uri = (XML_Char *)tag->name.str + tag->name.uriLen; + /* don't need to check for space - already done in storeAtts() */ + while (*localPart) *uri++ = *localPart++; + prefix = (XML_Char *)tag->name.prefix; + if (ns_triplets && prefix) { + *uri++ = namespaceSeparator; + while (*prefix) *uri++ = *prefix++; + } + *uri = XML_T('\0'); + } + endElementHandler(handlerArg, tag->name.str); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + while (tag->bindings) { + BINDING *b = tag->bindings; + if (endNamespaceDeclHandler) + endNamespaceDeclHandler(handlerArg, b->prefix->name); + tag->bindings = tag->bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } + if (tagLevel == 0) + return epilogProcessor(parser, next, end, nextPtr); } break; case XML_TOK_CHAR_REF: { - int n = XmlCharRefNumber(enc, s); - if (n < 0) - return XML_ERROR_BAD_CHAR_REF; - if (characterDataHandler) { - XML_Char buf[XML_ENCODE_MAX]; - characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); + int n = XmlCharRefNumber(enc, s); + if (n < 0) + return XML_ERROR_BAD_CHAR_REF; + if (characterDataHandler) { + XML_Char buf[XML_ENCODE_MAX]; + characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_CDATA_SECT_OPEN: { - enum XML_Error result; - if (startCdataSectionHandler) - startCdataSectionHandler(handlerArg); + enum XML_Error result; + if (startCdataSectionHandler) + startCdataSectionHandler(handlerArg); #if 0 - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the transformation - and writes the characters out escaping them as necessary. This case - will fail to work if we leave out the following two lines (because & - and < inside CDATA sections will be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. */ - - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + else if (characterDataHandler) + characterDataHandler(handlerArg, dataBuf, 0); #endif - else if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr); - if (!next) { - processor = cdataSectionProcessor; - return result; - } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (!next) { + processor = cdataSectionProcessor; + return result; + } } break; case XML_TOK_TRAILING_RSQB: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)end - (XML_Char *)s); + if (MUST_CONVERT(enc, s)) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + characterDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + } + else + characterDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)end - (XML_Char *)s)); } else if (defaultHandler) - reportDefault(parser, enc, s, end); + reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) { *eventPP = end; - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_NO_ELEMENTS; } if (tagLevel != startTagLevel) { - *eventPP = end; - return XML_ERROR_ASYNC_ENTITY; + *eventPP = end; + return XML_ERROR_ASYNC_ENTITY; } + *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; - } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = s; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if (s == next) + break; + *eventPP = s; + } + } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; default: if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; } *eventPP = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } } /* not reached */ } -/* If tagNamePtr is non-null, build a real list of attributes, -otherwise just check the attributes for well-formedness. */ - -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, - const char *s, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr) +/* Precondition: all arguments must be non-NULL; + Purpose: + - normalize attributes + - check attributes for well-formedness + - generate namespace aware attribute names (URI, prefix) + - build list of attributes for startElementHandler + - default attributes + - process namespace declarations (check and report them) + - generate namespace aware element name (URI, prefix) +*/ +static enum XML_Error +storeAtts(XML_Parser parser, const ENCODING *enc, + const char *attStr, TAG_NAME *tagNamePtr, + BINDING **bindingsPtr) { - ELEMENT_TYPE *elementType = 0; - int nDefaultAtts = 0; - const XML_Char **appAtts; + DTD * const dtd = _dtd; /* save one level of indirection */ + ELEMENT_TYPE *elementType; + int nDefaultAtts; + const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; + int prefixLen; int i; int n; + XML_Char *uri; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; - if (tagNamePtr) { - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0); - if (!elementType) { - tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str); - if (!tagNamePtr->str) - return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE)); - if (!elementType) - return XML_ERROR_NO_MEMORY; - if (ns && !setElementTypePrefix(parser, elementType)) - return XML_ERROR_NO_MEMORY; - } - nDefaultAtts = elementType->nDefaultAtts; + /* lookup the element type name */ + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0); + if (!elementType) { + const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); + if (!name) + return XML_ERROR_NO_MEMORY; + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); + if (!elementType) + return XML_ERROR_NO_MEMORY; + if (ns && !setElementTypePrefix(parser, elementType)) + return XML_ERROR_NO_MEMORY; } - n = XmlGetAttributes(enc, s, attsSize, atts); + nDefaultAtts = elementType->nDefaultAtts; + + /* get the attributes from the tokenizer */ + n = XmlGetAttributes(enc, attStr, attsSize, atts); if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; + ATTRIBUTE *temp; +#ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; +#endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (!atts) + temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + atts = temp; +#ifdef XML_ATTR_INFO + temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) return XML_ERROR_NO_MEMORY; + attInfo = temp2; +#endif if (n > oldAttsSize) - XmlGetAttributes(enc, s, n, atts); + XmlGetAttributes(enc, attStr, n, atts); } + appAtts = (const XML_Char **)atts; for (i = 0; i < n; i++) { - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); + ATTRIBUTE *currAtt = &atts[i]; +#ifdef XML_ATTR_INFO + XML_AttrInfo *currAttInfo = &attInfo[i]; +#endif + /* add the name and value to the attribute list */ + ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name, + currAtt->name + + XmlNameLength(enc, currAtt->name)); if (!attId) return XML_ERROR_NO_MEMORY; +#ifdef XML_ATTR_INFO + currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name); + currAttInfo->nameEnd = currAttInfo->nameStart + + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parseEndByteIndex - + (parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd); +#endif + /* Detect duplicate attributes by their QNames. This does not work when + namespace processing is turned on and different prefixes for the same + namespace are used. For this case we have a check further down. + */ if ((attId->name)[-1]) { if (enc == encoding) - eventPtr = atts[i].name; + eventPtr = atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; if (!atts[i].normalized) { enum XML_Error result; - int isCdata = 1; + XML_Bool isCdata = XML_TRUE; + /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { - int j; - for (j = 0; j < nDefaultAtts; j++) { - if (attId == elementType->defaultAtts[j].id) { - isCdata = elementType->defaultAtts[j].isCdata; - break; - } - } + int j; + for (j = 0; j < nDefaultAtts; j++) { + if (attId == elementType->defaultAtts[j].id) { + isCdata = elementType->defaultAtts[j].isCdata; + break; + } + } } + /* normalize the attribute value */ result = storeAttributeValue(parser, enc, isCdata, - atts[i].valuePtr, atts[i].valueEnd, - &tempPool); + atts[i].valuePtr, atts[i].valueEnd, + &tempPool); if (result) - return result; - if (tagNamePtr) { - appAtts[attIndex] = poolStart(&tempPool); - poolFinish(&tempPool); - } - else - poolDiscard(&tempPool); + return result; + appAtts[attIndex] = poolStart(&tempPool); + poolFinish(&tempPool); } - else if (tagNamePtr) { - appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); + else { + /* the value did not need normalizing */ + appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, + atts[i].valueEnd); if (appAtts[attIndex] == 0) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } - if (attId->prefix && tagNamePtr) { + /* handle prefixed attribute names */ + if (attId->prefix) { if (attId->xmlns) { - if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr)) - return XML_ERROR_NO_MEMORY; + /* deal with namespace declarations here */ + enum XML_Error result = addBinding(parser, attId->prefix, attId, + appAtts[attIndex], bindingsPtr); + if (result) + return result; --attIndex; } else { + /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; @@ -1577,128 +2833,311 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, else attIndex++; } + + /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ nSpecifiedAtts = attIndex; - if (tagNamePtr) { - int j; - for (j = 0; j < nDefaultAtts; j++) { - const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; - if (!(da->id->name)[-1] && da->value) { - if (da->id->prefix) { - if (da->id->xmlns) { - if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr)) - return XML_ERROR_NO_MEMORY; - } - else { - (da->id->name)[-1] = 2; - nPrefixes++; - appAtts[attIndex++] = da->id->name; - appAtts[attIndex++] = da->value; - } - } - else { - (da->id->name)[-1] = 1; - appAtts[attIndex++] = da->id->name; - appAtts[attIndex++] = da->value; - } + if (elementType->idAtt && (elementType->idAtt->name)[-1]) { + for (i = 0; i < attIndex; i += 2) + if (appAtts[i] == elementType->idAtt->name) { + idAttIndex = i; + break; + } + } + else + idAttIndex = -1; + + /* do attribute defaulting */ + for (i = 0; i < nDefaultAtts; i++) { + const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; + if (!(da->id->name)[-1] && da->value) { + if (da->id->prefix) { + if (da->id->xmlns) { + enum XML_Error result = addBinding(parser, da->id->prefix, da->id, + da->value, bindingsPtr); + if (result) + return result; + } + else { + (da->id->name)[-1] = 2; + nPrefixes++; + appAtts[attIndex++] = da->id->name; + appAtts[attIndex++] = da->value; + } + } + else { + (da->id->name)[-1] = 1; + appAtts[attIndex++] = da->id->name; + appAtts[attIndex++] = da->value; } } - appAtts[attIndex] = 0; } + appAtts[attIndex] = 0; + + /* expand prefixed attribute names, check for duplicates, + and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { + int j; /* hash table index */ + unsigned long version = nsAttsVersion; + int nsAttsSize = (int)1 << nsAttsPower; + /* size of hash table must be at least 2 * (# of prefixed attributes) */ + if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ + NS_ATT *temp; + /* hash table size must also be a power of 2 and >= 8 */ + while (nPrefixes >> nsAttsPower++); + if (nsAttsPower < 3) + nsAttsPower = 3; + nsAttsSize = (int)1 << nsAttsPower; + temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); + if (!temp) + return XML_ERROR_NO_MEMORY; + nsAtts = temp; + version = 0; /* force re-initialization of nsAtts hash table */ + } + /* using a version flag saves us from initializing nsAtts every time */ + if (!version) { /* initialize version flags when version wraps around */ + version = INIT_ATTS_VERSION; + for (j = nsAttsSize; j != 0; ) + nsAtts[--j].version = version; + } + nsAttsVersion = --version; + + /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { - if (appAtts[i][-1] == 2) { + const XML_Char *s = appAtts[i]; + if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; - ((XML_Char *)(appAtts[i]))[-1] = 0; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0); - if (id->prefix->binding) { - int j; - const BINDING *b = id->prefix->binding; - const XML_Char *s = appAtts[i]; - for (j = 0; j < b->uriLen; j++) { - if (!poolAppendChar(&tempPool, b->uri[j])) - return XML_ERROR_NO_MEMORY; - } - while (*s++ != ':') - ; - do { - if (!poolAppendChar(&tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); - appAtts[i] = poolStart(&tempPool); - poolFinish(&tempPool); - } - if (!--nPrefixes) - break; + const BINDING *b; + unsigned long uriHash = hash_secret_salt; + ((XML_Char *)s)[-1] = 0; /* clear flag */ + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + b = id->prefix->binding; + if (!b) + return XML_ERROR_UNBOUND_PREFIX; + + /* as we expand the name we also calculate its hash value */ + for (j = 0; j < b->uriLen; j++) { + const XML_Char c = b->uri[j]; + if (!poolAppendChar(&tempPool, c)) + return XML_ERROR_NO_MEMORY; + uriHash = CHAR_HASH(uriHash, c); + } + while (*s++ != XML_T(ASCII_COLON)) + ; + do { /* copies null terminator */ + const XML_Char c = *s; + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + uriHash = CHAR_HASH(uriHash, c); + } while (*s++); + + { /* Check hash table for duplicate of expanded name (uriName). + Derived from code in lookup(parser, HASH_TABLE *table, ...). + */ + unsigned char step = 0; + unsigned long mask = nsAttsSize - 1; + j = uriHash & mask; /* index into hash table */ + while (nsAtts[j].version == version) { + /* for speed we compare stored hash values first */ + if (uriHash == nsAtts[j].hash) { + const XML_Char *s1 = poolStart(&tempPool); + const XML_Char *s2 = nsAtts[j].uriName; + /* s1 is null terminated, but not s2 */ + for (; *s1 == *s2 && *s1 != 0; s1++, s2++); + if (*s1 == 0) + return XML_ERROR_DUPLICATE_ATTRIBUTE; + } + if (!step) + step = PROBE_STEP(uriHash, mask, nsAttsPower); + j < step ? (j += nsAttsSize - step) : (j -= step); + } + } + + if (ns_triplets) { /* append namespace separator and prefix */ + tempPool.ptr[-1] = namespaceSeparator; + s = b->prefix->name; + do { + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + } while (*s++); + } + + /* store expanded name in attribute list */ + s = poolStart(&tempPool); + poolFinish(&tempPool); + appAtts[i] = s; + + /* fill empty slot with new version, uriName and hash value */ + nsAtts[j].version = version; + nsAtts[j].hash = uriHash; + nsAtts[j].uriName = s; + + if (!--nPrefixes) { + i += 2; + break; + } } - else - ((XML_Char *)(appAtts[i]))[-1] = 0; + else /* not prefixed */ + ((XML_Char *)s)[-1] = 0; /* clear flag */ } } + /* clear flags for the remaining attributes */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; - if (!tagNamePtr) - return XML_ERROR_NONE; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; + + if (!ns) + return XML_ERROR_NONE; + + /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; if (!binding) - return XML_ERROR_NONE; + return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; - while (*localPart++ != XML_T(':')) + while (*localPart++ != XML_T(ASCII_COLON)) ; } - else if (dtd.defaultPrefix.binding) { - binding = dtd.defaultPrefix.binding; + else if (dtd->defaultPrefix.binding) { + binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; } else return XML_ERROR_NONE; + prefixLen = 0; + if (ns_triplets && binding->prefix->name) { + for (; binding->prefix->name[prefixLen++];) + ; /* prefixLen includes null terminator */ + } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; - i = binding->uriLen; - do { - if (i == binding->uriAlloc) { - binding->uri = realloc(binding->uri, binding->uriAlloc *= 2); - if (!binding->uri) - return XML_ERROR_NO_MEMORY; - } - binding->uri[i++] = *localPart; - } while (*localPart++); + tagNamePtr->prefix = binding->prefix->name; + tagNamePtr->prefixLen = prefixLen; + for (i = 0; localPart[i++];) + ; /* i includes null terminator */ + n = i + binding->uriLen + prefixLen; + if (n > binding->uriAlloc) { + TAG *p; + uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); + if (!uri) + return XML_ERROR_NO_MEMORY; + binding->uriAlloc = n + EXPAND_SPARE; + memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); + for (p = tagStack; p; p = p->parent) + if (p->name.str == binding->uri) + p->name.str = uri; + FREE(binding->uri); + binding->uri = uri; + } + /* if namespaceSeparator != '\0' then uri includes it already */ + uri = binding->uri + binding->uriLen; + memcpy(uri, localPart, i * sizeof(XML_Char)); + /* we always have a namespace separator between localPart and prefix */ + if (prefixLen) { + uri += i - 1; + *uri = namespaceSeparator; /* replace null terminator */ + memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); + } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) +/* addBinding() overwrites the value of prefix->binding without checking. + Therefore one must keep track of the old value outside of addBinding(). +*/ +static enum XML_Error +addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + const XML_Char *uri, BINDING **bindingsPtr) { + static const XML_Char xmlNamespace[] = { + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, + ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0' + }; + static const int xmlLen = + (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1; + static const XML_Char xmlnsNamespace[] = { + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, + ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, + ASCII_SLASH, '\0' + }; + static const int xmlnsLen = + (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1; + + XML_Bool mustBeXML = XML_FALSE; + XML_Bool isXML = XML_TRUE; + XML_Bool isXMLNS = XML_TRUE; + BINDING *b; int len; - for (len = 0; uri[len]; len++) - ; + + /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ + if (*uri == XML_T('\0') && prefix->name) + return XML_ERROR_UNDECLARING_PREFIX; + + if (prefix->name + && prefix->name[0] == XML_T(ASCII_x) + && prefix->name[1] == XML_T(ASCII_m) + && prefix->name[2] == XML_T(ASCII_l)) { + + /* Not allowed to bind xmlns */ + if (prefix->name[3] == XML_T(ASCII_n) + && prefix->name[4] == XML_T(ASCII_s) + && prefix->name[5] == XML_T('\0')) + return XML_ERROR_RESERVED_PREFIX_XMLNS; + + if (prefix->name[3] == XML_T('\0')) + mustBeXML = XML_TRUE; + } + + for (len = 0; uri[len]; len++) { + if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) + isXML = XML_FALSE; + + if (!mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; + + if (mustBeXML != isXML) + return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML + : XML_ERROR_RESERVED_NAMESPACE_URI; + + if (isXMLNS) + return XML_ERROR_RESERVED_NAMESPACE_URI; + if (namespaceSeparator) len++; if (freeBindingList) { b = freeBindingList; if (len > b->uriAlloc) { - b->uri = realloc(b->uri, len + EXPAND_SPARE); - if (!b->uri) - return 0; + XML_Char *temp = (XML_Char *)REALLOC(b->uri, + sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } freeBindingList = b->nextTagBinding; } else { - b = malloc(sizeof(BINDING)); + b = (BINDING *)MALLOC(sizeof(BINDING)); if (!b) - return 0; - b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE); + return XML_ERROR_NO_MEMORY; + b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) { - free(b); - return 0; + FREE(b); + return XML_ERROR_NO_MEMORY; } - b->uriAlloc = len; + b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); @@ -1707,44 +3146,56 @@ int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, con b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; - if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix) - prefix->binding = 0; + /* NULL binding when default namespace undeclared */ + if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix) + prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; - if (startNamespaceDeclHandler) + /* if attId == NULL then we are not starting a namespace scope */ + if (attId && startNamespaceDeclHandler) startNamespaceDeclHandler(handlerArg, prefix->name, - prefix->binding ? uri : 0); - return 1; + prefix->binding ? uri : 0); + return XML_ERROR_NONE; } /* The idea here is to avoid using stack for each CDATA section when -the whole file is parsed with one call. */ - -static -enum XML_Error cdataSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +cdataSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); + enum XML_Error result = doCdataSection(parser, encoding, &start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result != XML_ERROR_NONE) + return result; if (start) { - processor = contentProcessor; - return contentProcessor(parser, start, end, endPtr); + if (parentParser) { /* we are parsing an external entity */ + processor = externalEntityContentProcessor; + return externalEntityContentProcessor(parser, start, end, endPtr); + } + else { + processor = contentProcessor; + return contentProcessor(parser, start, end, endPtr); + } } return result; } -/* startPtr gets set to non-null is the section is closed, and to null if -the section is not yet closed. */ - -static -enum XML_Error doCdataSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr) +/* startPtr gets set to non-null if the section is closed, and to null if + the section is not yet closed. +*/ +static enum XML_Error +doCdataSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr, + XML_Bool haveMore) { const char *s = *startPtr; const char **eventPP; @@ -1759,7 +3210,8 @@ enum XML_Error doCdataSection(XML_Parser parser, eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; - *startPtr = 0; + *startPtr = NULL; + for (;;) { const char *next; int tok = XmlCdataSectionTok(enc, s, end, &next); @@ -1767,69 +3219,173 @@ enum XML_Error doCdataSection(XML_Parser parser, switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: if (endCdataSectionHandler) - endCdataSectionHandler(handlerArg); + endCdataSectionHandler(handlerArg); #if 0 /* see comment under XML_TOK_CDATA_SECT_OPEN */ else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); + characterDataHandler(handlerArg, dataBuf, 0); #endif else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); *startPtr = next; - return XML_ERROR_NONE; + *nextPtr = next; + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; - } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = next; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if (s == next) + break; + *eventPP = s; + } + } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: - abort(); + *eventPP = next; + return XML_ERROR_UNEXPECTED_STATE; } + *eventPP = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } + } + /* not reached */ +} + +#ifdef XML_DTD + +/* The idea here is to avoid using stack for each IGNORE section when + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +ignoreSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result != XML_ERROR_NONE) + return result; + if (start) { + processor = prologProcessor; + return prologProcessor(parser, start, end, endPtr); + } + return result; +} + +/* startPtr gets set to non-null is the section is closed, and to null + if the section is not yet closed. +*/ +static enum XML_Error +doIgnoreSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr, + XML_Bool haveMore) +{ + const char *next; + int tok; + const char *s = *startPtr; + const char **eventPP; + const char **eventEndPP; + if (enc == encoding) { + eventPP = &eventPtr; + *eventPP = s; + eventEndPP = &eventEndPtr; + } + else { + eventPP = &(openInternalEntities->internalEventPtr); + eventEndPP = &(openInternalEntities->internalEventEndPtr); + } + *eventPP = s; + *startPtr = NULL; + tok = XmlIgnoreSectionTok(enc, s, end, &next); + *eventEndPP = next; + switch (tok) { + case XML_TOK_IGNORE_SECT: + if (defaultHandler) + reportDefault(parser, enc, s, next); + *startPtr = next; + *nextPtr = next; + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; + case XML_TOK_INVALID: + *eventPP = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL_CHAR: + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_PARTIAL: + case XML_TOK_NONE: + if (haveMore) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ + default: + *eventPP = next; + return XML_ERROR_UNEXPECTED_STATE; } /* not reached */ } +#endif /* XML_DTD */ + static enum XML_Error initializeEncoding(XML_Parser parser) { @@ -1837,15 +3393,14 @@ initializeEncoding(XML_Parser parser) #ifdef XML_UNICODE char encodingBuf[128]; if (!protocolEncodingName) - s = 0; + s = NULL; else { int i; for (i = 0; protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || protocolEncodingName[i] >= 0x80 - || protocolEncodingName[i] < 0) { - encodingBuf[0] = '\0'; - break; + || (protocolEncodingName[i] & ~0x7f) != 0) { + encodingBuf[0] = '\0'; + break; } encodingBuf[i] = (char)protocolEncodingName[i]; } @@ -1862,52 +3417,90 @@ initializeEncoding(XML_Parser parser) static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next) + const char *s, const char *next) { - const char *encodingName = 0; - const ENCODING *newEncoding = 0; - const char *version; + const char *encodingName = NULL; + const XML_Char *storedEncName = NULL; + const ENCODING *newEncoding = NULL; + const char *version = NULL; + const char *versionend; + const XML_Char *storedversion = NULL; int standalone = -1; if (!(ns ? XmlParseXmlDeclNS - : XmlParseXmlDecl)(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &encodingName, - &newEncoding, - &standalone)) - return XML_ERROR_SYNTAX; - if (!isGeneralTextEntity && standalone == 1) - dtd.standalone = 1; - if (defaultHandler) + : XmlParseXmlDecl)(isGeneralTextEntity, + encoding, + s, + next, + &eventPtr, + &version, + &versionend, + &encodingName, + &newEncoding, + &standalone)) { + if (isGeneralTextEntity) + return XML_ERROR_TEXT_DECL; + else + return XML_ERROR_XML_DECL; + } + if (!isGeneralTextEntity && standalone == 1) { + _dtd->standalone = XML_TRUE; +#ifdef XML_DTD + if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif /* XML_DTD */ + } + if (xmlDeclHandler) { + if (encodingName != NULL) { + storedEncName = poolStoreString(&temp2Pool, + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (!storedEncName) + return XML_ERROR_NO_MEMORY; + poolFinish(&temp2Pool); + } + if (version) { + storedversion = poolStoreString(&temp2Pool, + encoding, + version, + versionend - encoding->minBytesPerChar); + if (!storedversion) + return XML_ERROR_NO_MEMORY; + } + xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); + } + else if (defaultHandler) reportDefault(parser, encoding, s, next); - if (!protocolEncodingName) { + if (protocolEncodingName == NULL) { if (newEncoding) { if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { - eventPtr = encodingName; - return XML_ERROR_INCORRECT_ENCODING; + eventPtr = encodingName; + return XML_ERROR_INCORRECT_ENCODING; } encoding = newEncoding; } else if (encodingName) { enum XML_Error result; - const XML_Char *s = poolStoreString(&tempPool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (!s) - return XML_ERROR_NO_MEMORY; - result = handleUnknownEncoding(parser, s); - poolDiscard(&tempPool); + if (!storedEncName) { + storedEncName = poolStoreString( + &temp2Pool, encoding, encodingName, + encodingName + XmlNameLength(encoding, encodingName)); + if (!storedEncName) + return XML_ERROR_NO_MEMORY; + } + result = handleUnknownEncoding(parser, storedEncName); + poolClear(&temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) - eventPtr = encodingName; + eventPtr = encodingName; return result; } } + + if (storedEncName || storedversion) + poolClear(&temp2Pool); + return XML_ERROR_NONE; } @@ -1919,41 +3512,42 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) int i; for (i = 0; i < 256; i++) info.map[i] = -1; - info.convert = 0; - info.data = 0; - info.release = 0; - if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { + info.convert = NULL; + info.data = NULL; + info.release = NULL; + if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, + &info)) { ENCODING *enc; - unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding()); + unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); if (!unknownEncodingMem) { - if (info.release) - info.release(info.data); - return XML_ERROR_NO_MEMORY; + if (info.release) + info.release(info.data); + return XML_ERROR_NO_MEMORY; } enc = (ns - ? XmlInitUnknownEncodingNS - : XmlInitUnknownEncoding)(unknownEncodingMem, - info.map, - info.convert, - info.data); + ? XmlInitUnknownEncodingNS + : XmlInitUnknownEncoding)(unknownEncodingMem, + info.map, + info.convert, + info.data); if (enc) { - unknownEncodingData = info.data; - unknownEncodingRelease = info.release; - encoding = enc; - return XML_ERROR_NONE; + unknownEncodingData = info.data; + unknownEncodingRelease = info.release; + encoding = enc; + return XML_ERROR_NONE; } } - if (info.release) + if (info.release != NULL) info.release(info.data); } return XML_ERROR_UNKNOWN_ENCODING; } -static enum XML_Error +static enum XML_Error PTRCALL prologInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *s, + const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) @@ -1962,382 +3556,1375 @@ prologInitProcessor(XML_Parser parser, return prologProcessor(parser, s, end, nextPtr); } -static enum XML_Error +#ifdef XML_DTD + +static enum XML_Error PTRCALL +externalParEntInitProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + enum XML_Error result = initializeEncoding(parser); + if (result != XML_ERROR_NONE) + return result; + + /* we know now that XML_Parse(Buffer) has been called, + so we consider the external parameter entity read */ + _dtd->paramEntityRead = XML_TRUE; + + if (prologState.inEntityValue) { + processor = entityValueInitProcessor; + return entityValueInitProcessor(parser, s, end, nextPtr); + } + else { + processor = externalParEntProcessor; + return externalParEntProcessor(parser, s, end, nextPtr); + } +} + +static enum XML_Error PTRCALL +entityValueInitProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + int tok; + const char *start = s; + const char *next = start; + eventPtr = start; + + for (;;) { + tok = XmlPrologTok(encoding, start, end, &next); + eventEndPtr = next; + if (tok <= 0) { + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + /* found end of entity value - can store it now */ + return storeEntityValue(parser, encoding, s, end); + } + else if (tok == XML_TOK_XML_DECL) { + enum XML_Error result; + result = processXmlDecl(parser, 0, start, next); + if (result != XML_ERROR_NONE) + return result; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + *nextPtr = next; + } + /* stop scanning for text declaration - we found one */ + processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } + /* If we are at the end of the buffer, this would cause XmlPrologTok to + return XML_TOK_NONE on the next call, which would then cause the + function to exit with *nextPtr set to s - that is what we want for other + tokens, but not for the BOM - we would rather like to skip it; + then, when this routine is entered the next time, XmlPrologTok will + return XML_TOK_INVALID, since the BOM is still in the buffer + */ + else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) { + *nextPtr = next; + return XML_ERROR_NONE; + } + start = next; + eventPtr = start; + } +} + +static enum XML_Error PTRCALL +externalParEntProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + const char *next = s; + int tok; + + tok = XmlPrologTok(encoding, s, end, &next); + if (tok <= 0) { + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + } + /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. + However, when parsing an external subset, doProlog will not accept a BOM + as valid, and report a syntax error, so we have to skip the BOM + */ + else if (tok == XML_TOK_BOM) { + s = next; + tok = XmlPrologTok(encoding, s, end, &next); + } + + processor = prologProcessor; + return doProlog(parser, encoding, s, end, tok, next, + nextPtr, (XML_Bool)!ps_finalBuffer); +} + +static enum XML_Error PTRCALL +entityValueProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + const char *start = s; + const char *next = s; + const ENCODING *enc = encoding; + int tok; + + for (;;) { + tok = XmlPrologTok(enc, start, end, &next); + if (tok <= 0) { + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + /* found end of entity value - can store it now */ + return storeEntityValue(parser, enc, s, end); + } + start = next; + } +} + +#endif /* XML_DTD */ + +static enum XML_Error PTRCALL prologProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *s, + const char *end, + const char **nextPtr) +{ + const char *next = s; + int tok = XmlPrologTok(encoding, s, end, &next); + return doProlog(parser, encoding, s, end, tok, next, + nextPtr, (XML_Bool)!ps_finalBuffer); +} + +static enum XML_Error +doProlog(XML_Parser parser, + const ENCODING *enc, + const char *s, + const char *end, + int tok, + const char *next, + const char **nextPtr, + XML_Bool haveMore) { +#ifdef XML_DTD + static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' }; +#endif /* XML_DTD */ + static const XML_Char atypeCDATA[] = + { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; + static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' }; + static const XML_Char atypeIDREF[] = + { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; + static const XML_Char atypeIDREFS[] = + { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; + static const XML_Char atypeENTITY[] = + { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; + static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N, + ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; + static const XML_Char atypeNMTOKEN[] = { + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; + static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, + ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; + static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T, + ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' }; + static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' }; + static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' }; + + /* save one level of indirection */ + DTD * const dtd = _dtd; + + const char **eventPP; + const char **eventEndPP; + enum XML_Content_Quant quant; + + if (enc == encoding) { + eventPP = &eventPtr; + eventEndPP = &eventEndPtr; + } + else { + eventPP = &(openInternalEntities->internalEventPtr); + eventEndPP = &(openInternalEntities->internalEventEndPtr); + } + for (;;) { - const char *next; - int tok = XmlPrologTok(encoding, s, end, &next); + int role; + XML_Bool handleDefault = XML_TRUE; + *eventPP = s; + *eventEndPP = next; if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { - *nextPtr = s; - return XML_ERROR_NONE; + if (haveMore && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: - eventPtr = next; - return XML_ERROR_INVALID_TOKEN; - case XML_TOK_NONE: - return XML_ERROR_NO_ELEMENTS; + *eventPP = next; + return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - return XML_ERROR_UNCLOSED_TOKEN; + return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_TRAILING_CR: - eventPtr = s + encoding->minBytesPerChar; - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_PARTIAL_CHAR; + case -XML_TOK_PROLOG_S: + tok = -tok; + break; + case XML_TOK_NONE: +#ifdef XML_DTD + /* for internal PE NOT referenced between declarations */ + if (enc != encoding && !openInternalEntities->betweenDecl) { + *nextPtr = s; + return XML_ERROR_NONE; + } + /* WFC: PE Between Declarations - must check that PE contains + complete markup, not only for external PEs, but also for + internal PEs if the reference occurs between declarations. + */ + if (isParamEntity || enc != encoding) { + if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + == XML_ROLE_ERROR) + return XML_ERROR_INCOMPLETE_PE; + *nextPtr = s; + return XML_ERROR_NONE; + } +#endif /* XML_DTD */ + return XML_ERROR_NO_ELEMENTS; default: - abort(); + tok = -tok; + next = end; + break; } } - switch (XmlTokenRole(&prologState, tok, s, next, encoding)) { + role = XmlTokenRole(&prologState, tok, s, next, enc); + switch (role) { case XML_ROLE_XML_DECL: { - enum XML_Error result = processXmlDecl(parser, 0, s, next); - if (result != XML_ERROR_NONE) - return result; + enum XML_Error result = processXmlDecl(parser, 0, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = encoding; + handleDefault = XML_FALSE; } break; - case XML_ROLE_DOCTYPE_SYSTEM_ID: - if (!dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; - hadExternalDoctype = 1; + case XML_ROLE_DOCTYPE_NAME: + if (startDoctypeDeclHandler) { + doctypeName = poolStoreString(&tempPool, enc, s, next); + if (!doctypeName) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + doctypePubid = NULL; + handleDefault = XML_FALSE; + } + doctypeSysid = NULL; /* always initialize to NULL */ + break; + case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: + if (startDoctypeDeclHandler) { + startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, + doctypePubid, 1); + doctypeName = NULL; + poolClear(&tempPool); + handleDefault = XML_FALSE; + } break; +#ifdef XML_DTD + case XML_ROLE_TEXT_DECL: + { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = encoding; + handleDefault = XML_FALSE; + } + break; +#endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: +#ifdef XML_DTD + useForeignDTD = XML_FALSE; + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; +#endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; + if (startDoctypeDeclHandler) { + XML_Char *pubId; + if (!XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + pubId = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!pubId) + return XML_ERROR_NO_MEMORY; + normalizePublicId(pubId); + poolFinish(&tempPool); + doctypePubid = pubId; + handleDefault = XML_FALSE; + goto alreadyChecked; + } + /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: - if (!XmlIsPublicId(encoding, s, next, &eventPtr)) - return XML_ERROR_SYNTAX; - if (declEntity) { - XML_Char *tem = poolStoreString(&dtd.pool, - encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); - if (!tem) - return XML_ERROR_NO_MEMORY; - normalizePublicId(tem); - declEntity->publicId = tem; - poolFinish(&dtd.pool); + if (!XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + alreadyChecked: + if (dtd->keepProcessing && declEntity) { + XML_Char *tem = poolStoreString(&dtd->pool, + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!tem) + return XML_ERROR_NO_MEMORY; + normalizePublicId(tem); + declEntity->publicId = tem; + poolFinish(&dtd->pool); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + break; + case XML_ROLE_DOCTYPE_CLOSE: + if (doctypeName) { + startDoctypeDeclHandler(handlerArg, doctypeName, + doctypeSysid, doctypePubid, 0); + poolClear(&tempPool); + handleDefault = XML_FALSE; + } + /* doctypeSysid will be non-NULL in the case of a previous + XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler + was not set, indicating an external subset + */ +#ifdef XML_DTD + if (doctypeSysid || useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; + if (paramEntityParsing && externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, + &dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!entity) + return XML_ERROR_NO_MEMORY; + if (useForeignDTD) + entity->base = curBase; + dtd->paramEntityRead = XML_FALSE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead) { + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else if (!doctypeSysid) + dtd->hasParamEntityRefs = hadParamEntityRefs; + /* end of DTD - no need to update dtd->keepProcessing */ + } + useForeignDTD = XML_FALSE; + } +#endif /* XML_DTD */ + if (endDoctypeDeclHandler) { + endDoctypeDeclHandler(handlerArg); + handleDefault = XML_FALSE; } break; case XML_ROLE_INSTANCE_START: +#ifdef XML_DTD + /* if there is no DOCTYPE declaration then now is the + last chance to read the foreign DTD + */ + if (useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; + if (paramEntityParsing && externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!entity) + return XML_ERROR_NO_MEMORY; + entity->base = curBase; + dtd->paramEntityRead = XML_FALSE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead) { + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else + dtd->hasParamEntityRefs = hadParamEntityRefs; + /* end of DTD - no need to update dtd->keepProcessing */ + } + } +#endif /* XML_DTD */ processor = contentProcessor; - if (hadExternalDoctype) - dtd.complete = 0; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - { - const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); - if (!declElementType) - return XML_ERROR_NO_MEMORY; - if (declElementType->name != name) - poolDiscard(&dtd.pool); - else { - poolFinish(&dtd.pool); - if (!setElementTypePrefix(parser, declElementType)) - return XML_ERROR_NO_MEMORY; - } - break; - } + declElementType = getElementType(parser, enc, s, next); + if (!declElementType) + return XML_ERROR_NO_MEMORY; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: - declAttributeId = getAttributeId(parser, encoding, s, next); + declAttributeId = getAttributeId(parser, enc, s, next); if (!declAttributeId) - return XML_ERROR_NO_MEMORY; - declAttributeIsCdata = 0; - break; + return XML_ERROR_NO_MEMORY; + declAttributeIsCdata = XML_FALSE; + declAttributeType = NULL; + declAttributeIsId = XML_FALSE; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: - declAttributeIsCdata = 1; + declAttributeIsCdata = XML_TRUE; + declAttributeType = atypeCDATA; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_ID: + declAttributeIsId = XML_TRUE; + declAttributeType = atypeID; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_IDREF: + declAttributeType = atypeIDREF; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: + declAttributeType = atypeIDREFS; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: + declAttributeType = atypeENTITY; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: + declAttributeType = atypeENTITIES; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: + declAttributeType = atypeNMTOKEN; + goto checkAttListDeclHandler; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: + declAttributeType = atypeNMTOKENS; + checkAttListDeclHandler: + if (dtd->keepProcessing && attlistDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ATTRIBUTE_ENUM_VALUE: + case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: + if (dtd->keepProcessing && attlistDeclHandler) { + const XML_Char *prefix; + if (declAttributeType) { + prefix = enumValueSep; + } + else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE + ? notationPrefix + : enumValueStart); + } + if (!poolAppendString(&tempPool, prefix)) + return XML_ERROR_NO_MEMORY; + if (!poolAppend(&tempPool, enc, s, next)) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + handleDefault = XML_FALSE; + } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: - if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0)) - return XML_ERROR_NO_MEMORY; + if (dtd->keepProcessing) { + if (!defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, declAttributeIsId, + 0, parser)) + return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { + /* Enumerated or Notation type */ + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) + || !poolAppendChar(&tempPool, XML_T('\0'))) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + handleDefault = XML_FALSE; + } + } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: - { - const XML_Char *attVal; - enum XML_Error result - = storeAttributeValue(parser, encoding, declAttributeIsCdata, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar, - &dtd.pool); - if (result) - return result; - attVal = poolStart(&dtd.pool); - poolFinish(&dtd.pool); - if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal)) - return XML_ERROR_NO_MEMORY; - break; + if (dtd->keepProcessing) { + const XML_Char *attVal; + enum XML_Error result = + storeAttributeValue(parser, enc, declAttributeIsCdata, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar, + &dtd->pool); + if (result) + return result; + attVal = poolStart(&dtd->pool); + poolFinish(&dtd->pool); + /* ID attributes aren't allowed to have a default */ + if (!defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, XML_FALSE, attVal, parser)) + return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { + /* Enumerated or Notation type */ + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) + || !poolAppendChar(&tempPool, XML_T('\0'))) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + attVal, + role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + handleDefault = XML_FALSE; + } } + break; case XML_ROLE_ENTITY_VALUE: - { - enum XML_Error result = storeEntityValue(parser, s, next); - if (result != XML_ERROR_NONE) - return result; + if (dtd->keepProcessing) { + enum XML_Error result = storeEntityValue(parser, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (declEntity) { + declEntity->textPtr = poolStart(&dtd->entityValuePool); + declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); + poolFinish(&dtd->entityValuePool); + if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + declEntity->textPtr, + declEntity->textLen, + curBase, 0, 0, 0); + handleDefault = XML_FALSE; + } + } + else + poolDiscard(&dtd->entityValuePool); + if (result != XML_ERROR_NONE) + return result; } break; + case XML_ROLE_DOCTYPE_SYSTEM_ID: +#ifdef XML_DTD + useForeignDTD = XML_FALSE; +#endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; + if (startDoctypeDeclHandler) { + doctypeSysid = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (doctypeSysid == NULL) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + handleDefault = XML_FALSE; + } +#ifdef XML_DTD + else + /* use externalSubsetName to make doctypeSysid non-NULL + for the case where no startDoctypeDeclHandler is set */ + doctypeSysid = externalSubsetName; +#endif /* XML_DTD */ + if (!dtd->standalone +#ifdef XML_DTD + && !paramEntityParsing +#endif /* XML_DTD */ + && notStandaloneHandler + && !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; +#ifndef XML_DTD + break; +#else /* XML_DTD */ + if (!declEntity) { + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + declEntity->publicId = NULL; + } + /* fall through */ +#endif /* XML_DTD */ case XML_ROLE_ENTITY_SYSTEM_ID: - if (declEntity) { - declEntity->systemId = poolStoreString(&dtd.pool, encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); - if (!declEntity->systemId) - return XML_ERROR_NO_MEMORY; - declEntity->base = dtd.base; - poolFinish(&dtd.pool); + if (dtd->keepProcessing && declEntity) { + declEntity->systemId = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!declEntity->systemId) + return XML_ERROR_NO_MEMORY; + declEntity->base = curBase; + poolFinish(&dtd->pool); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + break; + case XML_ROLE_ENTITY_COMPLETE: + if (dtd->keepProcessing && declEntity && entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + 0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + 0); + handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: - if (declEntity) { - declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next); - if (!declEntity->notation) - return XML_ERROR_NO_MEMORY; - poolFinish(&dtd.pool); - if (unparsedEntityDeclHandler) { - eventPtr = eventEndPtr = s; - unparsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); - } - + if (dtd->keepProcessing && declEntity) { + declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); + if (!declEntity->notation) + return XML_ERROR_NO_MEMORY; + poolFinish(&dtd->pool); + if (unparsedEntityDeclHandler) { + *eventEndPP = s; + unparsedEntityDeclHandler(handlerArg, + declEntity->name, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + handleDefault = XML_FALSE; + } + else if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + handleDefault = XML_FALSE; + } } break; case XML_ROLE_GENERAL_ENTITY_NAME: { - const XML_Char *name; - if (XmlPredefinedEntityName(encoding, s, next)) { - declEntity = 0; - break; - } - name = poolStoreString(&dtd.pool, encoding, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - if (dtd.complete) { - declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd.pool); - declEntity = 0; - } - else - poolFinish(&dtd.pool); - } - else { - poolDiscard(&dtd.pool); - declEntity = 0; - } + if (XmlPredefinedEntityName(enc, s, next)) { + declEntity = NULL; + break; + } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd->pool); + declEntity = NULL; + } + else { + poolFinish(&dtd->pool); + declEntity->publicId = NULL; + declEntity->is_param = XML_FALSE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + declEntity->is_internal = !(parentParser || openInternalEntities); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + } + else { + poolDiscard(&dtd->pool); + declEntity = NULL; + } } break; case XML_ROLE_PARAM_ENTITY_NAME: - declEntity = 0; +#ifdef XML_DTD + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, + name, sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd->pool); + declEntity = NULL; + } + else { + poolFinish(&dtd->pool); + declEntity->publicId = NULL; + declEntity->is_param = XML_TRUE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + declEntity->is_internal = !(parentParser || openInternalEntities); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + } + else { + poolDiscard(&dtd->pool); + declEntity = NULL; + } +#else /* not XML_DTD */ + declEntity = NULL; +#endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: - declNotationPublicId = 0; - declNotationName = 0; + declNotationPublicId = NULL; + declNotationName = NULL; if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, encoding, s, next); - if (!declNotationName) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + declNotationName = poolStoreString(&tempPool, enc, s, next); + if (!declNotationName) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(encoding, s, next, &eventPtr)) - return XML_ERROR_SYNTAX; - if (declNotationName) { - XML_Char *tem = poolStoreString(&tempPool, - encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); - if (!tem) - return XML_ERROR_NO_MEMORY; - normalizePublicId(tem); - declNotationPublicId = tem; - poolFinish(&tempPool); + if (!XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + if (declNotationName) { /* means notationDeclHandler != NULL */ + XML_Char *tem = poolStoreString(&tempPool, + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!tem) + return XML_ERROR_NO_MEMORY; + normalizePublicId(tem); + declNotationPublicId = tem; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: if (declNotationName && notationDeclHandler) { - const XML_Char *systemId - = poolStoreString(&tempPool, encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); - if (!systemId) - return XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = s; - notationDeclHandler(handlerArg, - declNotationName, - dtd.base, - systemId, - declNotationPublicId); + const XML_Char *systemId + = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!systemId) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + notationDeclHandler(handlerArg, + declNotationName, + curBase, + systemId, + declNotationPublicId); + handleDefault = XML_FALSE; } poolClear(&tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (declNotationPublicId && notationDeclHandler) { - eventPtr = eventEndPtr = s; - notationDeclHandler(handlerArg, - declNotationName, - dtd.base, - 0, - declNotationPublicId); + *eventEndPP = s; + notationDeclHandler(handlerArg, + declNotationName, + curBase, + 0, + declNotationPublicId); + handleDefault = XML_FALSE; } poolClear(&tempPool); break; case XML_ROLE_ERROR: - eventPtr = s; switch (tok) { case XML_TOK_PARAM_ENTITY_REF: - return XML_ERROR_PARAM_ENTITY_REF; + /* PE references in internal subset are + not allowed within declarations. */ + return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: - return XML_ERROR_MISPLACED_XML_PI; + return XML_ERROR_MISPLACED_XML_PI; default: - return XML_ERROR_SYNTAX; + return XML_ERROR_SYNTAX; + } +#ifdef XML_DTD + case XML_ROLE_IGNORE_SECT: + { + enum XML_Error result; + if (defaultHandler) + reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (!next) { + processor = ignoreSectionProcessor; + return result; + } } + break; +#endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { - if (groupSize) - groupConnector = realloc(groupConnector, groupSize *= 2); - else - groupConnector = malloc(groupSize = 32); - if (!groupConnector) - return XML_ERROR_NO_MEMORY; + if (groupSize) { + char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + groupConnector = temp; + if (dtd->scaffIndex) { + int *temp = (int *)REALLOC(dtd->scaffIndex, + groupSize * sizeof(int)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + dtd->scaffIndex = temp; + } + } + else { + groupConnector = (char *)MALLOC(groupSize = 32); + if (!groupConnector) + return XML_ERROR_NO_MEMORY; + } + } + groupConnector[prologState.level] = 0; + if (dtd->in_eldecl) { + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd->scaffIndex[dtd->scaffLevel] = myindex; + dtd->scaffLevel++; + dtd->scaffold[myindex].type = XML_CTYPE_SEQ; + if (elementDeclHandler) + handleDefault = XML_FALSE; + } + break; + case XML_ROLE_GROUP_SEQUENCE: + if (groupConnector[prologState.level] == ASCII_PIPE) + return XML_ERROR_SYNTAX; + groupConnector[prologState.level] = ASCII_COMMA; + if (dtd->in_eldecl && elementDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_GROUP_CHOICE: + if (groupConnector[prologState.level] == ASCII_COMMA) + return XML_ERROR_SYNTAX; + if (dtd->in_eldecl + && !groupConnector[prologState.level] + && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + != XML_CTYPE_MIXED) + ) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_CHOICE; + if (elementDeclHandler) + handleDefault = XML_FALSE; + } + groupConnector[prologState.level] = ASCII_PIPE; + break; + case XML_ROLE_PARAM_ENTITY_REF: +#ifdef XML_DTD + case XML_ROLE_INNER_PARAM_ENTITY_REF: + dtd->hasParamEntityRefs = XML_TRUE; + if (!paramEntityParsing) + dtd->keepProcessing = dtd->standalone; + else { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); + poolDiscard(&dtd->pool); + /* first, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity handler + */ + if (prologState.documentEntity && + (dtd->standalone + ? !openInternalEntities + : !dtd->hasParamEntityRefs)) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + dtd->keepProcessing = dtd->standalone; + /* cannot report skipped entities in declarations */ + if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) { + skippedEntityHandler(handlerArg, name, 1); + handleDefault = XML_FALSE; + } + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + XML_Bool betweenDecl = + (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); + result = processInternalEntity(parser, entity, betweenDecl); + if (result != XML_ERROR_NONE) + return result; + handleDefault = XML_FALSE; + break; + } + if (externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) { + entity->open = XML_FALSE; + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } + entity->open = XML_FALSE; + handleDefault = XML_FALSE; + if (!dtd->paramEntityRead) { + dtd->keepProcessing = dtd->standalone; + break; + } + } + else { + dtd->keepProcessing = dtd->standalone; + break; + } + } +#endif /* XML_DTD */ + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + break; + + /* Element declaration stuff */ + + case XML_ROLE_ELEMENT_NAME: + if (elementDeclHandler) { + declElementType = getElementType(parser, enc, s, next); + if (!declElementType) + return XML_ERROR_NO_MEMORY; + dtd->scaffLevel = 0; + dtd->scaffCount = 0; + dtd->in_eldecl = XML_TRUE; + handleDefault = XML_FALSE; + } + break; + + case XML_ROLE_CONTENT_ANY: + case XML_ROLE_CONTENT_EMPTY: + if (dtd->in_eldecl) { + if (elementDeclHandler) { + XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); + if (!content) + return XML_ERROR_NO_MEMORY; + content->quant = XML_CQUANT_NONE; + content->name = NULL; + content->numchildren = 0; + content->children = NULL; + content->type = ((role == XML_ROLE_CONTENT_ANY) ? + XML_CTYPE_ANY : + XML_CTYPE_EMPTY); + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, content); + handleDefault = XML_FALSE; + } + dtd->in_eldecl = XML_FALSE; } - groupConnector[prologState.level] = 0; break; - case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') { - eventPtr = s; - return XML_ERROR_SYNTAX; + + case XML_ROLE_CONTENT_PCDATA: + if (dtd->in_eldecl) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_MIXED; + if (elementDeclHandler) + handleDefault = XML_FALSE; } - groupConnector[prologState.level] = ','; break; - case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') { - eventPtr = s; - return XML_ERROR_SYNTAX; + + case XML_ROLE_CONTENT_ELEMENT: + quant = XML_CQUANT_NONE; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_OPT: + quant = XML_CQUANT_OPT; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_REP: + quant = XML_CQUANT_REP; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_PLUS: + quant = XML_CQUANT_PLUS; + elementContent: + if (dtd->in_eldecl) { + ELEMENT_TYPE *el; + const XML_Char *name; + int nameLen; + const char *nxt = (quant == XML_CQUANT_NONE + ? next + : next - enc->minBytesPerChar); + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd->scaffold[myindex].type = XML_CTYPE_NAME; + dtd->scaffold[myindex].quant = quant; + el = getElementType(parser, enc, s, nxt); + if (!el) + return XML_ERROR_NO_MEMORY; + name = el->name; + dtd->scaffold[myindex].name = name; + nameLen = 0; + for (; name[nameLen++]; ); + dtd->contentStringLen += nameLen; + if (elementDeclHandler) + handleDefault = XML_FALSE; } - groupConnector[prologState.level] = '|'; break; - case XML_ROLE_PARAM_ENTITY_REF: - if (!dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; - dtd.complete = 0; + + case XML_ROLE_GROUP_CLOSE: + quant = XML_CQUANT_NONE; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_OPT: + quant = XML_CQUANT_OPT; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_REP: + quant = XML_CQUANT_REP; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_PLUS: + quant = XML_CQUANT_PLUS; + closeGroup: + if (dtd->in_eldecl) { + if (elementDeclHandler) + handleDefault = XML_FALSE; + dtd->scaffLevel--; + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; + if (dtd->scaffLevel == 0) { + if (!handleDefault) { + XML_Content *model = build_model(parser); + if (!model) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, model); + } + dtd->in_eldecl = XML_FALSE; + dtd->contentStringLen = 0; + } + } + break; + /* End element declaration stuff */ + + case XML_ROLE_PI: + if (!reportProcessingInstruction(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; + break; + case XML_ROLE_COMMENT: + if (!reportComment(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; break; case XML_ROLE_NONE: switch (tok) { - case XML_TOK_PI: - eventPtr = s; - eventEndPtr = next; - if (!reportProcessingInstruction(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; - break; - case XML_TOK_COMMENT: - eventPtr = s; - eventEndPtr = next; - if (!reportComment(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; - break; - } - break; - } - if (defaultHandler) { - switch (tok) { - case XML_TOK_PI: - case XML_TOK_COMMENT: case XML_TOK_BOM: - case XML_TOK_XML_DECL: - break; - default: - eventPtr = s; - eventEndPtr = next; - reportDefault(parser, encoding, s, next); + handleDefault = XML_FALSE; + break; } + break; + case XML_ROLE_DOCTYPE_NONE: + if (startDoctypeDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ENTITY_NONE: + if (dtd->keepProcessing && entityDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_NOTATION_NONE: + if (notationDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ATTLIST_NONE: + if (dtd->keepProcessing && attlistDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ELEMENT_NONE: + if (elementDeclHandler) + handleDefault = XML_FALSE; + break; + } /* end of big switch */ + + if (handleDefault && defaultHandler) + reportDefault(parser, enc, s, next); + + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + s = next; + tok = XmlPrologTok(enc, s, end, &next); } - s = next; } /* not reached */ } -static -enum XML_Error epilogProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) +static enum XML_Error PTRCALL +epilogProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) { processor = epilogProcessor; eventPtr = s; for (;;) { - const char *next; + const char *next = NULL; int tok = XmlPrologTok(encoding, s, end, &next); eventEndPtr = next; switch (tok) { - case XML_TOK_TRAILING_CR: + /* report partial linebreak - it might be the last token */ + case -XML_TOK_PROLOG_S: if (defaultHandler) { - eventEndPtr = end; - reportDefault(parser, encoding, s, end); + reportDefault(parser, encoding, s, next); + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; } - /* fall through */ + *nextPtr = next; + return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) - *nextPtr = end; + *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: if (defaultHandler) - reportDefault(parser, encoding, s, next); + reportDefault(parser, encoding, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (!ps_finalBuffer) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + if (!ps_finalBuffer) { + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } eventPtr = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } + } +} + +static enum XML_Error +processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl) +{ + const char *textStart, *textEnd; + const char *next; + enum XML_Error result; + OPEN_INTERNAL_ENTITY *openEntity; + + if (freeInternalEntities) { + openEntity = freeInternalEntities; + freeInternalEntities = openEntity->next; + } + else { + openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY)); + if (!openEntity) + return XML_ERROR_NO_MEMORY; + } + entity->open = XML_TRUE; + entity->processed = 0; + openEntity->next = openInternalEntities; + openInternalEntities = openEntity; + openEntity->entity = entity; + openEntity->startTagLevel = tagLevel; + openEntity->betweenDecl = betweenDecl; + openEntity->internalEventPtr = NULL; + openEntity->internalEventEndPtr = NULL; + textStart = (char *)entity->textPtr; + textEnd = (char *)(entity->textPtr + entity->textLen); + +#ifdef XML_DTD + if (entity->is_param) { + int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, internalEncoding, textStart, textEnd, tok, + next, &next, XML_FALSE); + } + else +#endif /* XML_DTD */ + result = doContent(parser, tagLevel, internalEncoding, textStart, + textEnd, &next, XML_FALSE); + + if (result == XML_ERROR_NONE) { + if (textEnd != next && ps_parsing == XML_SUSPENDED) { + entity->processed = (int)(next - textStart); + processor = internalEntityProcessor; + } + else { + entity->open = XML_FALSE; + openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; + } + } + return result; +} + +static enum XML_Error PTRCALL +internalEntityProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + ENTITY *entity; + const char *textStart, *textEnd; + const char *next; + enum XML_Error result; + OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities; + if (!openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + entity = openEntity->entity; + textStart = ((char *)entity->textPtr) + entity->processed; + textEnd = (char *)(entity->textPtr + entity->textLen); + +#ifdef XML_DTD + if (entity->is_param) { + int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, internalEncoding, textStart, textEnd, tok, + next, &next, XML_FALSE); + } + else +#endif /* XML_DTD */ + result = doContent(parser, openEntity->startTagLevel, internalEncoding, + textStart, textEnd, &next, XML_FALSE); + + if (result != XML_ERROR_NONE) + return result; + else if (textEnd != next && ps_parsing == XML_SUSPENDED) { + entity->processed = (int)(next - (char *)entity->textPtr); + return result; + } + else { + entity->open = XML_FALSE; + openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; + } + +#ifdef XML_DTD + if (entity->is_param) { + int tok; + processor = prologProcessor; + tok = XmlPrologTok(encoding, s, end, &next); + return doProlog(parser, encoding, s, end, tok, next, nextPtr, + (XML_Bool)!ps_finalBuffer); } + else +#endif /* XML_DTD */ + { + processor = contentProcessor; + /* see externalEntityContentProcessor vs contentProcessor */ + return doContent(parser, parentParser ? 1 : 0, encoding, s, end, + nextPtr, (XML_Bool)!ps_finalBuffer); + } +} + +static enum XML_Error PTRCALL +errorProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + return errorCode; } static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) +storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool) { - enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); + enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, + end, pool); if (result) return result; if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) @@ -2348,11 +4935,11 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, } static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) +appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool) { - const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); + DTD * const dtd = _dtd; /* save one level of indirection */ for (;;) { const char *next; int tok = XmlAttributeValueTok(enc, ptr, end, &next); @@ -2361,42 +4948,41 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == encoding) - eventPtr = next; + eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (enc == encoding) - eventPtr = ptr; + eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_CHAR_REF: { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, ptr); - if (n < 0) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - if (!isCdata - && n == 0x20 /* space */ - && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (!poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + if (!isCdata + && n == 0x20 /* space */ + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + break; + n = XmlEncode(n, (ICHAR *)buf); + if (!n) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + for (i = 0; i < n; i++) { + if (!poolAppendChar(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } } break; case XML_TOK_DATA_CHARS: if (!poolAppend(pool, enc, ptr, next)) - return XML_ERROR_NO_MEMORY; - break; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: next = ptr + enc->minBytesPerChar; @@ -2404,143 +4990,267 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; + break; if (!poolAppendChar(pool, 0x20)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_ENTITY_REF: { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (!poolAppendChar(pool, ch)) - return XML_ERROR_NO_MEMORY; - break; - } - name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&temp2Pool); - if (!entity) { - if (dtd.complete) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_UNDEFINED_ENTITY; - } - } - else if (entity->open) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - else if (entity->notation) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BINARY_ENTITY_REF; - } - else if (!entity->textPtr) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; - } - else { - enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = 1; - result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); - entity->open = 0; - if (result) - return result; - } + const XML_Char *name; + ENTITY *entity; + char checkEntityDecl; + XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (!poolAppendChar(pool, ch)) + return XML_ERROR_NO_MEMORY; + break; + } + name = poolStoreString(&temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); + poolDiscard(&temp2Pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal. + */ + if (pool == &dtd->pool) /* are we called from prolog? */ + checkEntityDecl = +#ifdef XML_DTD + prologState.documentEntity && +#endif /* XML_DTD */ + (dtd->standalone + ? !openInternalEntities + : !dtd->hasParamEntityRefs); + else /* if (pool == &tempPool): we are called from content */ + checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone; + if (checkEntityDecl) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + /* Cannot report skipped entity here - see comments on + skippedEntityHandler. + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + */ + /* Cannot call the default handler because this would be + out of sync with the call to the startElementHandler. + if ((pool == &tempPool) && defaultHandler) + reportDefault(parser, enc, ptr, next); + */ + break; + } + if (entity->open) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_RECURSIVE_ENTITY_REF; + } + if (entity->notation) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (!entity->textPtr) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } + else { + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; + result = appendAttributeValue(parser, internalEncoding, isCdata, + (char *)entity->textPtr, + (char *)textEnd, pool); + entity->open = XML_FALSE; + if (result) + return result; + } } break; default: - abort(); + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_UNEXPECTED_STATE; } ptr = next; } /* not reached */ } -static -enum XML_Error storeEntityValue(XML_Parser parser, - const char *entityTextPtr, - const char *entityTextEnd) +static enum XML_Error +storeEntityValue(XML_Parser parser, + const ENCODING *enc, + const char *entityTextPtr, + const char *entityTextEnd) { - STRING_POOL *pool = &(dtd.pool); - entityTextPtr += encoding->minBytesPerChar; - entityTextEnd -= encoding->minBytesPerChar; + DTD * const dtd = _dtd; /* save one level of indirection */ + STRING_POOL *pool = &(dtd->entityValuePool); + enum XML_Error result = XML_ERROR_NONE; +#ifdef XML_DTD + int oldInEntityValue = prologState.inEntityValue; + prologState.inEntityValue = 1; +#endif /* XML_DTD */ + /* never return Null for the value argument in EntityDeclHandler, + since this would indicate an external entity; therefore we + have to make sure that entityValuePool.start is not null */ + if (!pool->blocks) { + if (!poolGrow(pool)) + return XML_ERROR_NO_MEMORY; + } + for (;;) { const char *next; - int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next); + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); switch (tok) { case XML_TOK_PARAM_ENTITY_REF: +#ifdef XML_DTD + if (isParamEntity || enc != encoding) { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&tempPool, enc, + entityTextPtr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); + poolDiscard(&tempPool); + if (!entity) { + /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ + /* cannot report skipped entity here - see comments on + skippedEntityHandler + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + */ + dtd->keepProcessing = dtd->standalone; + goto endEntityValue; + } + if (entity->open) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_RECURSIVE_ENTITY_REF; + goto endEntityValue; + } + if (entity->systemId) { + if (externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) { + entity->open = XML_FALSE; + result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; + goto endEntityValue; + } + entity->open = XML_FALSE; + if (!dtd->paramEntityRead) + dtd->keepProcessing = dtd->standalone; + } + else + dtd->keepProcessing = dtd->standalone; + } + else { + entity->open = XML_TRUE; + result = storeEntityValue(parser, + internalEncoding, + (char *)entity->textPtr, + (char *)(entity->textPtr + + entity->textLen)); + entity->open = XML_FALSE; + if (result) + goto endEntityValue; + } + break; + } +#endif /* XML_DTD */ + /* In the internal subset, PE references are not legal + within markup declarations, e.g entity values in this case. */ eventPtr = entityTextPtr; - return XML_ERROR_SYNTAX; + result = XML_ERROR_PARAM_ENTITY_REF; + goto endEntityValue; case XML_TOK_NONE: - if (declEntity) { - declEntity->textPtr = pool->start; - declEntity->textLen = pool->ptr - pool->start; - poolFinish(pool); - } - else - poolDiscard(pool); - return XML_ERROR_NONE; + result = XML_ERROR_NONE; + goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, encoding, entityTextPtr, next)) - return XML_ERROR_NO_MEMORY; + if (!poolAppend(pool, enc, entityTextPtr, next)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } break; case XML_TOK_TRAILING_CR: - next = entityTextPtr + encoding->minBytesPerChar; + next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; + if (pool->end == pool->ptr && !poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } *(pool->ptr)++ = 0xA; break; case XML_TOK_CHAR_REF: { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(encoding, entityTextPtr); - if (n < 0) { - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; - *(pool->ptr)++ = buf[i]; - } + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + n = XmlEncode(n, (ICHAR *)buf); + if (!n) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && !poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } + *(pool->ptr)++ = buf[i]; + } } break; case XML_TOK_PARTIAL: - eventPtr = entityTextPtr; - return XML_ERROR_INVALID_TOKEN; + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; case XML_TOK_INVALID: - eventPtr = next; - return XML_ERROR_INVALID_TOKEN; + if (enc == encoding) + eventPtr = next; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; default: - abort(); + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_UNEXPECTED_STATE; + goto endEntityValue; } entityTextPtr = next; } - /* not reached */ +endEntityValue: +#ifdef XML_DTD + prologState.inEntityValue = oldInEntityValue; +#endif /* XML_DTD */ + return result; } -static void +static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; @@ -2564,7 +5274,8 @@ normalizeLines(XML_Char *s) } static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { const XML_Char *target; XML_Char *data; @@ -2581,8 +5292,8 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * return 0; poolFinish(&tempPool); data = poolStoreString(&tempPool, enc, - XmlSkipS(enc, tem), - end - enc->minBytesPerChar*2); + XmlSkipS(enc, tem), + end - enc->minBytesPerChar*2); if (!data) return 0; normalizeLines(data); @@ -2592,7 +5303,8 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * } static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +reportComment(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { XML_Char *data; if (!commentHandler) { @@ -2602,8 +5314,8 @@ reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const c } data = poolStoreString(&tempPool, enc, - start + enc->minBytesPerChar * 4, - end - enc->minBytesPerChar * 3); + start + enc->minBytesPerChar * 4, + end - enc->minBytesPerChar * 3); if (!data) return 0; normalizeLines(data); @@ -2613,7 +5325,8 @@ reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const c } static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) +reportDefault(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { const char **eventPP; @@ -2630,62 +5343,82 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; - defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); + defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; } while (s != end); } else - defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); + defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); } static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value) +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, + XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; + if (value || isId) { + /* The handling of default attributes gets messed up if we have + a default which duplicates a non-default. */ + int i; + for (i = 0; i < type->nDefaultAtts; i++) + if (attId == type->defaultAtts[i].id) + return 1; + if (isId && !type->idAtt && !attId->xmlns) + type->idAtt = attId; + } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts + * sizeof(DEFAULT_ATTRIBUTE)); + if (!type->defaultAtts) + return 0; } else { - type->allocDefaultAtts *= 2; - type->defaultAtts = realloc(type->defaultAtts, - type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + DEFAULT_ATTRIBUTE *temp; + int count = type->allocDefaultAtts * 2; + temp = (DEFAULT_ATTRIBUTE *) + REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); + if (temp == NULL) + return 0; + type->allocDefaultAtts = count; + type->defaultAtts = temp; } - if (!type->defaultAtts) - return 0; } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; att->value = value; att->isCdata = isCdata; if (!isCdata) - attId->maybeTokenized = 1; + attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) +static int +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { + DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { - if (*name == XML_T(':')) { + if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { - if (!poolAppendChar(&dtd.pool, *s)) - return 0; + if (!poolAppendChar(&dtd->pool, *s)) + return 0; } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return 0; + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), + sizeof(PREFIX)); if (!prefix) - return 0; - if (prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); + return 0; + if (prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); else - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); elementType->prefix = prefix; } @@ -2694,84 +5427,90 @@ static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) } static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { + DTD * const dtd = _dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - name = poolStoreString(&dtd.pool, enc, start, end); + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + name = poolStoreString(&dtd->pool, enc, start, end); if (!name) - return 0; + return NULL; + /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID)); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) - return 0; + return NULL; if (id->name != name) - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); else { - poolFinish(&dtd.pool); + poolFinish(&dtd->pool); if (!ns) ; - else if (name[0] == 'x' - && name[1] == 'm' - && name[2] == 'l' - && name[3] == 'n' - && name[4] == 's' - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { - if (name[5] == '\0') - id->prefix = &dtd.defaultPrefix; + else if (name[0] == XML_T(ASCII_x) + && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) + && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { + if (name[5] == XML_T('\0')) + id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX)); - id->xmlns = 1; + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); + id->xmlns = XML_TRUE; } else { int i; for (i = 0; name[i]; i++) { - if (name[i] == XML_T(':')) { - int j; - for (j = 0; j < i; j++) { - if (!poolAppendChar(&dtd.pool, name[j])) - return 0; - } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); - if (id->prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); - else - poolDiscard(&dtd.pool); - break; - } + /* attributes without prefix are *not* in the default namespace */ + if (name[i] == XML_T(ASCII_COLON)) { + int j; + for (j = 0; j < i; j++) { + if (!poolAppendChar(&dtd->pool, name[j])) + return NULL; + } + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), + sizeof(PREFIX)); + if (id->prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); + else + poolDiscard(&dtd->pool); + break; + } } } } return id; } -#define CONTEXT_SEP XML_T('\f') +#define CONTEXT_SEP XML_T(ASCII_FF) -static -const XML_Char *getContext(XML_Parser parser) +static const XML_Char * +getContext(XML_Parser parser) { + DTD * const dtd = _dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; - int needSep = 0; + XML_Bool needSep = XML_FALSE; - if (dtd.defaultPrefix.binding) { + if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; - len = dtd.defaultPrefix.binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) + return NULL; + len = dtd->defaultPrefix.binding->uriLen; + if (namespaceSeparator) len--; for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i])) - return 0; - needSep = 1; + if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) + return NULL; + needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd.prefixes)); + hashTableIterInit(&iter, &(dtd->prefixes)); for (;;) { int i; int len; @@ -2782,23 +5521,23 @@ const XML_Char *getContext(XML_Parser parser) if (!prefix->binding) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + return NULL; for (s = prefix->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) - return 0; - if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; + return NULL; + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) + return NULL; len = prefix->binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (namespaceSeparator) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) - return 0; - needSep = 1; + return NULL; + needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd.generalEntities)); + hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); @@ -2807,75 +5546,81 @@ const XML_Char *getContext(XML_Parser parser) if (!e->open) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + return NULL; for (s = e->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return 0; - needSep = 1; + needSep = XML_TRUE; } if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; + return NULL; return tempPool.start; } -static -int setContext(XML_Parser parser, const XML_Char *context) +static XML_Bool +setContext(XML_Parser parser, const XML_Char *context) { + DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0); + return XML_FALSE; + e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0); if (e) - e->open = 1; + e->open = XML_TRUE; if (*s != XML_T('\0')) - s++; + s++; context = s; poolDiscard(&tempPool); } - else if (*s == '=') { + else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; if (poolLength(&tempPool) == 0) - prefix = &dtd.defaultPrefix; + prefix = &dtd->defaultPrefix; else { - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX)); - if (!prefix) - return 0; - if (prefix->name == poolStart(&tempPool)) - poolFinish(&tempPool); - else - poolDiscard(&tempPool); + if (!poolAppendChar(&tempPool, XML_T('\0'))) + return XML_FALSE; + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool), + sizeof(PREFIX)); + if (!prefix) + return XML_FALSE; + if (prefix->name == poolStart(&tempPool)) { + prefix->name = poolCopyString(&dtd->pool, prefix->name); + if (!prefix->name) + return XML_FALSE; + } + poolDiscard(&tempPool); } - for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) + for (context = s + 1; + *context != CONTEXT_SEP && *context != XML_T('\0'); + context++) if (!poolAppendChar(&tempPool, *context)) - return 0; + return XML_FALSE; if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings)) - return 0; + return XML_FALSE; + if (addBinding(parser, prefix, NULL, poolStart(&tempPool), + &inheritedBindings) != XML_ERROR_NONE) + return XML_FALSE; poolDiscard(&tempPool); if (*context != XML_T('\0')) - ++context; + ++context; s = context; } else { if (!poolAppendChar(&tempPool, *s)) - return 0; + return XML_FALSE; s++; } } - return 1; + return XML_TRUE; } - -static -void normalizePublicId(XML_Char *publicId) +static void FASTCALL +normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; @@ -2885,7 +5630,7 @@ void normalizePublicId(XML_Char *publicId) case 0xD: case 0xA: if (p != publicId && p[-1] != 0x20) - *p++ = 0x20; + *p++ = 0x20; break; default: *p++ = *s; @@ -2896,22 +5641,83 @@ void normalizePublicId(XML_Char *publicId) *p = XML_T('\0'); } -static int dtdInit(DTD *p) +static DTD * +dtdCreate(const XML_Memory_Handling_Suite *ms) { - poolInit(&(p->pool)); - hashTableInit(&(p->generalEntities)); - hashTableInit(&(p->elementTypes)); - hashTableInit(&(p->attributeIds)); - hashTableInit(&(p->prefixes)); - p->complete = 1; - p->standalone = 0; - p->base = 0; - p->defaultPrefix.name = 0; - p->defaultPrefix.binding = 0; - return 1; + DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); + if (p == NULL) + return p; + poolInit(&(p->pool), ms); + poolInit(&(p->entityValuePool), ms); + hashTableInit(&(p->generalEntities), ms); + hashTableInit(&(p->elementTypes), ms); + hashTableInit(&(p->attributeIds), ms); + hashTableInit(&(p->prefixes), ms); +#ifdef XML_DTD + p->paramEntityRead = XML_FALSE; + hashTableInit(&(p->paramEntities), ms); +#endif /* XML_DTD */ + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; + + p->in_eldecl = XML_FALSE; + p->scaffIndex = NULL; + p->scaffold = NULL; + p->scaffLevel = 0; + p->scaffSize = 0; + p->scaffCount = 0; + p->contentStringLen = 0; + + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; + return p; +} + +static void +dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) +{ + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &(p->elementTypes)); + for (;;) { + ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); + if (!e) + break; + if (e->allocDefaultAtts != 0) + ms->free_fcn(e->defaultAtts); + } + hashTableClear(&(p->generalEntities)); +#ifdef XML_DTD + p->paramEntityRead = XML_FALSE; + hashTableClear(&(p->paramEntities)); +#endif /* XML_DTD */ + hashTableClear(&(p->elementTypes)); + hashTableClear(&(p->attributeIds)); + hashTableClear(&(p->prefixes)); + poolClear(&(p->pool)); + poolClear(&(p->entityValuePool)); + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; + + p->in_eldecl = XML_FALSE; + + ms->free_fcn(p->scaffIndex); + p->scaffIndex = NULL; + ms->free_fcn(p->scaffold); + p->scaffold = NULL; + + p->scaffLevel = 0; + p->scaffSize = 0; + p->scaffCount = 0; + p->contentStringLen = 0; + + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; } -static void dtdDestroy(DTD *p) +static void +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); @@ -2920,29 +5726,32 @@ static void dtdDestroy(DTD *p) if (!e) break; if (e->allocDefaultAtts != 0) - free(e->defaultAtts); + ms->free_fcn(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); +#ifdef XML_DTD + hashTableDestroy(&(p->paramEntities)); +#endif /* XML_DTD */ hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); + poolDestroy(&(p->entityValuePool)); + if (isDocEntity) { + ms->free_fcn(p->scaffIndex); + ms->free_fcn(p->scaffold); + } + ms->free_fcn(p); } -/* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. -The new DTD has already been initialized. */ - -static int dtdCopy(DTD *newDtd, const DTD *oldDtd) +/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. + The new DTD has already been initialized. +*/ +static int +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; - if (oldDtd->base) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base); - if (!tem) - return 0; - newDtd->base = tem; - } - /* Copy the prefix table. */ hashTableIterInit(&iter, &(oldDtd->prefixes)); @@ -2954,7 +5763,7 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) name = poolCopyString(&(newDtd->pool), oldP->name); if (!name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -2976,16 +5785,18 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) if (!name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, + sizeof(ATTRIBUTE_ID)); if (!newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { newA->xmlns = oldA->xmlns; if (oldA->prefix == &oldDtd->defaultPrefix) - newA->prefix = &newDtd->defaultPrefix; + newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0); + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), + oldA->prefix->name, 0); } } @@ -3003,33 +5814,82 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, + sizeof(ELEMENT_TYPE)); if (!newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (!newE->defaultAtts) - return 0; + newE->defaultAtts = (DEFAULT_ATTRIBUTE *) + ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (!newE->defaultAtts) { + ms->free_fcn(newE); + return 0; + } } + if (oldE->idAtt) + newE->idAtt = (ATTRIBUTE_ID *) + lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0); + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), + oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { - newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + newE->defaultAtts[i].id = (ATTRIBUTE_ID *) + lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { - newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); - if (!newE->defaultAtts[i].value) - return 0; + newE->defaultAtts[i].value + = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); + if (!newE->defaultAtts[i].value) + return 0; } else - newE->defaultAtts[i].value = 0; + newE->defaultAtts[i].value = NULL; } } - /* Copy the entity table. */ + /* Copy the entity tables. */ + if (!copyEntityTable(oldParser, + &(newDtd->generalEntities), + &(newDtd->pool), + &(oldDtd->generalEntities))) + return 0; + +#ifdef XML_DTD + if (!copyEntityTable(oldParser, + &(newDtd->paramEntities), + &(newDtd->pool), + &(oldDtd->paramEntities))) + return 0; + newDtd->paramEntityRead = oldDtd->paramEntityRead; +#endif /* XML_DTD */ + + newDtd->keepProcessing = oldDtd->keepProcessing; + newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; + newDtd->standalone = oldDtd->standalone; + + /* Don't want deep copying for scaffolding */ + newDtd->in_eldecl = oldDtd->in_eldecl; + newDtd->scaffold = oldDtd->scaffold; + newDtd->contentStringLen = oldDtd->contentStringLen; + newDtd->scaffSize = oldDtd->scaffSize; + newDtd->scaffLevel = oldDtd->scaffLevel; + newDtd->scaffIndex = oldDtd->scaffIndex; + + return 1; +} /* End dtdCopy */ + +static int +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *newTable, + STRING_POOL *newPool, + const HASH_TABLE *oldTable) +{ + HASH_TABLE_ITER iter; + const XML_Char *cachedOldBase = NULL; + const XML_Char *cachedNewBase = NULL; - hashTableIterInit(&iter, &(oldDtd->generalEntities)); + hashTableIterInit(&iter, oldTable); for (;;) { ENTITY *newE; @@ -3037,58 +5897,215 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); if (!oldE) break; - name = poolCopyString(&(newDtd->pool), oldE->name); + name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId); + const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (!tem) - return 0; + return 0; newE->systemId = tem; if (oldE->base) { - if (oldE->base == oldDtd->base) - newE->base = newDtd->base; - tem = poolCopyString(&(newDtd->pool), oldE->base); - if (!tem) - return 0; - newE->base = tem; + if (oldE->base == cachedOldBase) + newE->base = cachedNewBase; + else { + cachedOldBase = oldE->base; + tem = poolCopyString(newPool, cachedOldBase); + if (!tem) + return 0; + cachedNewBase = newE->base = tem; + } + } + if (oldE->publicId) { + tem = poolCopyString(newPool, oldE->publicId); + if (!tem) + return 0; + newE->publicId = tem; } } else { - const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen); + const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, + oldE->textLen); if (!tem) - return 0; + return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation); + const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (!tem) - return 0; + return 0; newE->notation = tem; } + newE->is_param = oldE->is_param; + newE->is_internal = oldE->is_internal; } - - newDtd->complete = oldDtd->complete; - newDtd->standalone = oldDtd->standalone; return 1; } -static -void poolInit(STRING_POOL *pool) +#define INIT_POWER 6 + +static XML_Bool FASTCALL +keyeq(KEY s1, KEY s2) +{ + for (; *s1 == *s2; s1++, s2++) + if (*s1 == 0) + return XML_TRUE; + return XML_FALSE; +} + +static unsigned long FASTCALL +hash(XML_Parser parser, KEY s) +{ + unsigned long h = hash_secret_salt; + while (*s) + h = CHAR_HASH(h, *s++); + return h; +} + +static NAMED * +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) +{ + size_t i; + if (table->size == 0) { + size_t tsize; + if (!createSize) + return NULL; + table->power = INIT_POWER; + /* table->size is a power of 2 */ + table->size = (size_t)1 << INIT_POWER; + tsize = table->size * sizeof(NAMED *); + table->v = (NAMED **)table->mem->malloc_fcn(tsize); + if (!table->v) { + table->size = 0; + return NULL; + } + memset(table->v, 0, tsize); + i = hash(parser, name) & ((unsigned long)table->size - 1); + } + else { + unsigned long h = hash(parser, name); + unsigned long mask = (unsigned long)table->size - 1; + unsigned char step = 0; + i = h & mask; + while (table->v[i]) { + if (keyeq(name, table->v[i]->name)) + return table->v[i]; + if (!step) + step = PROBE_STEP(h, mask, table->power); + i < step ? (i += table->size - step) : (i -= step); + } + if (!createSize) + return NULL; + + /* check for overflow (table is half full) */ + if (table->used >> (table->power - 1)) { + unsigned char newPower = table->power + 1; + size_t newSize = (size_t)1 << newPower; + unsigned long newMask = (unsigned long)newSize - 1; + size_t tsize = newSize * sizeof(NAMED *); + NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); + if (!newV) + return NULL; + memset(newV, 0, tsize); + for (i = 0; i < table->size; i++) + if (table->v[i]) { + unsigned long newHash = hash(parser, table->v[i]->name); + size_t j = newHash & newMask; + step = 0; + while (newV[j]) { + if (!step) + step = PROBE_STEP(newHash, newMask, newPower); + j < step ? (j += newSize - step) : (j -= step); + } + newV[j] = table->v[i]; + } + table->mem->free_fcn(table->v); + table->v = newV; + table->power = newPower; + table->size = newSize; + i = h & newMask; + step = 0; + while (table->v[i]) { + if (!step) + step = PROBE_STEP(h, newMask, newPower); + i < step ? (i += newSize - step) : (i -= step); + } + } + } + table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); + if (!table->v[i]) + return NULL; + memset(table->v[i], 0, createSize); + table->v[i]->name = name; + (table->used)++; + return table->v[i]; +} + +static void FASTCALL +hashTableClear(HASH_TABLE *table) { - pool->blocks = 0; - pool->freeBlocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; + size_t i; + for (i = 0; i < table->size; i++) { + table->mem->free_fcn(table->v[i]); + table->v[i] = NULL; + } + table->used = 0; +} + +static void FASTCALL +hashTableDestroy(HASH_TABLE *table) +{ + size_t i; + for (i = 0; i < table->size; i++) + table->mem->free_fcn(table->v[i]); + table->mem->free_fcn(table->v); +} + +static void FASTCALL +hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) +{ + p->power = 0; + p->size = 0; + p->used = 0; + p->v = NULL; + p->mem = ms; +} + +static void FASTCALL +hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) +{ + iter->p = table->v; + iter->end = iter->p + table->size; +} + +static NAMED * FASTCALL +hashTableIterNext(HASH_TABLE_ITER *iter) +{ + while (iter->p != iter->end) { + NAMED *tem = *(iter->p)++; + if (tem) + return tem; + } + return NULL; +} + +static void FASTCALL +poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) +{ + pool->blocks = NULL; + pool->freeBlocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; + pool->mem = ms; } -static -void poolClear(STRING_POOL *pool) +static void FASTCALL +poolClear(STRING_POOL *pool) { if (!pool->freeBlocks) pool->freeBlocks = pool->blocks; @@ -3101,117 +6118,129 @@ void poolClear(STRING_POOL *pool) p = tem; } } - pool->blocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; + pool->blocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; } -static -void poolDestroy(STRING_POOL *pool) +static void FASTCALL +poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; - free(p); + pool->mem->free_fcn(p); p = tem; } - pool->blocks = 0; p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; - free(p); + pool->mem->free_fcn(p); p = tem; } - pool->freeBlocks = 0; - pool->ptr = 0; - pool->start = 0; - pool->end = 0; } -static -XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) +static XML_Char * +poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) { if (!pool->ptr && !poolGrow(pool)) - return 0; + return NULL; for (;;) { XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); if (ptr == end) break; if (!poolGrow(pool)) - return 0; + return NULL; } return pool->start; } -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s) +static const XML_Char * FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { if (!poolAppendChar(pool, *s)) - return 0; + return NULL; } while (*s++); s = pool->start; poolFinish(pool); return s; } -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) +static const XML_Char * +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (!pool->ptr && !poolGrow(pool)) - return 0; + return NULL; for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) - return 0; - + return NULL; } s = pool->start; poolFinish(pool); return s; } -static -XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) +static const XML_Char * FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s) +{ + while (*s) { + if (!poolAppendChar(pool, *s)) + return NULL; + s++; + } + return pool->start; +} + +static XML_Char * +poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) { if (!poolAppend(pool, enc, ptr, end)) - return 0; + return NULL; if (pool->ptr == pool->end && !poolGrow(pool)) - return 0; + return NULL; *(pool->ptr)++ = 0; return pool->start; } -static -int poolGrow(STRING_POOL *pool) +static XML_Bool FASTCALL +poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; - pool->blocks->next = 0; + pool->blocks->next = NULL; pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; pool->ptr = pool->start; - return 1; + return XML_TRUE; } if (pool->end - pool->start < pool->freeBlocks->size) { BLOCK *tem = pool->freeBlocks->next; pool->freeBlocks->next = pool->blocks; pool->blocks = pool->freeBlocks; pool->freeBlocks = tem; - memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); + memcpy(pool->blocks->s, pool->start, + (pool->end - pool->start) * sizeof(XML_Char)); pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; - return 1; + return XML_TRUE; } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (pool->end - pool->start)*2; - pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); - if (!pool->blocks) - return 0; + int blockSize = (int)(pool->end - pool->start)*2; + BLOCK *temp = (BLOCK *) + pool->mem->realloc_fcn(pool->blocks, + (offsetof(BLOCK, s) + + blockSize * sizeof(XML_Char))); + if (temp == NULL) + return XML_FALSE; + pool->blocks = temp; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; @@ -3219,21 +6248,156 @@ int poolGrow(STRING_POOL *pool) } else { BLOCK *tem; - int blockSize = pool->end - pool->start; + int blockSize = (int)(pool->end - pool->start); if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else blockSize *= 2; - tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); + tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) + + blockSize * sizeof(XML_Char)); if (!tem) - return 0; + return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; - memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); + if (pool->ptr != pool->start) + memcpy(tem->s, pool->start, + (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; } - return 1; + return XML_TRUE; +} + +static int FASTCALL +nextScaffoldPart(XML_Parser parser) +{ + DTD * const dtd = _dtd; /* save one level of indirection */ + CONTENT_SCAFFOLD * me; + int next; + + if (!dtd->scaffIndex) { + dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int)); + if (!dtd->scaffIndex) + return -1; + dtd->scaffIndex[0] = 0; + } + + if (dtd->scaffCount >= dtd->scaffSize) { + CONTENT_SCAFFOLD *temp; + if (dtd->scaffold) { + temp = (CONTENT_SCAFFOLD *) + REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize *= 2; + } + else { + temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS + * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; + } + dtd->scaffold = temp; + } + next = dtd->scaffCount++; + me = &dtd->scaffold[next]; + if (dtd->scaffLevel) { + CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]]; + if (parent->lastchild) { + dtd->scaffold[parent->lastchild].nextsib = next; + } + if (!parent->childcnt) + parent->firstchild = next; + parent->lastchild = next; + parent->childcnt++; + } + me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; + return next; +} + +static void +build_node(XML_Parser parser, + int src_node, + XML_Content *dest, + XML_Content **contpos, + XML_Char **strpos) +{ + DTD * const dtd = _dtd; /* save one level of indirection */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const XML_Char *src; + dest->name = *strpos; + src = dtd->scaffold[src_node].name; + for (;;) { + *(*strpos)++ = *src; + if (!*src) + break; + src++; + } + dest->numchildren = 0; + dest->children = NULL; + } + else { + unsigned int i; + int cn; + dest->numchildren = dtd->scaffold[src_node].childcnt; + dest->children = *contpos; + *contpos += dest->numchildren; + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; + i++, cn = dtd->scaffold[cn].nextsib) { + build_node(parser, cn, &(dest->children[i]), contpos, strpos); + } + dest->name = NULL; + } +} + +static XML_Content * +build_model (XML_Parser parser) +{ + DTD * const dtd = _dtd; /* save one level of indirection */ + XML_Content *ret; + XML_Content *cpos; + XML_Char * str; + int allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); + + ret = (XML_Content *)MALLOC(allocsize); + if (!ret) + return NULL; + + str = (XML_Char *) (&ret[dtd->scaffCount]); + cpos = &ret[1]; + + build_node(parser, 0, ret, &cpos, &str); + return ret; +} + +static ELEMENT_TYPE * +getElementType(XML_Parser parser, + const ENCODING *enc, + const char *ptr, + const char *end) +{ + DTD * const dtd = _dtd; /* save one level of indirection */ + const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); + ELEMENT_TYPE *ret; + + if (!name) + return NULL; + ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); + if (!ret) + return NULL; + if (ret->name != name) + poolDiscard(&dtd->pool); + else { + poolFinish(&dtd->pool); + if (!setElementTypePrefix(parser, ret)) + return NULL; + } + return ret; } diff --git a/simgear/xml/xmlrole.c b/simgear/xml/xmlrole.c index b18e35eb..ae8ae012 100644 --- a/simgear/xml/xmlrole.c +++ b/simgear/xml/xmlrole.c @@ -1,35 +1,27 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ -#include "xmldef.h" +#include + +#ifdef COMPILED_FROM_DSP +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" +#elif defined(__amigaos__) +#include "amigaconfig.h" +#elif defined(__WATCOMC__) +#include "watcomconfig.h" +#else +#ifdef HAVE_EXPAT_CONFIG_H +#include "expat_config.h" +#endif +#endif /* ndef COMPILED_FROM_DSP */ + +#include "expat_external.h" +#include "internal.h" #include "xmlrole.h" +#include "ascii.h" /* Doesn't check: @@ -38,39 +30,106 @@ your version of this file under either the MPL or the GPL. */ +static const char KW_ANY[] = { + ASCII_A, ASCII_N, ASCII_Y, '\0' }; +static const char KW_ATTLIST[] = { + ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; +static const char KW_CDATA[] = { + ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_DOCTYPE[] = { + ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; +static const char KW_ELEMENT[] = { + ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; +static const char KW_EMPTY[] = { + ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; +static const char KW_ENTITIES[] = { + ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, + '\0' }; +static const char KW_ENTITY[] = { + ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; +static const char KW_FIXED[] = { + ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; +static const char KW_ID[] = { + ASCII_I, ASCII_D, '\0' }; +static const char KW_IDREF[] = { + ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; +static const char KW_IDREFS[] = { + ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +#ifdef XML_DTD +static const char KW_IGNORE[] = { + ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +#endif +static const char KW_IMPLIED[] = { + ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +#ifdef XML_DTD +static const char KW_INCLUDE[] = { + ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +#endif +static const char KW_NDATA[] = { + ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_NMTOKEN[] = { + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; +static const char KW_NMTOKENS[] = { + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, + '\0' }; +static const char KW_NOTATION[] = + { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, + '\0' }; +static const char KW_PCDATA[] = { + ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_PUBLIC[] = { + ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; +static const char KW_REQUIRED[] = { + ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, + '\0' }; +static const char KW_SYSTEM[] = { + ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; + #ifndef MIN_BYTES_PER_CHAR #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif -typedef int PROLOG_HANDLER(struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); +#ifdef XML_DTD +#define setTopLevel(state) \ + ((state)->handler = ((state)->documentEntity \ + ? internalSubset \ + : externalSubset1)) +#else /* not XML_DTD */ +#define setTopLevel(state) ((state)->handler = internalSubset) +#endif /* not XML_DTD */ + +typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, entity3, entity4, entity5, entity6, - entity7, entity8, entity9, + entity7, entity8, entity9, entity10, notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, element7, +#ifdef XML_DTD + externalSubset0, externalSubset1, + condSect0, condSect1, condSect2, +#endif /* XML_DTD */ declClose, error; -static -int syntaxError(PROLOG_STATE *); +static int FASTCALL common(PROLOG_STATE *state, int tok); -static -int prolog0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: @@ -81,799 +140,884 @@ int prolog0(PROLOG_STATE *state, return XML_ROLE_XML_DECL; case XML_TOK_PI: state->handler = prolog1; - return XML_ROLE_NONE; + return XML_ROLE_PI; case XML_TOK_COMMENT: state->handler = prolog1; + return XML_ROLE_COMMENT; case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "DOCTYPE")) + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } -static -int prolog1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: + return XML_ROLE_COMMENT; case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "DOCTYPE")) + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } -static -int prolog2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } -static -int doctype0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = doctype1; return XML_ROLE_DOCTYPE_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int doctype1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } break; } - return syntaxError(state); + return common(state, tok); } -static -int doctype2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } -static -int doctype3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } -static -int doctype4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } -static -int doctype5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } -static -int internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +internalSubset(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ENTITY")) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ENTITY)) { state->handler = entity0; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ATTLIST")) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ATTLIST)) { state->handler = attlist0; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ELEMENT")) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ELEMENT)) { state->handler = element0; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "NOTATION")) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_NOTATION)) { state->handler = notation0; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_PARAM_ENTITY_REF; case XML_TOK_CLOSE_BRACKET: state->handler = doctype5; + return XML_ROLE_DOCTYPE_NONE; + case XML_TOK_NONE: return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } -static -int entity0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +#ifdef XML_DTD + +static int PTRCALL +externalSubset0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + state->handler = externalSubset1; + if (tok == XML_TOK_XML_DECL) + return XML_ROLE_TEXT_DECL; + return externalSubset1(state, tok, ptr, end, enc); +} + +static int PTRCALL +externalSubset1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { + case XML_TOK_COND_SECT_OPEN: + state->handler = condSect0; + return XML_ROLE_NONE; + case XML_TOK_COND_SECT_CLOSE: + if (state->includeLevel == 0) + break; + state->includeLevel -= 1; + return XML_ROLE_NONE; case XML_TOK_PROLOG_S: return XML_ROLE_NONE; + case XML_TOK_CLOSE_BRACKET: + break; + case XML_TOK_NONE: + if (state->includeLevel) + break; + return XML_ROLE_NONE; + default: + return internalSubset(state, tok, ptr, end, enc); + } + return common(state, tok); +} + +#endif /* XML_DTD */ + +static int PTRCALL +entity0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_ENTITY_NONE; case XML_TOK_PERCENT: state->handler = entity1; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int entity1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int entity2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int entity3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } - -static -int entity4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } -static -int entity5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return XML_ROLE_NONE; + setTopLevel(state); + return XML_ROLE_ENTITY_COMPLETE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "NDATA")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; } - return syntaxError(state); + return common(state, tok); } -static -int entity6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int entity7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int entity8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity8(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } -static -int entity9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity9(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: - state->handler = declClose; + state->handler = entity10; return XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } -static -int notation0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity10(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; + case XML_TOK_DECL_CLOSE: + setTopLevel(state); + return XML_ROLE_ENTITY_COMPLETE; + } + return common(state, tok); +} + +static int PTRCALL +notation0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: state->handler = notation1; return XML_ROLE_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int notation1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = notation3; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = notation2; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; } - return syntaxError(state); + return common(state, tok); } -static -int notation2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } -static -int notation3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } -static -int notation4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NOTATION_NO_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } -static -int attlist0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist1; return XML_ROLE_ATTLIST_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int attlist1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return XML_ROLE_NONE; + setTopLevel(state); + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist2; return XML_ROLE_ATTRIBUTE_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int attlist2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: { - static const char *types[] = { - "CDATA", - "ID", - "IDREF", - "IDREFS", - "ENTITY", - "ENTITIES", - "NMTOKEN", - "NMTOKENS", + static const char * const types[] = { + KW_CDATA, + KW_ID, + KW_IDREF, + KW_IDREFS, + KW_ENTITY, + KW_ENTITIES, + KW_NMTOKEN, + KW_NMTOKENS, }; int i; for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, types[i])) { - state->handler = attlist8; - return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; - } + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { + state->handler = attlist8; + return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; + } } - if (XmlNameMatchesAscii(enc, ptr, "NOTATION")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_OPEN_PAREN: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } - return syntaxError(state); + return common(state, tok); } -static -int attlist3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NMTOKEN: case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist4; return XML_ROLE_ATTRIBUTE_ENUM_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int attlist4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } - return syntaxError(state); + return common(state, tok); } -static -int attlist5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OPEN_PAREN: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } - return syntaxError(state); + return common(state, tok); } - -static -int attlist6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int attlist7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } - return syntaxError(state); + return common(state, tok); } /* default value */ -static -int attlist8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist8(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - "IMPLIED")) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - "REQUIRED")) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - "FIXED")) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_FIXED)) { state->handler = attlist9; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_LITERAL: state->handler = attlist1; return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int attlist9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist9(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_LITERAL: state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } -static -int element0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element1; return XML_ROLE_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } -static -int element1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_EMPTY; } - if (XmlNameMatchesAscii(enc, ptr, "ANY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_ANY; } break; @@ -882,23 +1026,24 @@ int element1(PROLOG_STATE *state, state->level = 1; return XML_ROLE_GROUP_OPEN; } - return syntaxError(state); + return common(state, tok); } -static -int element2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - "PCDATA")) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; } @@ -921,78 +1066,83 @@ int element2(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } -static -int element3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: + state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } - return syntaxError(state); + return common(state, tok); } -static -int element4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element5; return XML_ROLE_CONTENT_ELEMENT; } - return syntaxError(state); + return common(state, tok); } -static -int element5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } - return syntaxError(state); + return common(state, tok); } -static -int element6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_OPEN_PAREN: state->level += 1; return XML_ROLE_GROUP_OPEN; @@ -1010,38 +1160,46 @@ int element6(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } -static -int element7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_CLOSE_PAREN_QUESTION: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_OPT; case XML_TOK_CLOSE_PAREN_PLUS: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_PLUS; case XML_TOK_COMMA: state->handler = element6; @@ -1050,64 +1208,129 @@ int element7(PROLOG_STATE *state, state->handler = element6; return XML_ROLE_GROUP_CHOICE; } - return syntaxError(state); + return common(state, tok); } -static -int declClose(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +#ifdef XML_DTD + +static int PTRCALL +condSect0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; - case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + case XML_TOK_NAME: + if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { + state->handler = condSect1; + return XML_ROLE_NONE; + } + if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { + state->handler = condSect2; + return XML_ROLE_NONE; + } + break; + } + return common(state, tok); +} + +static int PTRCALL +condSect1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + state->includeLevel += 1; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } -#if 0 +static int PTRCALL +condSect2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + return XML_ROLE_IGNORE_SECT; + } + return common(state, tok); +} + +#endif /* XML_DTD */ -static -int ignore(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +declClose(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { + case XML_TOK_PROLOG_S: + return state->role_none; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return 0; - default: - return XML_ROLE_NONE; + setTopLevel(state); + return state->role_none; } - return syntaxError(state); + return common(state, tok); } -#endif -static -int error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +error(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { return XML_ROLE_NONE; } -static -int syntaxError(PROLOG_STATE *state) +static int FASTCALL +common(PROLOG_STATE *state, int tok) { +#ifdef XML_DTD + if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + return XML_ROLE_INNER_PARAM_ENTITY_REF; +#endif state->handler = error; return XML_ROLE_ERROR; } -void XmlPrologStateInit(PROLOG_STATE *state) +void +XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; +#ifdef XML_DTD + state->documentEntity = 1; + state->includeLevel = 0; + state->inEntityValue = 0; +#endif /* XML_DTD */ } + +#ifdef XML_DTD + +void +XmlPrologStateInitExternalEntity(PROLOG_STATE *state) +{ + state->handler = externalSubset0; + state->documentEntity = 0; + state->includeLevel = 0; +} + +#endif /* XML_DTD */ diff --git a/simgear/xml/xmlrole.h b/simgear/xml/xmlrole.h index 877c40ba..4dd9f06f 100644 --- a/simgear/xml/xmlrole.h +++ b/simgear/xml/xmlrole.h @@ -1,36 +1,16 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef XmlRole_INCLUDED #define XmlRole_INCLUDED 1 +#ifdef __VMS +/* 0 1 2 3 0 1 2 3 + 1234567890123456789012345678901 1234567890123456789012345678901 */ +#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt +#endif + #include "xmltok.h" #ifdef __cplusplus @@ -42,16 +22,21 @@ enum { XML_ROLE_NONE = 0, XML_ROLE_XML_DECL, XML_ROLE_INSTANCE_START, + XML_ROLE_DOCTYPE_NONE, XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, + XML_ROLE_DOCTYPE_INTERNAL_SUBSET, XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME, + XML_ROLE_ENTITY_NONE, XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_PUBLIC_ID, + XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_NOTATION_NAME, + XML_ROLE_NOTATION_NONE, XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_SYSTEM_ID, XML_ROLE_NOTATION_NO_SYSTEM_ID, @@ -67,11 +52,13 @@ enum { XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, XML_ROLE_ATTRIBUTE_ENUM_VALUE, XML_ROLE_ATTRIBUTE_NOTATION_VALUE, + XML_ROLE_ATTLIST_NONE, XML_ROLE_ATTLIST_ELEMENT_NAME, XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, XML_ROLE_FIXED_ATTRIBUTE_VALUE, + XML_ROLE_ELEMENT_NONE, XML_ROLE_ELEMENT_NAME, XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_EMPTY, @@ -87,19 +74,35 @@ enum { XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_PLUS, + XML_ROLE_PI, + XML_ROLE_COMMENT, +#ifdef XML_DTD + XML_ROLE_TEXT_DECL, + XML_ROLE_IGNORE_SECT, + XML_ROLE_INNER_PARAM_ENTITY_REF, +#endif /* XML_DTD */ XML_ROLE_PARAM_ENTITY_REF }; typedef struct prolog_state { - int (*handler)(struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); + int (PTRCALL *handler) (struct prolog_state *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); unsigned level; + int role_none; +#ifdef XML_DTD + unsigned includeLevel; + int documentEntity; + int inEntityValue; +#endif /* XML_DTD */ } PROLOG_STATE; -void XMLTOKAPI XmlPrologStateInit(PROLOG_STATE *); +void XmlPrologStateInit(PROLOG_STATE *); +#ifdef XML_DTD +void XmlPrologStateInitExternalEntity(PROLOG_STATE *); +#endif /* XML_DTD */ #define XmlTokenRole(state, tok, ptr, end, enc) \ (((state)->handler)(state, tok, ptr, end, enc)) diff --git a/simgear/xml/xmltok.c b/simgear/xml/xmltok.c index daca383d..1662e941 100644 --- a/simgear/xml/xmltok.c +++ b/simgear/xml/xmltok.c @@ -1,39 +1,37 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ -#include "xmldef.h" +#include + +#ifdef COMPILED_FROM_DSP +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" +#elif defined(__amigaos__) +#include "amigaconfig.h" +#elif defined(__WATCOMC__) +#include "watcomconfig.h" +#else +#ifdef HAVE_EXPAT_CONFIG_H +#include "expat_config.h" +#endif +#endif /* ndef COMPILED_FROM_DSP */ + +#include "expat_external.h" +#include "internal.h" #include "xmltok.h" #include "nametab.h" +#ifdef XML_DTD +#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +#else +#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +#endif + #define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \ + { PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ PREFIX(sameName), \ PREFIX(nameMatchesAscii), \ @@ -50,24 +48,25 @@ your version of this file under either the MPL or the GPL. #define UCS2_GET_NAMING(pages, hi, lo) \ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) -/* A 2 byte UTF-8 representation splits the characters 11 bits -between the bottom 5 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ +/* A 2 byte UTF-8 representation splits the characters 11 bits between + the bottom 5 and 6 bits of the bytes. We need 8 bits to index into + pages, 3 bits to add to that index and 5 bits to generate the mask. +*/ #define UTF8_GET_NAMING2(pages, byte) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ & (1 << (((byte)[1]) & 0x1F))) -/* A 3 byte UTF-8 representation splits the characters 16 bits -between the bottom 4, 6 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ +/* A 3 byte UTF-8 representation splits the characters 16 bits between + the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index + into pages, 3 bits to add to that index and 5 bits to generate the + mask. +*/ #define UTF8_GET_NAMING3(pages, byte) \ (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ + ((((byte)[1]) >> 2) & 0xF)] \ - << 3) \ + << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ & (1 << (((byte)[2]) & 0x1F))) @@ -79,59 +78,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ : 0)) -#define UTF8_INVALID3(p) \ - ((*p) == 0xED \ - ? (((p)[1] & 0x20) != 0) \ - : ((*p) == 0xEF \ - ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \ - : 0)) +/* Detection of invalid UTF-8 sequences is based on Table 3.1B + of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + with the additional restriction of not allowing the Unicode + code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). + Implementation details: + (A & 0x80) == 0 means A < 0x80 + and + (A & 0xC0) == 0xC0 means A > 0xBF +*/ -#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) +#define UTF8_INVALID2(p) \ + ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) -static -int isNever(const ENCODING *enc, const char *p) +#define UTF8_INVALID3(p) \ + (((p)[2] & 0x80) == 0 \ + || \ + ((*p) == 0xEF && (p)[1] == 0xBF \ + ? \ + (p)[2] > 0xBD \ + : \ + ((p)[2] & 0xC0) == 0xC0) \ + || \ + ((*p) == 0xE0 \ + ? \ + (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ + : \ + ((p)[1] & 0x80) == 0 \ + || \ + ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) + +#define UTF8_INVALID4(p) \ + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ + || \ + ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ + || \ + ((*p) == 0xF0 \ + ? \ + (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ + : \ + ((p)[1] & 0x80) == 0 \ + || \ + ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) + +static int PTRFASTCALL +isNever(const ENCODING *enc, const char *p) { return 0; } -static -int utf8_isName2(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isName2(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } -static -int utf8_isName3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isName3(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } #define utf8_isName4 isNever -static -int utf8_isNmstrt2(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isNmstrt2(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } -static -int utf8_isNmstrt3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isNmstrt3(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } #define utf8_isNmstrt4 isNever -#define utf8_isInvalid2 isNever +static int PTRFASTCALL +utf8_isInvalid2(const ENCODING *enc, const char *p) +{ + return UTF8_INVALID2((const unsigned char *)p); +} -static -int utf8_isInvalid3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isInvalid3(const ENCODING *enc, const char *p) { return UTF8_INVALID3((const unsigned char *)p); } -static -int utf8_isInvalid4(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isInvalid4(const ENCODING *enc, const char *p) { return UTF8_INVALID4((const unsigned char *)p); } @@ -140,23 +177,25 @@ struct normal_encoding { ENCODING enc; unsigned char type[256]; #ifdef XML_MIN_SIZE - int (*byteType)(const ENCODING *, const char *); - int (*isNameMin)(const ENCODING *, const char *); - int (*isNmstrtMin)(const ENCODING *, const char *); - int (*byteToAscii)(const ENCODING *, const char *); - int (*charMatches)(const ENCODING *, const char *, int); + int (PTRFASTCALL *byteType)(const ENCODING *, const char *); + int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); + int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); + int (PTRCALL *charMatches)(const ENCODING *, const char *, int); #endif /* XML_MIN_SIZE */ - int (*isName2)(const ENCODING *, const char *); - int (*isName3)(const ENCODING *, const char *); - int (*isName4)(const ENCODING *, const char *); - int (*isNmstrt2)(const ENCODING *, const char *); - int (*isNmstrt3)(const ENCODING *, const char *); - int (*isNmstrt4)(const ENCODING *, const char *); - int (*isInvalid2)(const ENCODING *, const char *); - int (*isInvalid3)(const ENCODING *, const char *); - int (*isInvalid4)(const ENCODING *, const char *); + int (PTRFASTCALL *isName2)(const ENCODING *, const char *); + int (PTRFASTCALL *isName3)(const ENCODING *, const char *); + int (PTRFASTCALL *isName4)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); }; +#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) + #ifdef XML_MIN_SIZE #define STANDARD_VTABLE(E) \ @@ -183,9 +222,10 @@ struct normal_encoding { E ## isInvalid3, \ E ## isInvalid4 -static int checkCharRefNumber(int); +static int FASTCALL checkCharRefNumber(int); #include "xmltok_impl.h" +#include "ascii.h" #ifdef XML_MIN_SIZE #define sb_isNameMin isNever @@ -203,41 +243,41 @@ static int checkCharRefNumber(int); (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE -static -int sb_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +sb_byteType(const ENCODING *enc, const char *p) { return SB_BYTE_TYPE(enc, p); } #define BYTE_TYPE(enc, p) \ - (((const struct normal_encoding *)(enc))->byteType(enc, p)) + (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) #else #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #endif #ifdef XML_MIN_SIZE #define BYTE_TO_ASCII(enc, p) \ - (((const struct normal_encoding *)(enc))->byteToAscii(enc, p)) -static -int sb_byteToAscii(const ENCODING *enc, const char *p) + (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) +static int PTRFASTCALL +sb_byteToAscii(const ENCODING *enc, const char *p) { return *p; } #else -#define BYTE_TO_ASCII(enc, p) (*p) +#define BYTE_TO_ASCII(enc, p) (*(p)) #endif #define IS_NAME_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) #define IS_NMSTRT_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) #define IS_INVALID_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) #ifdef XML_MIN_SIZE #define IS_NAME_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNameMin(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) #define IS_NMSTRT_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) #else #define IS_NAME_CHAR_MINBPC(enc, p) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) @@ -245,9 +285,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) #ifdef XML_MIN_SIZE #define CHAR_MATCHES(enc, p, c) \ - (((const struct normal_encoding *)(enc))->charMatches(enc, p, c)) -static -int sb_charMatches(const ENCODING *enc, const char *p, int c) + (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) +static int PTRCALL +sb_charMatches(const ENCODING *enc, const char *p, int c) { return *p == c; } @@ -257,7 +297,9 @@ int sb_charMatches(const ENCODING *enc, const char *p, int c) #endif #define PREFIX(ident) normal_ ## ident +#define XML_TOK_IMPL_C #include "xmltok_impl.c" +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -276,10 +318,10 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; -static -void utf8_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +utf8_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { char *to; const char *from; @@ -287,7 +329,7 @@ void utf8_toUtf8(const ENCODING *enc, /* Avoid copying partial characters. */ for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) - break; + break; } for (to = *toP, from = *fromP; from != fromLim; from++, to++) *to = *from; @@ -295,34 +337,36 @@ void utf8_toUtf8(const ENCODING *enc, *toP = to; } -static -void utf8_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +utf8_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { unsigned short *to = *toP; const char *from = *fromP; while (from != fromLim && to != toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: - *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); + *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); + *to++ = (unsigned short)(((from[0] & 0xf) << 12) + | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; break; case BT_LEAD4: { - unsigned long n; - if (to + 1 == toLim) - break; - n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); - n -= 0x10000; - to[0] = (unsigned short)((n >> 10) | 0xD800); - to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); - to += 2; - from += 4; + unsigned long n; + if (to + 1 == toLim) + goto after; + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); + n -= 0x10000; + to[0] = (unsigned short)((n >> 10) | 0xD800); + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); + to += 2; + from += 4; } break; default: @@ -330,6 +374,7 @@ void utf8_toUtf16(const ENCODING *enc, break; } } +after: *fromP = from; *toP = to; } @@ -380,10 +425,10 @@ static const struct normal_encoding internal_utf8_encoding = { STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; -static -void latin1_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +latin1_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { for (;;) { unsigned char c; @@ -392,23 +437,23 @@ void latin1_toUtf8(const ENCODING *enc, c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; - *(*toP)++ = ((c >> 6) | UTF8_cval2); - *(*toP)++ = ((c & 0x3f) | 0x80); + break; + *(*toP)++ = (char)((c >> 6) | UTF8_cval2); + *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) - break; + break; *(*toP)++ = *(*fromP)++; } } } -static -void latin1_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +latin1_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = (unsigned char)*(*fromP)++; @@ -438,10 +483,10 @@ static const struct normal_encoding latin1_encoding = { STANDARD_VTABLE(sb_) }; -static -void ascii_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +ascii_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = *(*fromP)++; @@ -471,7 +516,8 @@ static const struct normal_encoding ascii_encoding = { STANDARD_VTABLE(sb_) }; -static int unicode_byte_type(char hi, char lo) +static int PTRFASTCALL +unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: @@ -490,10 +536,10 @@ static int unicode_byte_type(char hi, char lo) } #define DEFINE_UTF16_TO_UTF8(E) \ -static \ -void E ## toUtf8(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - char **toP, const char *toLim) \ +static void PTRCALL \ +E ## toUtf8(const ENCODING *enc, \ + const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) \ { \ const char *from; \ for (from = *fromP; from != fromLim; from += 2) { \ @@ -506,7 +552,7 @@ void E ## toUtf8(const ENCODING *enc, \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ - return; \ + return; \ } \ *(*toP)++ = lo; \ break; \ @@ -516,7 +562,7 @@ void E ## toUtf8(const ENCODING *enc, \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ - return; \ + return; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ @@ -524,7 +570,7 @@ void E ## toUtf8(const ENCODING *enc, \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ - return; \ + return; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ @@ -533,8 +579,8 @@ void E ## toUtf8(const ENCODING *enc, \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ - *fromP = from; \ - return; \ + *fromP = from; \ + return; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ @@ -542,9 +588,9 @@ void E ## toUtf8(const ENCODING *enc, \ from += 2; \ lo2 = GET_LO(from); \ *(*toP)++ = (((lo & 0x3) << 4) \ - | ((GET_HI(from) & 0x3) << 2) \ - | (lo2 >> 6) \ - | 0x80); \ + | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) \ + | 0x80); \ *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ break; \ } \ @@ -553,10 +599,10 @@ void E ## toUtf8(const ENCODING *enc, \ } #define DEFINE_UTF16_TO_UTF16(E) \ -static \ -void E ## toUtf16(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - unsigned short **toP, const unsigned short *toLim) \ +static void PTRCALL \ +E ## toUtf16(const ENCODING *enc, \ + const char **fromP, const char *fromLim, \ + unsigned short **toP, const unsigned short *toLim) \ { \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ @@ -603,32 +649,32 @@ DEFINE_UTF16_TO_UTF16(big2_) #ifdef XML_MIN_SIZE -static -int little2_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_byteType(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TYPE(enc, p); } -static -int little2_byteToAscii(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_byteToAscii(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TO_ASCII(enc, p); } -static -int little2_charMatches(const ENCODING *enc, const char *p, int c) +static int PTRCALL +little2_charMatches(const ENCODING *enc, const char *p, int c) { return LITTLE2_CHAR_MATCHES(enc, p, c); } -static -int little2_isNameMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_isNameMin(const ENCODING *enc, const char *p) { return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); } -static -int little2_isNmstrtMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_isNmstrtMin(const ENCODING *enc, const char *p) { return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); } @@ -643,14 +689,16 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p) #define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) +#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) #define IS_NAME_CHAR(enc, p, n) 0 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) #define IS_NMSTRT_CHAR(enc, p, n) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) +#define XML_TOK_IMPL_C #include "xmltok_impl.c" +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -666,9 +714,9 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p) #ifdef XML_NS -static const struct normal_encoding little2_encoding_ns = { +static const struct normal_encoding little2_encoding_ns = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 1 #else 0 @@ -683,9 +731,9 @@ static const struct normal_encoding little2_encoding_ns = { #endif -static const struct normal_encoding little2_encoding = { +static const struct normal_encoding little2_encoding = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 1 #else 0 @@ -700,11 +748,11 @@ static const struct normal_encoding little2_encoding = { STANDARD_VTABLE(little2_) }; -#if XML_BYTE_ORDER != 21 +#if BYTEORDER != 4321 #ifdef XML_NS -static const struct normal_encoding internal_little2_encoding_ns = { +static const struct normal_encoding internal_little2_encoding_ns = { { VTABLE, 2, 0, 1 }, { #include "iasciitab.h" @@ -715,7 +763,7 @@ static const struct normal_encoding internal_little2_encoding_ns = { #endif -static const struct normal_encoding internal_little2_encoding = { +static const struct normal_encoding internal_little2_encoding = { { VTABLE, 2, 0, 1 }, { #define BT_COLON BT_NMSTRT @@ -742,32 +790,32 @@ static const struct normal_encoding internal_little2_encoding = { #ifdef XML_MIN_SIZE -static -int big2_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_byteType(const ENCODING *enc, const char *p) { return BIG2_BYTE_TYPE(enc, p); } -static -int big2_byteToAscii(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_byteToAscii(const ENCODING *enc, const char *p) { return BIG2_BYTE_TO_ASCII(enc, p); } -static -int big2_charMatches(const ENCODING *enc, const char *p, int c) +static int PTRCALL +big2_charMatches(const ENCODING *enc, const char *p, int c) { return BIG2_CHAR_MATCHES(enc, p, c); } -static -int big2_isNameMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_isNameMin(const ENCODING *enc, const char *p) { return BIG2_IS_NAME_CHAR_MINBPC(enc, p); } -static -int big2_isNmstrtMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_isNmstrtMin(const ENCODING *enc, const char *p) { return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); } @@ -782,14 +830,16 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p) #define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) +#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) #define IS_NAME_CHAR(enc, p, n) 0 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) #define IS_NMSTRT_CHAR(enc, p, n) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) +#define XML_TOK_IMPL_C #include "xmltok_impl.c" +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -807,7 +857,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p) static const struct normal_encoding big2_encoding_ns = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 +#if BYTEORDER == 4321 1 #else 0 @@ -824,7 +874,7 @@ static const struct normal_encoding big2_encoding_ns = { static const struct normal_encoding big2_encoding = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 +#if BYTEORDER == 4321 1 #else 0 @@ -839,7 +889,7 @@ static const struct normal_encoding big2_encoding = { STANDARD_VTABLE(big2_) }; -#if XML_BYTE_ORDER != 12 +#if BYTEORDER != 1234 #ifdef XML_NS @@ -869,16 +919,16 @@ static const struct normal_encoding internal_big2_encoding = { #undef PREFIX -static -int streqci(const char *s1, const char *s2) +static int FASTCALL +streqci(const char *s1, const char *s2) { for (;;) { char c1 = *s1++; char c2 = *s2++; - if ('a' <= c1 && c1 <= 'z') - c1 += 'A' - 'a'; - if ('a' <= c2 && c2 <= 'z') - c2 += 'A' - 'a'; + if (ASCII_a <= c1 && c1 <= ASCII_z) + c1 += ASCII_A - ASCII_a; + if (ASCII_a <= c2 && c2 <= ASCII_z) + c2 += ASCII_A - ASCII_a; if (c1 != c2) return 0; if (!c1) @@ -887,15 +937,15 @@ int streqci(const char *s1, const char *s2) return 1; } -static -void initUpdatePosition(const ENCODING *enc, const char *ptr, - const char *end, POSITION *pos) +static void PTRCALL +initUpdatePosition(const ENCODING *enc, const char *ptr, + const char *end, POSITION *pos) { normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } -static -int toAscii(const ENCODING *enc, const char *ptr, const char *end) +static int +toAscii(const ENCODING *enc, const char *ptr, const char *end) { char buf[1]; char *p = buf; @@ -906,33 +956,35 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end) return buf[0]; } -static -int isSpace(int c) +static int FASTCALL +isSpace(int c) { switch (c) { case 0x20: case 0xD: case 0xA: - case 0x9: + case 0x9: return 1; } return 0; } -/* Return 1 if there's just optional white space -or there's an S followed by name=val. */ -static -int parsePseudoAttribute(const ENCODING *enc, - const char *ptr, - const char *end, - const char **namePtr, - const char **valPtr, - const char **nextTokPtr) +/* Return 1 if there's just optional white space or there's an S + followed by name=val. +*/ +static int +parsePseudoAttribute(const ENCODING *enc, + const char *ptr, + const char *end, + const char **namePtr, + const char **nameEndPtr, + const char **valPtr, + const char **nextTokPtr) { int c; char open; if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } if (!isSpace(toAscii(enc, ptr, end))) { @@ -943,7 +995,7 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } *namePtr = ptr; @@ -953,15 +1005,18 @@ int parsePseudoAttribute(const ENCODING *enc, *nextTokPtr = ptr; return 0; } - if (c == '=') + if (c == ASCII_EQUALS) { + *nameEndPtr = ptr; break; + } if (isSpace(c)) { + *nameEndPtr = ptr; do { - ptr += enc->minBytesPerChar; + ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); - if (c != '=') { - *nextTokPtr = ptr; - return 0; + if (c != ASCII_EQUALS) { + *nextTokPtr = ptr; + return 0; } break; } @@ -977,23 +1032,23 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } - if (c != '"' && c != '\'') { + if (c != ASCII_QUOT && c != ASCII_APOS) { *nextTokPtr = ptr; return 0; } - open = c; + open = (char)c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { c = toAscii(enc, ptr, end); if (c == open) break; - if (!('a' <= c && c <= 'z') - && !('A' <= c && c <= 'Z') - && !('0' <= c && c <= '9') - && c != '.' - && c != '-' - && c != '_') { + if (!(ASCII_a <= c && c <= ASCII_z) + && !(ASCII_A <= c && c <= ASCII_Z) + && !(ASCII_0 <= c && c <= ASCII_9) + && c != ASCII_PERIOD + && c != ASCII_MINUS + && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1002,29 +1057,53 @@ int parsePseudoAttribute(const ENCODING *enc, return 1; } -static -int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, - const char *, - const char *), - int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) +static const char KW_version[] = { + ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' +}; + +static const char KW_encoding[] = { + ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' +}; + +static const char KW_standalone[] = { + ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, + ASCII_n, ASCII_e, '\0' +}; + +static const char KW_yes[] = { + ASCII_y, ASCII_e, ASCII_s, '\0' +}; + +static const char KW_no[] = { + ASCII_n, ASCII_o, '\0' +}; + +static int +doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, + const char *, + const char *), + int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) { - const char *val = 0; - const char *name = 0; + const char *val = NULL; + const char *name = NULL; + const char *nameEnd = NULL; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) { + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) + || !name) { *badPtr = ptr; return 0; } - if (!XmlNameMatchesAscii(enc, name, "version")) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { if (!isGeneralTextEntity) { *badPtr = name; return 0; @@ -1033,22 +1112,24 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, else { if (versionPtr) *versionPtr = val; - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + if (versionEndPtr) + *versionEndPtr = ptr; + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) { if (isGeneralTextEntity) { - /* a TextDecl must have an EncodingDecl */ - *badPtr = ptr; - return 0; + /* a TextDecl must have an EncodingDecl */ + *badPtr = ptr; + return 0; } return 1; } } - if (XmlNameMatchesAscii(enc, name, "encoding")) { + if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1056,22 +1137,23 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) return 1; } - if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) + || isGeneralTextEntity) { *badPtr = name; return 0; } - if (XmlNameMatchesAscii(enc, val, "yes")) { + if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; } - else if (XmlNameMatchesAscii(enc, val, "no")) { + else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; } @@ -1088,8 +1170,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, return 1; } -static -int checkCharRefNumber(int result) +static int FASTCALL +checkCharRefNumber(int result) { switch (result >> 8) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: @@ -1107,7 +1189,8 @@ int checkCharRefNumber(int result) return result; } -int XmlUtf8Encode(int c, char *buf) +int FASTCALL +XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ @@ -1119,42 +1202,43 @@ int XmlUtf8Encode(int c, char *buf) if (c < 0) return 0; if (c < min2) { - buf[0] = (c | UTF8_cval1); + buf[0] = (char)(c | UTF8_cval1); return 1; } if (c < min3) { - buf[0] = ((c >> 6) | UTF8_cval2); - buf[1] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 6) | UTF8_cval2); + buf[1] = (char)((c & 0x3f) | 0x80); return 2; } if (c < min4) { - buf[0] = ((c >> 12) | UTF8_cval3); - buf[1] = (((c >> 6) & 0x3f) | 0x80); - buf[2] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 12) | UTF8_cval3); + buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[2] = (char)((c & 0x3f) | 0x80); return 3; } if (c < 0x110000) { - buf[0] = ((c >> 18) | UTF8_cval4); - buf[1] = (((c >> 12) & 0x3f) | 0x80); - buf[2] = (((c >> 6) & 0x3f) | 0x80); - buf[3] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 18) | UTF8_cval4); + buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); + buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[3] = (char)((c & 0x3f) | 0x80); return 4; } return 0; } -int XmlUtf16Encode(int charNum, unsigned short *buf) +int FASTCALL +XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { - buf[0] = charNum; + buf[0] = (unsigned short)charNum; return 1; } if (charNum < 0x110000) { charNum -= 0x10000; - buf[0] = (charNum >> 10) + 0xD800; - buf[1] = (charNum & 0x3FF) + 0xDC00; + buf[0] = (unsigned short)((charNum >> 10) + 0xD800); + buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); return 2; } return 0; @@ -1162,71 +1246,74 @@ int XmlUtf16Encode(int charNum, unsigned short *buf) struct unknown_encoding { struct normal_encoding normal; - int (*convert)(void *userData, const char *p); + CONVERTER convert; void *userData; unsigned short utf16[256]; char utf8[256][4]; }; -int XmlSizeOfUnknownEncoding() +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) + +int +XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } -static -int unknown_isName(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isName(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); } -static -int unknown_isNmstrt(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isNmstrt(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); } -static -int unknown_isInvalid(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isInvalid(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static -void unknown_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +unknown_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) break; - utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; + utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); + int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + break; utf8 = buf; - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); } else { if (n > toLim - *toP) - break; + break; (*fromP)++; } do { @@ -1235,19 +1322,19 @@ void unknown_toUtf8(const ENCODING *enc, } } -static -void unknown_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +unknown_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); while (*fromP != fromLim && *toP != toLim) { - unsigned short c - = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; + unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { - c = (unsigned short)((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); + c = (unsigned short) + uenc->convert(uenc->userData, *fromP); + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); } else (*fromP)++; @@ -1257,18 +1344,18 @@ void unknown_toUtf16(const ENCODING *enc, ENCODING * XmlInitUnknownEncoding(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) + int *table, + CONVERTER convert, + void *userData) { int i; - struct unknown_encoding *e = mem; - for (i = 0; i < sizeof(struct normal_encoding); i++) + struct unknown_encoding *e = (struct unknown_encoding *)mem; + for (i = 0; i < (int)sizeof(struct normal_encoding); i++) ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER && latin1_encoding.type[i] != BT_NONXML - && table[i] != i) + && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; @@ -1281,20 +1368,20 @@ XmlInitUnknownEncoding(void *mem, } else if (c < 0) { if (c < -4) - return 0; - e->normal.type[i] = BT_LEAD2 - (c + 2); + return 0; + e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER - && latin1_encoding.type[c] != BT_NONXML - && c != i) - return 0; + && latin1_encoding.type[c] != BT_NONXML + && c != i) + return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; - e->utf16[i] = c == 0 ? 0xFFFF : c; + e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; @@ -1305,15 +1392,15 @@ XmlInitUnknownEncoding(void *mem, } else { if (c > 0xFFFF) - return 0; + return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) - e->normal.type[i] = BT_NMSTRT; + e->normal.type[i] = BT_NMSTRT; else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) - e->normal.type[i] = BT_NAME; + e->normal.type[i] = BT_NAME; else - e->normal.type[i] = BT_OTHER; + e->normal.type[i] = BT_OTHER; e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); - e->utf16[i] = c; + e->utf16[i] = (unsigned short)c; } } e->userData = userData; @@ -1348,50 +1435,71 @@ enum { NO_ENC }; -static -int getEncodingIndex(const char *name) +static const char KW_ISO_8859_1[] = { + ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, + ASCII_MINUS, ASCII_1, '\0' +}; +static const char KW_US_ASCII[] = { + ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, + '\0' +}; +static const char KW_UTF_8[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' +}; +static const char KW_UTF_16[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' +}; +static const char KW_UTF_16BE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, + '\0' +}; +static const char KW_UTF_16LE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, + '\0' +}; + +static int FASTCALL +getEncodingIndex(const char *name) { - static const char *encodingNames[] = { - "ISO-8859-1", - "US-ASCII", - "UTF-8", - "UTF-16", - "UTF-16BE" - "UTF-16LE", + static const char * const encodingNames[] = { + KW_ISO_8859_1, + KW_US_ASCII, + KW_UTF_8, + KW_UTF_16, + KW_UTF_16BE, + KW_UTF_16LE, }; int i; - if (name == 0) + if (name == NULL) return NO_ENC; - for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++) + for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; - - if (streqci(name, "ASCII")) - return US_ASCII_ENC; - return UNKNOWN_ENC; } -/* For binary compatibility, we store the index of the encoding specified -at initialization in the isUtf16 member. */ +/* For binary compatibility, we store the index of the encoding + specified at initialization in the isUtf16 member. +*/ -#define INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16) +#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) +#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) -/* This is what detects the encoding. -encodingTable maps from encoding indices to encodings; -INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding; -state is XML_CONTENT_STATE if we're parsing an external text entity, -and XML_PROLOG_STATE otherwise. +/* This is what detects the encoding. encodingTable maps from + encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of + the external (protocol) specified encoding; state is + XML_CONTENT_STATE if we're parsing an external text entity, and + XML_PROLOG_STATE otherwise. */ -static -int initScan(const ENCODING **encodingTable, - const INIT_ENCODING *enc, - int state, - const char *ptr, - const char *end, - const char **nextTokPtr) +static int +initScan(const ENCODING * const *encodingTable, + const INIT_ENCODING *enc, + int state, + const char *ptr, + const char *end, + const char **nextTokPtr) { const ENCODING **encPtr; @@ -1400,9 +1508,11 @@ int initScan(const ENCODING **encodingTable, encPtr = enc->encPtr; if (ptr + 1 == end) { /* only a single byte available for auto-detection */ +#ifndef XML_DTD /* FIXME */ /* a well-formed document entity must have more than one byte */ if (state != XML_CONTENT_STATE) return XML_TOK_PARTIAL; +#endif /* so we're parsing an external text entity... */ /* if UTF-16 was externally specified, then we need at least 2 bytes */ switch (INIT_ENC_INDEX(enc)) { @@ -1416,8 +1526,8 @@ int initScan(const ENCODING **encodingTable, case 0xFF: case 0xEF: /* possibly first byte of UTF-8 BOM */ if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; /* fall through */ case 0x00: case 0x3C: @@ -1428,23 +1538,23 @@ int initScan(const ENCODING **encodingTable, switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0xFEFF: if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16BE_ENC]; return XML_TOK_BOM; /* 00 3C is handled in the default case */ case 0x3C00: if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC - || INIT_ENC_INDEX(enc) == UTF_16_ENC) - && state == XML_CONTENT_STATE) - break; + || INIT_ENC_INDEX(enc) == UTF_16_ENC) + && state == XML_CONTENT_STATE) + break; *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16LE_ENC]; return XML_TOK_BOM; @@ -1453,56 +1563,64 @@ int initScan(const ENCODING **encodingTable, /* If there's an explicitly specified (external) encoding of ISO-8859-1 or some flavour of UTF-16 and this is an external text entity, - don't look for the BOM, - because it might be a legal data. */ + don't look for the BOM, + because it might be a legal data. + */ if (state == XML_CONTENT_STATE) { - int e = INIT_ENC_INDEX(enc); - if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) - break; + int e = INIT_ENC_INDEX(enc); + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC + || e == UTF_16LE_ENC || e == UTF_16_ENC) + break; } if (ptr + 2 == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if ((unsigned char)ptr[2] == 0xBF) { - *encPtr = encodingTable[UTF_8_ENC]; - return XML_TOK_BOM; + *nextTokPtr = ptr + 3; + *encPtr = encodingTable[UTF_8_ENC]; + return XML_TOK_BOM; } break; default: if (ptr[0] == '\0') { - /* 0 isn't a legal data character. Furthermore a document entity can only - start with ASCII characters. So the only way this can fail to be big-endian - UTF-16 if it it's an external parsed general entity that's labelled as - UTF-16LE. */ - if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) - break; - *encPtr = encodingTable[UTF_16BE_ENC]; - return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + /* 0 isn't a legal data character. Furthermore a document + entity can only start with ASCII characters. So the only + way this can fail to be big-endian UTF-16 if it it's an + external parsed general entity that's labelled as + UTF-16LE. + */ + if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) + break; + *encPtr = encodingTable[UTF_16BE_ENC]; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } else if (ptr[1] == '\0') { - /* We could recover here in the case: - - parsing an external entity - - second byte is 0 - - no externally specified encoding - - no encoding declaration - by assuming UTF-16LE. But we don't, because this would mean when - presented just with a single byte, we couldn't reliably determine - whether we needed further bytes. */ - if (state == XML_CONTENT_STATE) - break; - *encPtr = encodingTable[UTF_16LE_ENC]; - return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + /* We could recover here in the case: + - parsing an external entity + - second byte is 0 + - no externally specified encoding + - no encoding declaration + by assuming UTF-16LE. But we don't, because this would mean when + presented just with a single byte, we couldn't reliably determine + whether we needed further bytes. + */ + if (state == XML_CONTENT_STATE) + break; + *encPtr = encodingTable[UTF_16LE_ENC]; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } break; } } - *encPtr = encodingTable[(int)INIT_ENC_INDEX(enc)]; + *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } #define NS(x) x #define ns(x) x +#define XML_TOK_NS_C #include "xmltok_ns.c" +#undef XML_TOK_NS_C #undef NS #undef ns @@ -1511,20 +1629,22 @@ int initScan(const ENCODING **encodingTable, #define NS(x) x ## NS #define ns(x) x ## _ns +#define XML_TOK_NS_C #include "xmltok_ns.c" +#undef XML_TOK_NS_C #undef NS #undef ns ENCODING * XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) + int *table, + CONVERTER convert, + void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) - ((struct normal_encoding *)enc)->type[':'] = BT_COLON; + ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; return enc; } diff --git a/simgear/xml/xmltok.h b/simgear/xml/xmltok.h index 06544d15..ca867aa6 100644 --- a/simgear/xml/xmltok.h +++ b/simgear/xml/xmltok.h @@ -1,31 +1,5 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef XmlTok_INCLUDED @@ -35,24 +9,22 @@ your version of this file under either the MPL or the GPL. extern "C" { #endif -#ifndef XMLTOKAPI -#define XMLTOKAPI /* as nothing */ -#endif - /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of - illegal ]]> sequence */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ +#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be + start of illegal ]]> sequence */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; + might be part of CRLF sequence */ +#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also - returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ - + returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. +*/ #define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ @@ -62,22 +34,24 @@ extern "C" { #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_PI 11 /* processing instruction */ +#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* */ +#define XML_TOK_DECL_OPEN 16 /* */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ +#define XML_TOK_POUND_NAME 20 /* #name */ +#define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 @@ -88,14 +62,14 @@ extern "C" { #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* */ +#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ @@ -104,14 +78,27 @@ extern "C" { /* The following token is returned only by XmlCdataSectionTok */ #define XML_TOK_CDATA_SECT_CLOSE 40 -/* With namespace processing this is returned by XmlPrologTok - for a name with a colon. */ +/* With namespace processing this is returned by XmlPrologTok for a + name with a colon. +*/ #define XML_TOK_PREFIXED_NAME 41 +#ifdef XML_DTD +#define XML_TOK_IGNORE_SECT 42 +#endif /* XML_DTD */ + +#ifdef XML_DTD +#define XML_N_STATES 4 +#else /* not XML_DTD */ #define XML_N_STATES 3 +#endif /* not XML_DTD */ + #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 +#ifdef XML_DTD +#define XML_IGNORE_SECTION_STATE 3 +#endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 #define XML_ATTRIBUTE_VALUE_LITERAL 0 @@ -124,8 +111,8 @@ extern "C" { typedef struct position { /* first line and first column are 0 not 1 */ - unsigned long lineNumber; - unsigned long columnNumber; + XML_Size lineNumber; + XML_Size columnNumber; } POSITION; typedef struct { @@ -138,64 +125,73 @@ typedef struct { struct encoding; typedef struct encoding ENCODING; +typedef int (PTRCALL *SCANNER)(const ENCODING *, + const char *, + const char *, + const char **); + struct encoding { - int (*scanners[XML_N_STATES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*sameName)(const ENCODING *, - const char *, const char *); - int (*nameMatchesAscii)(const ENCODING *, - const char *, const char *); - int (*nameLength)(const ENCODING *, const char *); - const char *(*skipS)(const ENCODING *, const char *); - int (*getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts); - int (*charRefNumber)(const ENCODING *enc, const char *ptr); - int (*predefinedEntityName)(const ENCODING *, const char *, const char *); - void (*updatePosition)(const ENCODING *, - const char *ptr, - const char *end, - POSITION *); - int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr); - void (*utf8Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - char **toP, - const char *toLim); - void (*utf16Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - unsigned short **toP, - const unsigned short *toLim); + SCANNER scanners[XML_N_STATES]; + SCANNER literalScanners[XML_N_LITERAL_TYPES]; + int (PTRCALL *sameName)(const ENCODING *, + const char *, + const char *); + int (PTRCALL *nameMatchesAscii)(const ENCODING *, + const char *, + const char *, + const char *); + int (PTRFASTCALL *nameLength)(const ENCODING *, const char *); + const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); + int (PTRCALL *getAtts)(const ENCODING *enc, + const char *ptr, + int attsMax, + ATTRIBUTE *atts); + int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); + int (PTRCALL *predefinedEntityName)(const ENCODING *, + const char *, + const char *); + void (PTRCALL *updatePosition)(const ENCODING *, + const char *ptr, + const char *end, + POSITION *); + int (PTRCALL *isPublicId)(const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr); + void (PTRCALL *utf8Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + char **toP, + const char *toLim); + void (PTRCALL *utf16Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + unsigned short **toP, + const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; }; -/* -Scan the string starting at ptr until the end of the next complete token, -but do not scan past eptr. Return an integer giving the type of token. +/* Scan the string starting at ptr until the end of the next complete + token, but do not scan past eptr. Return an integer giving the + type of token. -Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. + Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. -Return XML_TOK_PARTIAL when the string does not contain a complete token; -nextTokPtr will not be set. + Return XML_TOK_PARTIAL when the string does not contain a complete + token; nextTokPtr will not be set. -Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr -will be set to point to the character which made the token invalid. + Return XML_TOK_INVALID when the string does not start a valid + token; nextTokPtr will be set to point to the character which made + the token invalid. -Otherwise the string starts with a valid token; nextTokPtr will be set to point -to the character following the end of that token. + Otherwise the string starts with a valid token; nextTokPtr will be + set to point to the character following the end of that token. -Each data character counts as a single token, but adjacent data characters -may be returned together. Similarly for characters in the prolog outside -literals, comments and processing instructions. + Each data character counts as a single token, but adjacent data + characters may be returned together. Similarly for characters in + the prolog outside literals, comments and processing instructions. */ @@ -211,9 +207,16 @@ literals, comments and processing instructions. #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) -/* This is used for performing a 2nd-level tokenization on -the content of a literal that has already been returned by XmlTok. */ +#ifdef XML_DTD + +#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +#endif /* XML_DTD */ + +/* This is used for performing a 2nd-level tokenization on the content + of a literal that has already been returned by XmlTok. +*/ #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) @@ -225,8 +228,8 @@ the content of a literal that has already been returned by XmlTok. */ #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) -#define XmlNameMatchesAscii(enc, ptr1, ptr2) \ - (((enc)->nameMatchesAscii)(enc, ptr1, ptr2)) +#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ + (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) #define XmlNameLength(enc, ptr) \ (((enc)->nameLength)(enc, ptr)) @@ -260,46 +263,52 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); - -int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding(); -const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding(); -int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf); -int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf); - -int XMLTOKAPI XmlSizeOfUnknownEncoding(); -ENCODING XMLTOKAPI * +int XmlParseXmlDecl(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); + +int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncoding(void); +const ENCODING *XmlGetUtf16InternalEncoding(void); +int FASTCALL XmlUtf8Encode(int charNumber, char *buf); +int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); +int XmlSizeOfUnknownEncoding(void); + + +typedef int (XMLCALL *CONVERTER) (void *userData, const char *p); + +ENCODING * XmlInitUnknownEncoding(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); - -int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); -int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS(); -const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS(); -ENCODING XMLTOKAPI * + int *table, + CONVERTER convert, + void *userData); + +int XmlParseXmlDeclNS(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); + +int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncodingNS(void); +const ENCODING *XmlGetUtf16InternalEncodingNS(void); +ENCODING * XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); + int *table, + CONVERTER convert, + void *userData); #ifdef __cplusplus } #endif diff --git a/simgear/xml/xmltok_impl.c b/simgear/xml/xmltok_impl.c index c52539be..9c2895b8 100644 --- a/simgear/xml/xmltok_impl.c +++ b/simgear/xml/xmltok_impl.c @@ -1,33 +1,10 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ +/* This file is included! */ +#ifdef XML_TOK_IMPL_C + #ifndef IS_INVALID_CHAR #define IS_INVALID_CHAR(enc, ptr, n) (0) #endif @@ -35,7 +12,7 @@ your version of this file under either the MPL or the GPL. #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ + return XML_TOK_PARTIAL_CHAR; \ if (IS_INVALID_CHAR(enc, ptr, n)) { \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; \ @@ -112,12 +89,12 @@ your version of this file under either the MPL or the GPL. /* ptr points to character following "')) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_COMMENT; - } - break; + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_COMMENT; + } + break; default: - ptr += MINBPC(enc); - break; + ptr += MINBPC(enc); + break; } } } @@ -150,9 +127,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: @@ -197,17 +174,18 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) +static int PTRCALL +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, + const char *end, int *tokPtr) { int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { - case 'x': + case ASCII_x: break; - case 'X': + case ASCII_X: upper = 1; break; default: @@ -215,9 +193,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'm': + case ASCII_m: break; - case 'M': + case ASCII_M: upper = 1; break; default: @@ -225,9 +203,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': + case ASCII_l: break; - case 'L': + case ASCII_L: upper = 1; break; default: @@ -241,9 +219,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "')) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; - } - break; - default: - ptr += MINBPC(enc); - break; - } + case BT_QUEST: + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr + MINBPC(enc); + return tok; + } + break; + default: + ptr += MINBPC(enc); + break; + } } return XML_TOK_PARTIAL; case BT_QUEST: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr + MINBPC(enc); + return tok; } /* fall through */ default: @@ -303,17 +281,18 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } - -static -int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { + static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, + ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { - if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { + if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -322,9 +301,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e return XML_TOK_CDATA_SECT_OPEN; } -static -int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; @@ -333,7 +312,7 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -342,12 +321,12 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -374,8 +353,8 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; @@ -400,9 +379,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en /* ptr points to character following "')) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - break; - } - break; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + break; + } + break; } default: *nextTokPtr = ptr; @@ -702,9 +681,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "<" */ -static -int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { #ifdef XML_NS int hadColon; @@ -720,7 +699,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: - return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), + end, nextTokPtr); } *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -742,13 +722,13 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS case BT_COLON: if (hadColon) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -760,23 +740,23 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: - goto gt; - case BT_SOL: - goto sol; - case BT_S: case BT_CR: case BT_LF: - ptr += MINBPC(enc); - continue; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); - } - return XML_TOK_PARTIAL; + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + goto gt; + case BT_SOL: + goto sol; + case BT_S: case BT_CR: case BT_LF: + ptr += MINBPC(enc); + continue; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); + } + return XML_TOK_PARTIAL; } case BT_GT: gt: @@ -786,10 +766,10 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, sol: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + return XML_TOK_PARTIAL; + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_EMPTY_ELEMENT_NO_ATTS; @@ -801,9 +781,9 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; @@ -812,7 +792,7 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -836,12 +816,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -857,8 +837,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; @@ -866,18 +846,18 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (ptr + 2*MINBPC(enc) != end) { + if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2*MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -900,9 +880,9 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "%" */ -static -int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -929,9 +909,9 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -953,13 +933,13 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - return XML_TOK_PARTIAL; + return -XML_TOK_POUND_NAME; } -static -int PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanLit)(int open, const ENCODING *enc, + const char *ptr, const char *end, + const char **nextTokPtr) { while (ptr != end) { int t = BYTE_TYPE(enc, ptr); @@ -969,16 +949,16 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, case BT_APOS: ptr += MINBPC(enc); if (t != open) - break; + break; if (ptr == end) - return XML_TOK_PARTIAL; + return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: case BT_GT: case BT_PERCNT: case BT_LSQB: - return XML_TOK_LITERAL; + return XML_TOK_LITERAL; default: - return XML_TOK_INVALID; + return XML_TOK_INVALID; } default: ptr += MINBPC(enc); @@ -988,9 +968,9 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, return XML_TOK_PARTIAL; } -static -int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int tok; if (ptr == end) @@ -1000,7 +980,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -1013,44 +993,47 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_NMSTRT: case BT_HEX: case BT_NONASCII: case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } case BT_CR: - if (ptr + MINBPC(enc) == end) - return XML_TOK_TRAILING_CR; + if (ptr + MINBPC(enc) == end) { + *nextTokPtr = end; + /* indicate that this might be part of a CR/LF pair */ + return -XML_TOK_PROLOG_S; + } /* fall through */ case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); if (ptr == end) - break; + break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: - break; + break; case BT_CR: - /* don't split CR/LF pair */ - if (ptr + MINBPC(enc) != end) - break; - /* fall through */ + /* don't split CR/LF pair */ + if (ptr + MINBPC(enc) != end) + break; + /* fall through */ default: - *nextTokPtr = ptr; - return XML_TOK_PROLOG_S; + *nextTokPtr = ptr; + return XML_TOK_PROLOG_S; } } *nextTokPtr = ptr; @@ -1066,13 +1049,13 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ']')) { + return -XML_TOK_CLOSE_BRACKET; + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) { - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_COND_SECT_CLOSE; + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { + *nextTokPtr = ptr + 2*MINBPC(enc); + return XML_TOK_COND_SECT_CLOSE; } } *nextTokPtr = ptr; @@ -1083,7 +1066,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_RPAR: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: *nextTokPtr = ptr + MINBPC(enc); @@ -1171,40 +1154,40 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; - tok = XML_TOK_PREFIXED_NAME; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - default: - tok = XML_TOK_NMTOKEN; - break; - } - break; + if (ptr == end) + return XML_TOK_PARTIAL; + tok = XML_TOK_PREFIXED_NAME; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + default: + tok = XML_TOK_NMTOKEN; + break; + } + break; case XML_TOK_PREFIXED_NAME: - tok = XML_TOK_NMTOKEN; - break; + tok = XML_TOK_NMTOKEN; + break; } break; #endif case BT_PLUS: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_QUESTION; @@ -1213,12 +1196,12 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - return XML_TOK_PARTIAL; + return -tok; } -static -int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) @@ -1232,7 +1215,7 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * #undef LEAD_CASE case BT_AMP: if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: @@ -1241,27 +1224,27 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * return XML_TOK_INVALID; case BT_LF: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC(enc); + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_ATTRIBUTE_VALUE_S; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; @@ -1274,9 +1257,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * return XML_TOK_DATA_CHARS; } -static -int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) @@ -1290,30 +1273,33 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end #undef LEAD_CASE case BT_AMP: if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: - if (ptr == start) - return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); + if (ptr == start) { + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), + end, nextTokPtr); + return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; + } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC(enc); + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; @@ -1326,9 +1312,64 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end return XML_TOK_DATA_CHARS; } -static -int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) +#ifdef XML_DTD + +static int PTRCALL +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) +{ + int level = 0; + if (MINBPC(enc) > 1) { + size_t n = end - ptr; + if (n & (MINBPC(enc) - 1)) { + n &= ~(MINBPC(enc) - 1); + end = ptr + n; + } + } + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { + INVALID_CASES(ptr, nextTokPtr) + case BT_LT: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { + ++level; + ptr += MINBPC(enc); + } + } + break; + case BT_RSQB: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + ptr += MINBPC(enc); + if (level == 0) { + *nextTokPtr = ptr; + return XML_TOK_IGNORE_SECT; + } + --level; + } + } + break; + default: + ptr += MINBPC(enc); + break; + } + } + return XML_TOK_PARTIAL; +} + +#endif /* XML_DTD */ + +static int PTRCALL +PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); @@ -1357,23 +1398,23 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, #endif break; case BT_S: - if (CHAR_MATCHES(enc, ptr, '\t')) { - *badPtr = ptr; - return 0; + if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { + *badPtr = ptr; + return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) - break; + break; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ - break; + break; default: - *badPtr = ptr; - return 0; + *badPtr = ptr; + return 0; } break; } @@ -1381,27 +1422,29 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, return 1; } -/* This must only be called for a well-formed start-tag or empty element tag. -Returns the number of attributes. Pointers to the first attsMax attributes -are stored in atts. */ +/* This must only be called for a well-formed start-tag or empty + element tag. Returns the number of attributes. Pointers to the + first attsMax attributes are stored in atts. +*/ -static -int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) +static int PTRCALL +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, + int attsMax, ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; - int open = 0; + int open = 0; /* defined when state == inValue; + initialization just to shut up compilers */ for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { #define START_NAME \ if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; @@ -1415,47 +1458,47 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, #undef START_NAME case BT_QUOT: if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); + if (nAtts < attsMax) + atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; + if (nAtts < attsMax) + atts[nAtts].valueEnd = ptr; + nAtts++; } break; case BT_APOS: if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); + if (nAtts < attsMax) + atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; + if (nAtts < attsMax) + atts[nAtts].valueEnd = ptr; + nAtts++; } break; case BT_AMP: if (nAtts < attsMax) - atts[nAtts].normalized = 0; + atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized - && (ptr == atts[nAtts].valuePtr - || BYTE_TO_ASCII(enc, ptr) != ' ' - || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' - || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) - atts[nAtts].normalized = 0; + && nAtts < attsMax + && atts[nAtts].normalized + && (ptr == atts[nAtts].valuePtr + || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE + || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE + || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) + atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted @@ -1463,12 +1506,12 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) - atts[nAtts].normalized = 0; + atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) - return nAtts; + return nAtts; break; default: break; @@ -1477,92 +1520,97 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, /* not reached */ } -static -int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +static int PTRFASTCALL +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'x')) { - for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + if (CHAR_MATCHES(enc, ptr, ASCII_x)) { + for (ptr += MINBPC(enc); + !CHAR_MATCHES(enc, ptr, ASCII_SEMI); + ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - result <<= 4; - result |= (c - '0'); - break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - result <<= 4; - result += 10 + (c - 'A'); - break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - result <<= 4; - result += 10 + (c - 'a'); - break; + case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: + case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: + result <<= 4; + result |= (c - ASCII_0); + break; + case ASCII_A: case ASCII_B: case ASCII_C: + case ASCII_D: case ASCII_E: case ASCII_F: + result <<= 4; + result += 10 + (c - ASCII_A); + break; + case ASCII_a: case ASCII_b: case ASCII_c: + case ASCII_d: case ASCII_e: case ASCII_f: + result <<= 4; + result += 10 + (c - ASCII_a); + break; } if (result >= 0x110000) - return -1; + return -1; } } else { - for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; - result += (c - '0'); + result += (c - ASCII_0); if (result >= 0x110000) - return -1; + return -1; } } return checkCharRefNumber(result); } -static -int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) +static int PTRCALL +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, + const char *end) { switch ((end - ptr)/MINBPC(enc)) { case 2: - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': - return '<'; - case 'g': - return '>'; + case ASCII_l: + return ASCII_LT; + case ASCII_g: + return ASCII_GT; } } break; case 3: - if (CHAR_MATCHES(enc, ptr, 'a')) { + if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'm')) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) - return '&'; + if (CHAR_MATCHES(enc, ptr, ASCII_m)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_p)) + return ASCII_AMP; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { - case 'q': + case ASCII_q: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'u')) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 't')) - return '"'; - } + if (CHAR_MATCHES(enc, ptr, ASCII_u)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_t)) + return ASCII_QUOT; + } } break; - case 'a': + case ASCII_a: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 's')) - return '\''; - } + if (CHAR_MATCHES(enc, ptr, ASCII_p)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_s)) + return ASCII_APOS; + } } break; } @@ -1570,20 +1618,20 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha return 0; } -static -int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) +static int PTRCALL +PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { for (;;) { switch (BYTE_TYPE(enc, ptr1)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ - return 0; + return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) #undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) - return 0; + return 0; break; case BT_NONASCII: case BT_NMSTRT: @@ -1595,23 +1643,23 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) - return 0; + return 0; if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; + if (*ptr2++ != *ptr1++) + return 0; + if (MINBPC(enc) > 2) { + if (*ptr2++ != *ptr1++) + return 0; if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } + if (*ptr2++ != *ptr1++) + return 0; + } + } } break; default: if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; + return 1; switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: @@ -1625,43 +1673,30 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_DIGIT: case BT_NAME: case BT_MINUS: - return 0; + return 0; default: - return 1; + return 1; } } } /* not reached */ } -static -int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2) +static int PTRCALL +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, + const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { + if (ptr1 == end1) + return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } - switch (BYTE_TYPE(enc, ptr1)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } + return ptr1 == end1; } -static -int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) +static int PTRFASTCALL +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { @@ -1682,13 +1717,13 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) ptr += MINBPC(enc); break; default: - return ptr - start; + return (int)(ptr - start); } } } -static -const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) +static const char * PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { @@ -1703,13 +1738,13 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) } } -static -void PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) +static void PTRCALL +PREFIX(updatePosition)(const ENCODING *enc, + const char *ptr, + const char *end, + POSITION *pos) { - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -1718,7 +1753,7 @@ void PREFIX(updatePosition)(const ENCODING *enc, LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_LF: - pos->columnNumber = (unsigned)-1; + pos->columnNumber = (XML_Size)-1; pos->lineNumber++; ptr += MINBPC(enc); break; @@ -1726,8 +1761,8 @@ void PREFIX(updatePosition)(const ENCODING *enc, pos->lineNumber++; ptr += MINBPC(enc); if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - pos->columnNumber = (unsigned)-1; + ptr += MINBPC(enc); + pos->columnNumber = (XML_Size)-1; break; default: ptr += MINBPC(enc); @@ -1744,3 +1779,5 @@ void PREFIX(updatePosition)(const ENCODING *enc, #undef CHECK_NAME_CASES #undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASES + +#endif /* XML_TOK_IMPL_C */ diff --git a/simgear/xml/xmltok_ns.c b/simgear/xml/xmltok_ns.c index f2541616..c3b88fdf 100644 --- a/simgear/xml/xmltok_ns.c +++ b/simgear/xml/xmltok_ns.c @@ -1,22 +1,32 @@ -const ENCODING *NS(XmlGetUtf8InternalEncoding)() +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. +*/ + +/* This file is included! */ +#ifdef XML_TOK_NS_C + +const ENCODING * +NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } -const ENCODING *NS(XmlGetUtf16InternalEncoding)() +const ENCODING * +NS(XmlGetUtf16InternalEncoding)(void) { -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 return &ns(internal_little2_encoding).enc; -#elif XML_BYTE_ORDER == 21 +#elif BYTEORDER == 4321 return &ns(internal_big2_encoding).enc; #else const short n = 1; - return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; + return (*(const char *)&n + ? &ns(internal_little2_encoding).enc + : &ns(internal_big2_encoding).enc); #endif } -static -const ENCODING *NS(encodings)[] = { +static const ENCODING * const NS(encodings)[] = { &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, &ns(utf8_encoding).enc, @@ -26,26 +36,30 @@ const ENCODING *NS(encodings)[] = { &ns(utf8_encoding).enc /* NO_ENC */ }; -static -int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); + return initScan(NS(encodings), (const INIT_ENCODING *)enc, + XML_PROLOG_STATE, ptr, end, nextTokPtr); } -static -int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); + return initScan(NS(encodings), (const INIT_ENCODING *)enc, + XML_CONTENT_STATE, ptr, end, nextTokPtr); } -int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) +int +NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; - INIT_ENC_INDEX(p) = (char)i; + SET_INIT_ENC_INDEX(p, i); p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog); p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent); p->initEnc.updatePosition = initUpdatePosition; @@ -54,8 +68,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n return 1; } -static -const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) +static const ENCODING * +NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { #define ENCODING_MAX 128 char buf[ENCODING_MAX]; @@ -65,7 +79,7 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha if (ptr != end) return 0; *p = 0; - if (streqci(buf, "UTF-16") && enc->minBytesPerChar == 2) + if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) @@ -73,24 +87,29 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha return NS(encodings)[i]; } -int NS(XmlParseXmlDecl)(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) +int +NS(XmlParseXmlDecl)(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) { return doParseXmlDecl(NS(findEncoding), - isGeneralTextEntity, - enc, - ptr, - end, - badPtr, - versionPtr, - encodingName, - encoding, - standalone); + isGeneralTextEntity, + enc, + ptr, + end, + badPtr, + versionPtr, + versionEndPtr, + encodingName, + encoding, + standalone); } + +#endif /* XML_TOK_NS_C */ -- 2.39.5