2 The contents of this file are subject to the Mozilla Public License
3 Version 1.1 (the "License"); you may not use this file except in
4 compliance with the License. You may obtain a copy of the License at
5 http://www.mozilla.org/MPL/
7 Software distributed under the License is distributed on an "AS IS"
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9 License for the specific language governing rights and limitations
12 The Original Code is expat.
14 The Initial Developer of the Original Code is James Clark.
15 Portions created by James Clark are Copyright (C) 1998, 1999
16 James Clark. All Rights Reserved.
20 Alternatively, the contents of this file may be used under the terms
21 of the GNU General Public License (the "GPL"), in which case the
22 provisions of the GPL are applicable instead of those above. If you
23 wish to allow use of your version of this file only under the terms of
24 the GPL and not to allow others to use your version of this file under
25 the MPL, indicate your decision by deleting the provisions above and
26 replace them with the notice and other provisions required by the
27 GPL. If you do not delete the provisions above, a recipient may use
28 your version of this file under either the MPL or the GPL.
/* Internal-encoding aliases, UTF-16 flavour: the generic Xml* names used by
   the parser are mapped onto the XmlUtf16* implementations.
   NOTE(review): the preprocessor conditional that selects between this set
   and the UTF-8 set below is not visible in this section -- confirm. */
35 #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
36 #define XmlConvert XmlUtf16Convert
37 #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
38 #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
39 #define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the source encoding is flagged isUtf16 and the
   address held in s is even (low bit clear).
   NOTE(review): the macro argument s is not parenthesised inside the cast. */
40 #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
/* Unit type of the internal encoding for this flavour. */
41 typedef unsigned short ICHAR;
/* Internal-encoding aliases, UTF-8 flavour: the same generic names mapped
   onto the XmlUtf8* implementations. */
43 #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
44 #define XmlConvert XmlUtf8Convert
45 #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
46 #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
47 #define XmlEncode XmlUtf8Encode
/* Conversion is needed whenever the source encoding is not flagged isUtf8. */
48 #define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
/* Map every namespace-aware (NS) entry point onto its plain counterpart.
   XmlGetInternalEncodingNS was already defined above, so it is #undef'ed
   before being redirected.
   NOTE(review): the condition under which these aliases apply is not visible
   in this section -- presumably a build without namespace support; confirm. */
55 #define XmlInitEncodingNS XmlInitEncoding
56 #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
57 #undef XmlGetInternalEncodingNS
58 #define XmlGetInternalEncodingNS XmlGetInternalEncoding
59 #define XmlParseXmlDeclNS XmlParseXmlDecl
/* When XML_UNICODE_WCHAR_T is defined, XML_T(x) pastes an L prefix onto x,
   turning a character or string literal into its wide form. */
64 #ifdef XML_UNICODE_WCHAR_T
65 #define XML_T(x) L ## x
/* Both arguments are parenthesised in the expansion, but sz is expanded
   twice, so it should be an expression without side effects. */
70 /* Round up n to be a multiple of sz, where sz is a power of 2. */
71 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
75 #include "hashtable.h"
/* Initial allocation sizes used by the parser. */
/* Bytes malloc'ed for the name buffer of a freshly allocated tag. */
77 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
/* Number of XML_Char units allocated for the parser's dataBuf. */
78 #define INIT_DATA_BUF_SIZE 1024
/* Number of ATTRIBUTE slots initially allocated for the parser's atts. */
79 #define INIT_ATTS_SIZE 16
/* NOTE(review): no use of INIT_BLOCK_SIZE is visible in this section;
   presumably the string-pool block size -- confirm. */
80 #define INIT_BLOCK_SIZE 1024
/* Starting candidate size when XML_GetBuffer has to allocate a buffer. */
81 #define INIT_BUFFER_SIZE 1024
/* NOTE(review): no use of EXPAND_SPARE is visible in this section -- confirm
   its meaning at the point of use. */
83 #define EXPAND_SPARE 24
/* Namespace binding record.  nextTagBinding is the link that
   destroyBindings() follows when it walks a list of bindings; attId refers
   to an attribute_id record. */
85 typedef struct binding {
86 struct prefix *prefix;
87 struct binding *nextTagBinding;
88 struct binding *prevPrefixBinding;
89 const struct attribute_id *attId;
/* Namespace prefix record; a PREFIX instance also appears below as
   defaultPrefix. */
95 typedef struct prefix {
/* The members below are all read-only XML_Char strings.
   NOTE(review): presumably localPart is the part of a tag name after the
   prefix, and name/textPtr/systemId/base/publicId/notation describe an
   entity -- confirm against the enclosing struct declarations. */
102 const XML_Char *localPart;
117 const XML_Char *name;
118 const XML_Char *textPtr;
120 const XML_Char *systemId;
121 const XML_Char *base;
122 const XML_Char *publicId;
123 const XML_Char *notation;
127 typedef struct block {
141 /* The XML_Char before the name is used to determine whether
142 an attribute has been specified. */
143 typedef struct attribute_id {
151 const ATTRIBUTE_ID *id;
153 const XML_Char *value;
157 const XML_Char *name;
/* NOTE(review): presumably the allocated capacity of, and the storage for,
   an array of DEFAULT_ATTRIBUTE records -- confirm. */
160 int allocDefaultAtts;
161 DEFAULT_ATTRIBUTE *defaultAtts;
/* Hash tables reachable through the parser's dtd member; generalEntities is
   searched with lookup() by entity name when content references an entity. */
165 HASH_TABLE generalEntities;
166 HASH_TABLE elementTypes;
167 HASH_TABLE attributeIds;
172 const XML_Char *base;
173 PREFIX defaultPrefix;
/* Node of the list of internal entities currently being expanded.
   doContent() links a local node in front of openInternalEntities before it
   recurses into an entity's replacement text and restores the previous head
   from next afterwards.  The two event pointers stand in for the parser's
   eventPtr/eventEndPtr while processing text in the internal encoding, and
   they are what XML_DefaultCurrent() reports while an entity is open. */
176 typedef struct open_internal_entity {
177 const char *internalEventPtr;
178 const char *internalEventEndPtr;
179 struct open_internal_entity *next;
181 } OPEN_INTERNAL_ENTITY;
/* Function type of a parsing stage.  A processor takes the parser plus input
   pointers and returns an enum XML_Error; the call sites in this file pass
   (parser, start, end, endPtr).  The parser keeps a pointer to the current
   stage in m_processor, and stages replace it to hand over to the next one. */
183 typedef enum XML_Error Processor(XML_Parser parser,
186 const char **endPtr);
/* File-local stages, declared through the function typedef. */
188 static Processor prologProcessor;
189 static Processor prologInitProcessor;
190 static Processor contentProcessor;
191 static Processor cdataSectionProcessor;
192 static Processor epilogProcessor;
193 static Processor externalEntityInitProcessor;
194 static Processor externalEntityInitProcessor2;
195 static Processor externalEntityInitProcessor3;
196 static Processor externalEntityContentProcessor;
/* Forward declarations of the helper functions defined later in this file. */
/* Encoding set-up and XML declaration handling. */
198 static enum XML_Error
199 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
200 static enum XML_Error
201 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
202 static enum XML_Error
203 initializeEncoding(XML_Parser parser);
/* Content and CDATA-section parsing. */
204 static enum XML_Error
205 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
206 const char *start, const char *end, const char **endPtr);
207 static enum XML_Error
208 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
/* Attribute and namespace-binding handling. */
209 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
210 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
212 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
214 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
215 static enum XML_Error
216 storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
218 static enum XML_Error
219 appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
221 static ATTRIBUTE_ID *
222 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
223 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
224 static enum XML_Error
225 storeEntityValue(XML_Parser parser, const char *start, const char *end);
/* Event reporting: each takes the encoding and the [start, end) byte range
   of the construct being reported. */
227 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
229 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
231 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
/* Namespace context, public-id normalisation and DTD lifetime. */
233 static const XML_Char *getContext(XML_Parser parser);
234 static int setContext(XML_Parser parser, const XML_Char *context);
235 static void normalizePublicId(XML_Char *s);
236 static int dtdInit(DTD *);
237 static void dtdDestroy(DTD *);
238 static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
/* String pool primitives. */
239 static void poolInit(STRING_POOL *);
240 static void poolClear(STRING_POOL *);
241 static void poolDestroy(STRING_POOL *);
242 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
243 const char *ptr, const char *end);
244 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
245 const char *ptr, const char *end);
246 static int poolGrow(STRING_POOL *pool);
247 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
248 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
/* Inline accessors for a STRING_POOL.  The string under construction
   occupies [start, ptr) and end marks the limit of the current storage. */
/* Beginning of the string under construction. */
250 #define poolStart(pool) ((pool)->start)
/* One past the last unit written so far. */
251 #define poolEnd(pool) ((pool)->ptr)
/* Number of units written so far. */
252 #define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drops the last unit written.
   NOTE(review): pool is not parenthesised in the expansion. */
253 #define poolChop(pool) ((void)--(pool->ptr))
/* The last unit written (lvalue). */
254 #define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandons the string under construction by rewinding ptr to start. */
255 #define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commits the string under construction by moving start up to ptr. */
256 #define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Appends one unit: when the storage is full (ptr == end) poolGrow() is
   called first and its failure is tested; in the store branch c is written
   at ptr, ptr is advanced and the expression evaluates to 1. */
257 #define poolAppendChar(pool, c) \
258 (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
260 : ((*((pool)->ptr)++ = c), 1))
/* Members of the concrete parser object behind the opaque XML_Parser handle.
   The rest of the file reaches them through the accessor macros that drop
   the m_ prefix and cast a variable named parser to Parser *. */
263 /* The first member must be userData so that the XML_GetUserData macro works. */
267 /* first character to be parsed */
268 const char *m_bufferPtr;
269 /* past last character to be parsed */
271 /* allocated end of buffer */
272 const char *m_bufferLim;
/* Running total of the len values fed to XML_Parse / XML_ParseBuffer, and
   the end pointer of the range most recently handed to the processor;
   XML_GetCurrentByteIndex combines the two with eventPtr. */
273 long m_parseEndByteIndex;
274 const char *m_parseEndPtr;
276 XML_Char *m_dataBufEnd;
/* Application callbacks; XML_ParserCreate clears them and the XML_Set*
   functions install them. */
277 XML_StartElementHandler m_startElementHandler;
278 XML_EndElementHandler m_endElementHandler;
279 XML_CharacterDataHandler m_characterDataHandler;
280 XML_ProcessingInstructionHandler m_processingInstructionHandler;
281 XML_CommentHandler m_commentHandler;
282 XML_StartCdataSectionHandler m_startCdataSectionHandler;
283 XML_EndCdataSectionHandler m_endCdataSectionHandler;
284 XML_DefaultHandler m_defaultHandler;
285 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
286 XML_NotationDeclHandler m_notationDeclHandler;
287 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
288 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
289 XML_NotStandaloneHandler m_notStandaloneHandler;
290 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
/* First argument passed to the external entity handler; XML_ParserCreate
   points it at the parser itself. */
291 void *m_externalEntityRefHandlerArg;
292 XML_UnknownEncodingHandler m_unknownEncodingHandler;
293 const ENCODING *m_encoding;
294 INIT_ENCODING m_initEncoding;
/* Encoding name supplied by the caller, copied into m_tempPool; 0 if none. */
295 const XML_Char *m_protocolEncodingName;
/* Unknown-encoding support: XML_ParserFree frees m_unknownEncodingMem and,
   if a release callback is set, calls it with m_unknownEncodingData. */
297 void *m_unknownEncodingMem;
298 void *m_unknownEncodingData;
299 void *m_unknownEncodingHandlerData;
300 void (*m_unknownEncodingRelease)(void *);
301 PROLOG_STATE m_prologState;
/* Current parsing stage; invoked by XML_Parse / XML_ParseBuffer. */
302 Processor *m_processor;
303 enum XML_Error m_errorCode;
/* Byte range of the current event, and the point up to which the line and
   column position has already been computed. */
304 const char *m_eventPtr;
305 const char *m_eventEndPtr;
306 const char *m_positionPtr;
307 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
/* Set to 0 by XML_SetDefaultHandler and to 1 by XML_SetDefaultHandlerExpand. */
308 int m_defaultExpandInternalEntities;
310 ENTITY *m_declEntity;
311 const XML_Char *m_declNotationName;
312 const XML_Char *m_declNotationPublicId;
313 ELEMENT_TYPE *m_declElementType;
314 ATTRIBUTE_ID *m_declAttributeId;
315 char m_declAttributeIsCdata;
319 BINDING *m_inheritedBindings;
320 BINDING *m_freeBindingList;
322 int m_nSpecifiedAtts;
325 STRING_POOL m_tempPool;
326 STRING_POOL m_temp2Pool;
327 char *m_groupConnector;
328 unsigned m_groupSize;
329 int m_hadExternalDoctype;
/* '!' after XML_ParserCreate; XML_ParserCreateNS overwrites it with nsSep. */
330 XML_Char m_namespaceSeparator;
/* Field accessors.  Each macro expands to the matching m_ member of the
   Parser reached by casting a variable that must be literally named parser,
   so they are usable (as lvalues) only where such a variable is in scope.
   Re-pointing parser at another object redirects every accessor at once. */
333 #define userData (((Parser *)parser)->m_userData)
334 #define handlerArg (((Parser *)parser)->m_handlerArg)
335 #define startElementHandler (((Parser *)parser)->m_startElementHandler)
336 #define endElementHandler (((Parser *)parser)->m_endElementHandler)
337 #define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
338 #define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
339 #define commentHandler (((Parser *)parser)->m_commentHandler)
340 #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
341 #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
342 #define defaultHandler (((Parser *)parser)->m_defaultHandler)
343 #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
344 #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
345 #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
346 #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
347 #define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
348 #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
349 #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
350 #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)
351 #define encoding (((Parser *)parser)->m_encoding)
352 #define initEncoding (((Parser *)parser)->m_initEncoding)
353 #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
354 #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
355 #define unknownEncodingHandlerData \
356 (((Parser *)parser)->m_unknownEncodingHandlerData)
357 #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
358 #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
359 #define ns (((Parser *)parser)->m_ns)
360 #define prologState (((Parser *)parser)->m_prologState)
361 #define processor (((Parser *)parser)->m_processor)
362 #define errorCode (((Parser *)parser)->m_errorCode)
363 #define eventPtr (((Parser *)parser)->m_eventPtr)
364 #define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
365 #define positionPtr (((Parser *)parser)->m_positionPtr)
366 #define position (((Parser *)parser)->m_position)
367 #define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
368 #define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
369 #define tagLevel (((Parser *)parser)->m_tagLevel)
370 #define buffer (((Parser *)parser)->m_buffer)
371 #define bufferPtr (((Parser *)parser)->m_bufferPtr)
372 #define bufferEnd (((Parser *)parser)->m_bufferEnd)
373 #define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
374 #define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
375 #define bufferLim (((Parser *)parser)->m_bufferLim)
376 #define dataBuf (((Parser *)parser)->m_dataBuf)
377 #define dataBufEnd (((Parser *)parser)->m_dataBufEnd)
378 #define dtd (((Parser *)parser)->m_dtd)
379 #define declEntity (((Parser *)parser)->m_declEntity)
380 #define declNotationName (((Parser *)parser)->m_declNotationName)
381 #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
382 #define declElementType (((Parser *)parser)->m_declElementType)
383 #define declAttributeId (((Parser *)parser)->m_declAttributeId)
384 #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)
385 #define freeTagList (((Parser *)parser)->m_freeTagList)
386 #define freeBindingList (((Parser *)parser)->m_freeBindingList)
387 #define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
388 #define tagStack (((Parser *)parser)->m_tagStack)
389 #define atts (((Parser *)parser)->m_atts)
390 #define attsSize (((Parser *)parser)->m_attsSize)
391 #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
392 #define tempPool (((Parser *)parser)->m_tempPool)
393 #define temp2Pool (((Parser *)parser)->m_temp2Pool)
394 #define groupConnector (((Parser *)parser)->m_groupConnector)
395 #define groupSize (((Parser *)parser)->m_groupSize)
396 #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
397 #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
/* Takes the opaque XML_Parser handle and returns a Parser pointer.
   NOTE(review): presumably a plain conversion of the same pointer, as the
   accessor macros do with a cast -- confirm against the body. */
401 Parser *asParser(XML_Parser parser)
/* Allocates and initialises a parser.
   encodingName, when non-null, is copied into the parser's temporary pool
   and kept as the protocol encoding name; otherwise that name is 0.
   If dtdInit() fails, either array allocation fails, or the encoding name
   cannot be copied, the partially built parser is passed to XML_ParserFree().
   NOTE(review): presumably 0 is returned on failure and the new parser
   otherwise -- confirm against the return statements. */
408 XML_Parser XML_ParserCreate(const XML_Char *encodingName)
410 XML_Parser parser = malloc(sizeof(Parser));
/* Parsing starts in the prolog-initialisation stage. */
413 processor = prologInitProcessor;
414 XmlPrologStateInit(&prologState);
/* No application callbacks are installed initially. */
417 startElementHandler = 0;
418 endElementHandler = 0;
419 characterDataHandler = 0;
420 processingInstructionHandler = 0;
422 startCdataSectionHandler = 0;
423 endCdataSectionHandler = 0;
425 unparsedEntityDeclHandler = 0;
426 notationDeclHandler = 0;
427 startNamespaceDeclHandler = 0;
428 endNamespaceDeclHandler = 0;
429 notStandaloneHandler = 0;
430 externalEntityRefHandler = 0;
/* By default the external entity handler receives the parser itself. */
431 externalEntityRefHandlerArg = parser;
432 unknownEncodingHandler = 0;
436 parseEndByteIndex = 0;
442 declNotationName = 0;
443 declNotationPublicId = 0;
444 memset(&position, 0, sizeof(POSITION));
445 errorCode = XML_ERROR_NONE;
449 openInternalEntities = 0;
454 inheritedBindings = 0;
/* Attribute array and character-data buffer; both results are checked in
   the combined failure test below rather than at the point of allocation. */
455 attsSize = INIT_ATTS_SIZE;
456 atts = malloc(attsSize * sizeof(ATTRIBUTE));
458 dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
461 hadExternalDoctype = 0;
462 unknownEncodingMem = 0;
463 unknownEncodingRelease = 0;
464 unknownEncodingData = 0;
465 unknownEncodingHandlerData = 0;
/* Placeholder separator; XML_ParserCreateNS replaces it with nsSep. */
466 namespaceSeparator = '!';
469 poolInit(&temp2Pool);
470 protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
/* Single failure test covering DTD set-up, both allocations and the copy of
   the encoding name. */
471 if (!dtdInit(&dtd) || !atts || !dataBuf
472 || (encodingName && !protocolEncodingName)) {
473 XML_ParserFree(parser);
476 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
477 XmlInitEncoding(&initEncoding, &encoding, 0);
/* Creates a parser through XML_ParserCreate(), re-initialises its encoding
   with the NS variant, records nsSep as the namespace separator and installs
   an implicit namespace context.  If setContext() fails the parser is freed.
   NOTE(review): the return statements are not visible here -- presumably 0
   on failure and the parser otherwise; confirm. */
481 XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
/* Spelled out character by character through XML_T so the text is built
   from XML_Char-sized units:
   xml=http://www.w3.org/XML/1998/namespace */
484 const XML_Char implicitContext[] = {
485 XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
486 XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
487 XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
488 XML_T('.'), XML_T('w'), XML_T('3'),
489 XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
490 XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
491 XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
492 XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
493 XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
497 XML_Parser parser = XML_ParserCreate(encodingName);
499 XmlInitEncodingNS(&initEncoding, &encoding, 0);
501 namespaceSeparator = nsSep;
503 if (!setContext(parser, implicitContext)) {
504 XML_ParserFree(parser);
/* Replaces the protocol encoding name.  One path clears it to 0; the other
   stores a copy of encodingName made in the parser's temporary pool and
   tests whether that copy failed.
   NOTE(review): presumably the clearing path is taken for a null
   encodingName, and the result is 0 on copy failure, non-zero otherwise --
   confirm. */
510 int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
513 protocolEncodingName = 0;
515 protocolEncodingName = poolCopyString(&tempPool, encodingName);
516 if (!protocolEncodingName)
/* Creates a parser for an external entity referenced from oldParser.
   The new parser inherits the application callbacks and user data listed
   below, receives a copy of the old DTD via dtdCopy() and the namespace
   context via setContext(), and starts in externalEntityInitProcessor.
   If dtdCopy() or setContext() fails the new parser is freed.
   NOTE(review): unparsedEntityDeclHandler and notationDeclHandler are not
   among the settings copied here -- confirm that is intended. */
522 XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
523 const XML_Char *context,
524 const XML_Char *encodingName)
/* The accessor macros read through the variable named parser, so it is first
   aimed at the old parser in order to snapshot its settings into locals. */
526 XML_Parser parser = oldParser;
528 XML_StartElementHandler oldStartElementHandler = startElementHandler;
529 XML_EndElementHandler oldEndElementHandler = endElementHandler;
530 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
531 XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
532 XML_CommentHandler oldCommentHandler = commentHandler;
533 XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
534 XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
535 XML_DefaultHandler oldDefaultHandler = defaultHandler;
536 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
537 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
538 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
539 XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
540 XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
541 void *oldUserData = userData;
542 void *oldHandlerArg = handlerArg;
543 int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
544 void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
/* The replacement is created with or without namespace support; the NS form
   reuses the old parser's separator. */
547 ? XML_ParserCreateNS(encodingName, namespaceSeparator)
548 : XML_ParserCreate(encodingName));
/* From here on the accessors address the new parser: install the snapshot. */
551 startElementHandler = oldStartElementHandler;
552 endElementHandler = oldEndElementHandler;
553 characterDataHandler = oldCharacterDataHandler;
554 processingInstructionHandler = oldProcessingInstructionHandler;
555 commentHandler = oldCommentHandler;
556 startCdataSectionHandler = oldStartCdataSectionHandler;
557 endCdataSectionHandler = oldEndCdataSectionHandler;
558 defaultHandler = oldDefaultHandler;
559 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
560 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
561 notStandaloneHandler = oldNotStandaloneHandler;
562 externalEntityRefHandler = oldExternalEntityRefHandler;
563 unknownEncodingHandler = oldUnknownEncodingHandler;
564 userData = oldUserData;
/* If the old parser passed its user data to handlers, so does the new one. */
565 if (oldUserData == oldHandlerArg)
566 handlerArg = userData;
/* An explicitly set handler argument is inherited; if the old argument was
   the old parser itself, the new parser keeps its own default (itself). */
569 if (oldExternalEntityRefHandlerArg != oldParser)
570 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
571 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
572 if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
573 XML_ParserFree(parser);
576 processor = externalEntityInitProcessor;
/* Walks a list of bindings linked through nextTagBinding: the head is saved
   in b and the list variable is advanced to the following node.
   NOTE(review): presumably b and its storage are freed on each step and the
   walk stops at a null pointer -- confirm. */
581 void destroyBindings(BINDING *bindings)
584 BINDING *b = bindings;
587 bindings = b->nextTagBinding;
/* Releases everything owned by the parser: the tag records (continuing with
   the free-tag list, which is moved onto tagStack), each tag's bindings, the
   free and inherited binding lists, both string pools, the group connector
   array and the unknown-encoding memory.  If an unknown-encoding release
   callback is set, it is called with the unknown-encoding data. */
593 void XML_ParserFree(XML_Parser parser)
/* With no free-list left there is nothing more to walk; otherwise the
   free-list becomes the list being walked. */
598 if (freeTagList == 0)
600 tagStack = freeTagList;
/* Step to the parent before the current tag record is disposed of. */
604 tagStack = tagStack->parent;
606 destroyBindings(p->bindings);
609 destroyBindings(freeBindingList);
610 destroyBindings(inheritedBindings);
611 poolDestroy(&tempPool);
612 poolDestroy(&temp2Pool);
615 free(groupConnector);
618 free(unknownEncodingMem);
619 if (unknownEncodingRelease)
620 unknownEncodingRelease(unknownEncodingData);
/* NOTE(review): presumably makes handlers receive the parser itself as their
   first argument by pointing handlerArg at it -- confirm against the body. */
624 void XML_UseParserAsHandlerArg(XML_Parser parser)
/* Stores p as the parser's user data.  When the handler argument currently
   equals the user data, both are updated together so that handlers keep
   receiving the user data.
   NOTE(review): presumably only userData is assigned in the other case --
   confirm. */
629 void XML_SetUserData(XML_Parser parser, void *p)
631 if (handlerArg == userData)
632 handlerArg = userData = p;
/* Sets the base used for resolving relative references.  The caller's
   string is copied into the DTD's string pool, so the parser does not keep
   the caller's pointer.
   NOTE(review): presumably returns 0 if the copy fails and non-zero
   otherwise -- confirm. */
637 int XML_SetBase(XML_Parser parser, const XML_Char *p)
640 p = poolCopyString(&dtd.pool, p);
/* NOTE(review): presumably returns the base string previously stored by
   XML_SetBase -- confirm against the body. */
650 const XML_Char *XML_GetBase(XML_Parser parser)
/* Returns the parser's nSpecifiedAtts counter. */
655 int XML_GetSpecifiedAttributeCount(XML_Parser parser)
657 return nSpecifiedAtts;
/* Installs the start-element and end-element callbacks (either may be 0). */
660 void XML_SetElementHandler(XML_Parser parser,
661 XML_StartElementHandler start,
662 XML_EndElementHandler end)
664 startElementHandler = start;
665 endElementHandler = end;
/* Installs the character-data callback. */
668 void XML_SetCharacterDataHandler(XML_Parser parser,
669 XML_CharacterDataHandler handler)
671 characterDataHandler = handler;
/* Installs the processing-instruction callback. */
674 void XML_SetProcessingInstructionHandler(XML_Parser parser,
675 XML_ProcessingInstructionHandler handler)
677 processingInstructionHandler = handler;
/* Installs the comment callback. */
680 void XML_SetCommentHandler(XML_Parser parser,
681 XML_CommentHandler handler)
683 commentHandler = handler;
/* Installs the callbacks for the start and end of a CDATA section. */
686 void XML_SetCdataSectionHandler(XML_Parser parser,
687 XML_StartCdataSectionHandler start,
688 XML_EndCdataSectionHandler end)
690 startCdataSectionHandler = start;
691 endCdataSectionHandler = end;
/* Installs the default callback and clears defaultExpandInternalEntities.
   With the flag clear, doContent() reports a reference to an internal entity
   to the default handler instead of expanding it. */
694 void XML_SetDefaultHandler(XML_Parser parser,
695 XML_DefaultHandler handler)
697 defaultHandler = handler;
698 defaultExpandInternalEntities = 0;
/* Installs the default callback and sets defaultExpandInternalEntities, so
   internal entity references continue to be expanded. */
701 void XML_SetDefaultHandlerExpand(XML_Parser parser,
702 XML_DefaultHandler handler)
704 defaultHandler = handler;
705 defaultExpandInternalEntities = 1;
/* Installs the callback for unparsed entity declarations. */
708 void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
709 XML_UnparsedEntityDeclHandler handler)
711 unparsedEntityDeclHandler = handler;
/* Installs the callback for notation declarations. */
714 void XML_SetNotationDeclHandler(XML_Parser parser,
715 XML_NotationDeclHandler handler)
717 notationDeclHandler = handler;
/* Installs the callbacks invoked when a namespace declaration comes into
   and goes out of scope. */
720 void XML_SetNamespaceDeclHandler(XML_Parser parser,
721 XML_StartNamespaceDeclHandler start,
722 XML_EndNamespaceDeclHandler end)
724 startNamespaceDeclHandler = start;
725 endNamespaceDeclHandler = end;
/* Installs the not-standalone callback. */
728 void XML_SetNotStandaloneHandler(XML_Parser parser,
729 XML_NotStandaloneHandler handler)
731 notStandaloneHandler = handler;
/* Installs the callback invoked for references to external entities. */
734 void XML_SetExternalEntityRefHandler(XML_Parser parser,
735 XML_ExternalEntityRefHandler handler)
737 externalEntityRefHandler = handler;
/* Chooses the first argument passed to the external entity handler: either
   the supplied arg or the parser itself.
   NOTE(review): presumably the parser is used when arg is null -- confirm
   the condition. */
740 void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
743 externalEntityRefHandlerArg = arg;
745 externalEntityRefHandlerArg = parser;
/* Installs the unknown-encoding callback together with the data pointer
   kept for it in unknownEncodingHandlerData. */
748 void XML_SetUnknownEncodingHandler(XML_Parser parser,
749 XML_UnknownEncodingHandler handler,
752 unknownEncodingHandler = handler;
753 unknownEncodingHandlerData = data;
/* Feeds len bytes starting at s to the parser; isFinal marks the last piece.
   Three paths are visible:
   - the processor is run over whatever is already in the internal buffer;
   - when the internal buffer is empty (bufferPtr == bufferEnd) the caller's
     memory is parsed in place, and any unconsumed tail is saved in the
     internal buffer for the next call;
   - otherwise the bytes are copied into storage from XML_GetBuffer() and
     XML_ParseBuffer() does the work.
   errorCode receives the processor's result on every path. */
756 int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
/* Buffered-data path: the end-pointer argument is 0, so nothing may be left
   unconsumed. */
761 positionPtr = bufferPtr;
762 errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
763 if (errorCode == XML_ERROR_NONE)
765 eventEndPtr = eventPtr;
/* In-place path: parse straight out of the caller's memory. */
768 else if (bufferPtr == bufferEnd) {
771 parseEndByteIndex += len;
774 errorCode = processor(parser, s, parseEndPtr = s + len, 0);
775 if (errorCode == XML_ERROR_NONE)
777 eventEndPtr = eventPtr;
/* Here the processor reports through end how far it consumed the input. */
780 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
781 if (errorCode != XML_ERROR_NONE) {
782 eventEndPtr = eventPtr;
785 XmlUpdatePosition(encoding, positionPtr, end, &position);
/* Bytes the processor did not consume must survive until the next call. */
786 nLeftOver = s + len - end;
/* NOTE(review): the realloc result is assigned straight to buffer, so the
   old block is no longer reachable if realloc fails; and len * 2 can
   overflow int (see the FIXME below). */
788 if (buffer == 0 || nLeftOver > bufferLim - buffer) {
789 /* FIXME avoid integer overflow */
790 buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
792 errorCode = XML_ERROR_NO_MEMORY;
793 eventPtr = eventEndPtr = 0;
796 bufferLim = buffer + len * 2;
798 memcpy(buffer, end, nLeftOver);
800 bufferEnd = buffer + nLeftOver;
/* Copy path.
   NOTE(review): XML_GetBuffer() has an out-of-memory path; its result is
   handed to memcpy without a visible check -- confirm. */
805 memcpy(XML_GetBuffer(parser, len), s, len);
806 return XML_ParseBuffer(parser, len, isFinal);
/* Parses len bytes that the caller has already placed in the internal
   buffer.  The processor is run from the current bufferPtr up to bufferEnd;
   for a non-final piece it reports through bufferPtr how far it got, for the
   final piece it receives a null end pointer.  On success the line and
   column position is advanced to bufferPtr. */
810 int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
812 const char *start = bufferPtr;
815 parseEndByteIndex += len;
816 errorCode = processor(parser, start, parseEndPtr = bufferEnd,
817 isFinal ? (const char **)0 : &bufferPtr);
818 if (errorCode == XML_ERROR_NONE) {
820 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
824 eventEndPtr = eventPtr;
/* Makes room for len more bytes after the unparsed data in the internal
   buffer.  Nothing is moved when the free tail is already large enough.
   Otherwise the unparsed bytes are either slid to the front of the existing
   buffer, or copied into a newly malloc'ed one.  An allocation failure sets
   errorCode to XML_ERROR_NO_MEMORY.
   NOTE(review): presumably returns bufferEnd on success and 0 on allocation
   failure -- confirm against the return statements. */
829 void *XML_GetBuffer(XML_Parser parser, int len)
831 if (len > bufferLim - bufferEnd) {
832 /* FIXME avoid integer overflow */
/* Total needed: the request plus the bytes still waiting to be parsed. */
833 int neededSize = len + (bufferEnd - bufferPtr);
/* Enough capacity overall: compact in place (memmove, since the ranges may
   overlap). */
834 if (neededSize <= bufferLim - buffer) {
835 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
836 bufferEnd = buffer + (bufferEnd - bufferPtr);
/* Otherwise choose a new size: start from the current span, or
   INIT_BUFFER_SIZE, and keep adjusting until it covers neededSize. */
841 int bufferSize = bufferLim - bufferPtr;
843 bufferSize = INIT_BUFFER_SIZE;
846 } while (bufferSize < neededSize);
847 newBuf = malloc(bufferSize);
849 errorCode = XML_ERROR_NO_MEMORY;
852 bufferLim = newBuf + bufferSize;
854 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
/* bufferEnd is rebased first because it is computed from the old bufferPtr. */
857 bufferEnd = newBuf + (bufferEnd - bufferPtr);
858 bufferPtr = buffer = newBuf;
/* NOTE(review): presumably returns the parser's errorCode as last set by
   XML_Parse / XML_ParseBuffer -- confirm against the body. */
864 enum XML_Error XML_GetErrorCode(XML_Parser parser)
/* Byte offset of the current event: the running byte total at the end of
   the range last handed to the processor, minus the distance from the event
   start to that end.
   NOTE(review): the handling of a null eventPtr is not visible here --
   confirm. */
869 long XML_GetCurrentByteIndex(XML_Parser parser)
872 return parseEndByteIndex - (parseEndPtr - eventPtr);
/* Length in bytes of the current event, computed only when both event
   pointers are set.
   NOTE(review): the value returned otherwise is not visible here --
   presumably 0; confirm. */
876 int XML_GetCurrentByteCount(XML_Parser parser)
878 if (eventEndPtr && eventPtr)
879 return eventEndPtr - eventPtr;
/* Returns the line number of the current event, counted from 1.  The stored
   position is first advanced from positionPtr to eventPtr, and positionPtr
   is moved up so that the same bytes are not scanned again. */
883 int XML_GetCurrentLineNumber(XML_Parser parser)
886 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
887 positionPtr = eventPtr;
889 return position.lineNumber + 1;
/* Returns the column number of the current event exactly as stored (no +1
   adjustment, unlike the line number).  The stored position is first
   advanced from positionPtr to eventPtr. */
892 int XML_GetCurrentColumnNumber(XML_Parser parser)
895 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
896 positionPtr = eventPtr;
898 return position.columnNumber;
/* Passes the current event to the default handler, if one is installed.
   While an internal entity is open, the event range recorded in the
   innermost open entity is reported using the internal encoding (NS or
   plain, depending on ns); otherwise the parser's own event range and
   document encoding are used. */
901 void XML_DefaultCurrent(XML_Parser parser)
903 if (defaultHandler) {
904 if (openInternalEntities)
905 reportDefault(parser,
906 ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(),
907 openInternalEntities->internalEventPtr,
908 openInternalEntities->internalEventEndPtr);
910 reportDefault(parser, encoding, eventPtr, eventEndPtr);
/* Maps an error code to its message by indexing a static table.  Only codes
   strictly greater than 0 and below the table length are looked up.
   NOTE(review): presumably 0 is returned for any other code -- confirm. */
914 const XML_LChar *XML_ErrorString(int code)
/* The order of the entries must match the numeric order of the error codes,
   because code is used directly as the index. */
916 static const XML_LChar *message[] = {
918 XML_T("out of memory"),
919 XML_T("syntax error"),
920 XML_T("no element found"),
921 XML_T("not well-formed"),
922 XML_T("unclosed token"),
/* NOTE(review): same text as the previous entry; if this slot belongs to
   XML_ERROR_PARTIAL_CHAR a distinct message would be clearer -- confirm. */
923 XML_T("unclosed token"),
924 XML_T("mismatched tag"),
925 XML_T("duplicate attribute"),
926 XML_T("junk after document element"),
927 XML_T("illegal parameter entity reference"),
928 XML_T("undefined entity"),
929 XML_T("recursive entity reference"),
930 XML_T("asynchronous entity"),
931 XML_T("reference to invalid character number"),
932 XML_T("reference to binary entity"),
933 XML_T("reference to external entity in attribute"),
934 XML_T("xml processing instruction not at start of external entity"),
935 XML_T("unknown encoding"),
936 XML_T("encoding specified in XML declaration is incorrect"),
937 XML_T("unclosed CDATA section"),
938 XML_T("error in processing external entity reference"),
939 XML_T("document is not standalone")
/* code > 0 is tested first, so the signed/unsigned comparison against the
   element count never sees a negative value. */
941 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
942 return message[code];
/* Processor for document content: delegates to doContent() with a start tag
   level of 0 and the parser's document encoding. */
947 enum XML_Error contentProcessor(XML_Parser parser,
952 return doContent(parser, 0, encoding, start, end, endPtr);
/* First stage for an external entity parser: initialises the encoding,
   tests the result for an error, then installs the second stage as the
   current processor and runs it on the same input. */
956 enum XML_Error externalEntityInitProcessor(XML_Parser parser,
961 enum XML_Error result = initializeEncoding(parser);
962 if (result != XML_ERROR_NONE)
964 processor = externalEntityInitProcessor2;
965 return externalEntityInitProcessor2(parser, start, end, endPtr);
/* Second stage for an external entity parser: looks at the first content
   token.  A partial token or partial character yields either
   XML_ERROR_NONE or the matching error (XML_ERROR_UNCLOSED_TOKEN /
   XML_ERROR_PARTIAL_CHAR); otherwise the third stage is installed and run.
   NOTE(review): presumably XML_ERROR_NONE is chosen when more input may
   still arrive (endPtr non-null) -- confirm the condition. */
969 enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
975 int tok = XmlContentTok(encoding, start, end, &next);
980 case XML_TOK_PARTIAL:
983 return XML_ERROR_NONE;
986 return XML_ERROR_UNCLOSED_TOKEN;
987 case XML_TOK_PARTIAL_CHAR:
990 return XML_ERROR_NONE;
993 return XML_ERROR_PARTIAL_CHAR;
995 processor = externalEntityInitProcessor3;
996 return externalEntityInitProcessor3(parser, start, end, endPtr);
/* Third stage for an external entity parser: if the first content token is
   an XML declaration it is handed to processXmlDecl() (second argument 1)
   and the result is tested for an error.  Partial tokens and characters are
   handled as in the second stage.  Finally the content processor for
   external entities is installed and doContent() is run with a start tag
   level of 1. */
1000 enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1003 const char **endPtr)
1006 int tok = XmlContentTok(encoding, start, end, &next);
1008 case XML_TOK_XML_DECL:
1010 enum XML_Error result = processXmlDecl(parser, 1, start, next);
1011 if (result != XML_ERROR_NONE)
1016 case XML_TOK_PARTIAL:
1019 return XML_ERROR_NONE;
1022 return XML_ERROR_UNCLOSED_TOKEN;
1023 case XML_TOK_PARTIAL_CHAR:
1026 return XML_ERROR_NONE;
1029 return XML_ERROR_PARTIAL_CHAR;
1031 processor = externalEntityContentProcessor;
1033 return doContent(parser, 1, encoding, start, end, endPtr);
/* Processor for the body of an external entity: delegates to doContent()
   with a start tag level of 1 and the parser's document encoding. */
1037 enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1040 const char **endPtr)
1042 return doContent(parser, 1, encoding, start, end, endPtr);
1045 static enum XML_Error
1046 doContent(XML_Parser parser,
1048 const ENCODING *enc,
1051 const char **nextPtr)
1053 const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
1054 const char **eventPP;
1055 const char **eventEndPP;
1056 if (enc == encoding) {
1057 eventPP = &eventPtr;
1058 eventEndPP = &eventEndPtr;
1061 eventPP = &(openInternalEntities->internalEventPtr);
1062 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1066 const char *next = s; /* XmlContentTok doesn't always set the last arg */
1067 int tok = XmlContentTok(enc, s, end, &next);
1070 case XML_TOK_TRAILING_CR:
1073 return XML_ERROR_NONE;
1076 if (characterDataHandler) {
1078 characterDataHandler(handlerArg, &c, 1);
1080 else if (defaultHandler)
1081 reportDefault(parser, enc, s, end);
1082 if (startTagLevel == 0)
1083 return XML_ERROR_NO_ELEMENTS;
1084 if (tagLevel != startTagLevel)
1085 return XML_ERROR_ASYNC_ENTITY;
1086 return XML_ERROR_NONE;
1090 return XML_ERROR_NONE;
1092 if (startTagLevel > 0) {
1093 if (tagLevel != startTagLevel)
1094 return XML_ERROR_ASYNC_ENTITY;
1095 return XML_ERROR_NONE;
1097 return XML_ERROR_NO_ELEMENTS;
1098 case XML_TOK_INVALID:
1100 return XML_ERROR_INVALID_TOKEN;
1101 case XML_TOK_PARTIAL:
1104 return XML_ERROR_NONE;
1106 return XML_ERROR_UNCLOSED_TOKEN;
1107 case XML_TOK_PARTIAL_CHAR:
1110 return XML_ERROR_NONE;
1112 return XML_ERROR_PARTIAL_CHAR;
1113 case XML_TOK_ENTITY_REF:
1115 const XML_Char *name;
1117 XML_Char ch = XmlPredefinedEntityName(enc,
1118 s + enc->minBytesPerChar,
1119 next - enc->minBytesPerChar);
1121 if (characterDataHandler)
1122 characterDataHandler(handlerArg, &ch, 1);
1123 else if (defaultHandler)
1124 reportDefault(parser, enc, s, next);
1127 name = poolStoreString(&dtd.pool, enc,
1128 s + enc->minBytesPerChar,
1129 next - enc->minBytesPerChar);
1131 return XML_ERROR_NO_MEMORY;
1132 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1133 poolDiscard(&dtd.pool);
1135 if (dtd.complete || dtd.standalone)
1136 return XML_ERROR_UNDEFINED_ENTITY;
1138 reportDefault(parser, enc, s, next);
1142 return XML_ERROR_RECURSIVE_ENTITY_REF;
1143 if (entity->notation)
1144 return XML_ERROR_BINARY_ENTITY_REF;
1146 if (entity->textPtr) {
1147 enum XML_Error result;
1148 OPEN_INTERNAL_ENTITY openEntity;
1149 if (defaultHandler && !defaultExpandInternalEntities) {
1150 reportDefault(parser, enc, s, next);
1154 openEntity.next = openInternalEntities;
1155 openInternalEntities = &openEntity;
1156 openEntity.entity = entity;
1157 openEntity.internalEventPtr = 0;
1158 openEntity.internalEventEndPtr = 0;
1159 result = doContent(parser,
1162 (char *)entity->textPtr,
1163 (char *)(entity->textPtr + entity->textLen),
1166 openInternalEntities = openEntity.next;
1170 else if (externalEntityRefHandler) {
1171 const XML_Char *context;
1173 context = getContext(parser);
1176 return XML_ERROR_NO_MEMORY;
1177 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1182 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1183 poolDiscard(&tempPool);
1185 else if (defaultHandler)
1186 reportDefault(parser, enc, s, next);
1190 case XML_TOK_START_TAG_WITH_ATTS:
1191 if (!startElementHandler) {
1192 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1197 case XML_TOK_START_TAG_NO_ATTS:
1202 freeTagList = freeTagList->parent;
1205 tag = malloc(sizeof(TAG));
1207 return XML_ERROR_NO_MEMORY;
1208 tag->buf = malloc(INIT_TAG_BUF_SIZE);
1210 return XML_ERROR_NO_MEMORY;
1211 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1214 tag->parent = tagStack;
1216 tag->name.localPart = 0;
1217 tag->rawName = s + enc->minBytesPerChar;
1218 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1220 /* Need to guarantee that:
1221 tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1222 if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1223 int bufSize = tag->rawNameLength * 4;
1224 bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1225 tag->buf = realloc(tag->buf, bufSize);
1227 return XML_ERROR_NO_MEMORY;
1228 tag->bufEnd = tag->buf + bufSize;
1230 memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1231 tag->rawName = tag->buf;
1234 if (startElementHandler) {
1235 enum XML_Error result;
1238 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1239 const char *fromPtr = tag->rawName;
1242 toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1244 toPtr = (XML_Char *)tag->buf;
1245 tag->name.str = toPtr;
1247 &fromPtr, rawNameEnd,
1248 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1249 if (fromPtr == rawNameEnd)
1251 bufSize = (tag->bufEnd - tag->buf) << 1;
1252 tag->buf = realloc(tag->buf, bufSize);
1254 return XML_ERROR_NO_MEMORY;
1255 tag->bufEnd = tag->buf + bufSize;
1257 tag->rawName = tag->buf;
1259 *toPtr = XML_T('\0');
1260 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1263 startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1264 poolClear(&tempPool);
1269 reportDefault(parser, enc, s, next);
1273 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1274 if (!startElementHandler) {
1275 enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1280 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1281 if (startElementHandler || endElementHandler) {
1282 const char *rawName = s + enc->minBytesPerChar;
1283 enum XML_Error result;
1284 BINDING *bindings = 0;
1286 name.str = poolStoreString(&tempPool, enc, rawName,
1287 rawName + XmlNameLength(enc, rawName));
1289 return XML_ERROR_NO_MEMORY;
1290 poolFinish(&tempPool);
1291 result = storeAtts(parser, enc, s, &name, &bindings);
1294 poolFinish(&tempPool);
1295 if (startElementHandler)
1296 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1297 if (endElementHandler) {
1298 if (startElementHandler)
1299 *eventPP = *eventEndPP;
1300 endElementHandler(handlerArg, name.str);
1302 poolClear(&tempPool);
1304 BINDING *b = bindings;
1305 if (endNamespaceDeclHandler)
1306 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1307 bindings = bindings->nextTagBinding;
1308 b->nextTagBinding = freeBindingList;
1309 freeBindingList = b;
1310 b->prefix->binding = b->prevPrefixBinding;
1313 else if (defaultHandler)
1314 reportDefault(parser, enc, s, next);
1316 return epilogProcessor(parser, next, end, nextPtr);
1318 case XML_TOK_END_TAG:
1319 if (tagLevel == startTagLevel)
1320 return XML_ERROR_ASYNC_ENTITY;
1323 const char *rawName;
1324 TAG *tag = tagStack;
1325 tagStack = tag->parent;
1326 tag->parent = freeTagList;
1328 rawName = s + enc->minBytesPerChar*2;
1329 len = XmlNameLength(enc, rawName);
1330 if (len != tag->rawNameLength
1331 || memcmp(tag->rawName, rawName, len) != 0) {
1333 return XML_ERROR_TAG_MISMATCH;
1336 if (endElementHandler && tag->name.str) {
1337 if (tag->name.localPart) {
1338 XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1339 const XML_Char *from = tag->name.localPart;
1340 while ((*to++ = *from++) != 0)
1343 endElementHandler(handlerArg, tag->name.str);
1345 else if (defaultHandler)
1346 reportDefault(parser, enc, s, next);
1347 while (tag->bindings) {
1348 BINDING *b = tag->bindings;
1349 if (endNamespaceDeclHandler)
1350 endNamespaceDeclHandler(handlerArg, b->prefix->name);
1351 tag->bindings = tag->bindings->nextTagBinding;
1352 b->nextTagBinding = freeBindingList;
1353 freeBindingList = b;
1354 b->prefix->binding = b->prevPrefixBinding;
1357 return epilogProcessor(parser, next, end, nextPtr);
1360 case XML_TOK_CHAR_REF:
1362 int n = XmlCharRefNumber(enc, s);
1364 return XML_ERROR_BAD_CHAR_REF;
1365 if (characterDataHandler) {
1366 XML_Char buf[XML_ENCODE_MAX];
1367 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1369 else if (defaultHandler)
1370 reportDefault(parser, enc, s, next);
1373 case XML_TOK_XML_DECL:
1374 return XML_ERROR_MISPLACED_XML_PI;
1375 case XML_TOK_DATA_NEWLINE:
1376 if (characterDataHandler) {
1378 characterDataHandler(handlerArg, &c, 1);
1380 else if (defaultHandler)
1381 reportDefault(parser, enc, s, next);
1383 case XML_TOK_CDATA_SECT_OPEN:
1385 enum XML_Error result;
1386 if (startCdataSectionHandler)
1387 startCdataSectionHandler(handlerArg);
1389 /* Suppose you are doing a transformation on a document that involves
1390 changing only the character data. You set up a defaultHandler
1391 and a characterDataHandler. The defaultHandler simply copies
1392 characters through. The characterDataHandler does the transformation
1393 and writes the characters out escaping them as necessary. This case
1394 will fail to work if we leave out the following two lines (because &
1395 and < inside CDATA sections will be incorrectly escaped).
1397 However, now we have a start/endCdataSectionHandler, so it seems
1398 easier to let the user deal with this. */
1400 else if (characterDataHandler)
1401 characterDataHandler(handlerArg, dataBuf, 0);
1403 else if (defaultHandler)
1404 reportDefault(parser, enc, s, next);
1405 result = doCdataSection(parser, enc, &next, end, nextPtr);
1407 processor = cdataSectionProcessor;
1412 case XML_TOK_TRAILING_RSQB:
1415 return XML_ERROR_NONE;
1417 if (characterDataHandler) {
1418 if (MUST_CONVERT(enc, s)) {
1419 ICHAR *dataPtr = (ICHAR *)dataBuf;
1420 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1421 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1424 characterDataHandler(handlerArg,
1426 (XML_Char *)end - (XML_Char *)s);
1428 else if (defaultHandler)
1429 reportDefault(parser, enc, s, end);
1430 if (startTagLevel == 0) {
1432 return XML_ERROR_NO_ELEMENTS;
1434 if (tagLevel != startTagLevel) {
1436 return XML_ERROR_ASYNC_ENTITY;
1438 return XML_ERROR_NONE;
1439 case XML_TOK_DATA_CHARS:
1440 if (characterDataHandler) {
1441 if (MUST_CONVERT(enc, s)) {
1443 ICHAR *dataPtr = (ICHAR *)dataBuf;
1444 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1446 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1453 characterDataHandler(handlerArg,
1455 (XML_Char *)next - (XML_Char *)s);
1457 else if (defaultHandler)
1458 reportDefault(parser, enc, s, next);
1461 if (!reportProcessingInstruction(parser, enc, s, next))
1462 return XML_ERROR_NO_MEMORY;
1464 case XML_TOK_COMMENT:
1465 if (!reportComment(parser, enc, s, next))
1466 return XML_ERROR_NO_MEMORY;
1470 reportDefault(parser, enc, s, next);
1473 *eventPP = s = next;
1478 /* If tagNamePtr is non-null, build a real list of attributes,
1479 otherwise just check the attributes for well-formedness. */
/* Processes the attributes of the tag starting at s.
   Visible steps: find the element type to learn its defaulted attributes,
   fetch the raw attributes with XmlGetAttributes (growing the parser's atts
   array when needed), map each name to an ATTRIBUTE_ID, reject repeats with
   XML_ERROR_DUPLICATE_ATTRIBUTE, store values in tempPool, append defaulted
   attributes, and finally rewrite prefixed names using namespace bindings.
   Returns XML_ERROR_NONE on success; allocation failures are reported as
   XML_ERROR_NO_MEMORY.
   NOTE(review): this listing omits some original lines (declarations, braces,
   several conditions); the comments below describe only what is visible. */
1481 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1482 const char *s, TAG_NAME *tagNamePtr,
1483 BINDING **bindingsPtr)
1485 ELEMENT_TYPE *elementType = 0;
1486 int nDefaultAtts = 0;
1487 const XML_Char **appAtts;
1493 const XML_Char *localPart;
/* First try to find an already-known element type (size argument 0). */
1496 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
/* Otherwise copy the name into dtd.pool and look it up again, this time
   passing sizeof(ELEMENT_TYPE) -- presumably so lookup() creates the entry;
   confirm against lookup(). */
1498 tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1499 if (!tagNamePtr->str)
1500 return XML_ERROR_NO_MEMORY;
1501 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1503 return XML_ERROR_NO_MEMORY;
1504 if (ns && !setElementTypePrefix(parser, elementType))
1505 return XML_ERROR_NO_MEMORY;
1507 nDefaultAtts = elementType->nDefaultAtts;
/* Ask the tokenizer for the attributes; atts currently has room for attsSize. */
1509 n = XmlGetAttributes(enc, s, attsSize, atts);
/* Grow atts so specified plus defaulted attributes fit, with INIT_ATTS_SIZE
   entries to spare.
   NOTE(review): the realloc result overwrites atts directly, so the previous
   array is no longer reachable through atts if the call fails. */
1510 if (n + nDefaultAtts > attsSize) {
1511 int oldAttsSize = attsSize;
1512 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1513 atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1515 return XML_ERROR_NO_MEMORY;
/* The first call could not store all n attributes; fetch them again. */
1516 if (n > oldAttsSize)
1517 XmlGetAttributes(enc, s, n, atts);
/* The same storage is reused as the name/value pointer array (appAtts). */
1519 appAtts = (const XML_Char **)atts;
1520 for (i = 0; i < n; i++) {
1521 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1523 + XmlNameLength(enc, atts[i].name));
1525 return XML_ERROR_NO_MEMORY;
/* The XML_Char just before an attribute id's name serves as a marker:
   non-zero here means the name already occurred in this tag. */
1526 if ((attId->name)[-1]) {
1527 if (enc == encoding)
1528 eventPtr = atts[i].name;
1529 return XML_ERROR_DUPLICATE_ATTRIBUTE;
/* Mark the name as seen and emit it. */
1531 (attId->name)[-1] = 1;
1532 appAtts[attIndex++] = attId->name;
/* Value still needs normalization: run it through storeAttributeValue.
   isCdata is taken from the element's defaulted-attribute entry for this id
   when the id may be tokenized. */
1533 if (!atts[i].normalized) {
1534 enum XML_Error result;
1537 if (attId->maybeTokenized) {
1539 for (j = 0; j < nDefaultAtts; j++) {
1540 if (attId == elementType->defaultAtts[j].id) {
1541 isCdata = elementType->defaultAtts[j].isCdata;
1547 result = storeAttributeValue(parser, enc, isCdata,
1548 atts[i].valuePtr, atts[i].valueEnd,
1553 appAtts[attIndex] = poolStart(&tempPool);
1554 poolFinish(&tempPool);
1557 poolDiscard(&tempPool);
/* Value is already normalized: store it unchanged, but only when a real
   attribute list is being built. */
1559 else if (tagNamePtr) {
1560 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1561 if (appAtts[attIndex] == 0)
1562 return XML_ERROR_NO_MEMORY;
1563 poolFinish(&tempPool);
/* Prefixed attribute while building a list: a namespace binding may be
   registered via addBinding, and marker value 2 flags a name for the
   expansion pass further down (the branch structure between these statements
   is not visible in this listing). */
1565 if (attId->prefix && tagNamePtr) {
1567 if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1568 return XML_ERROR_NO_MEMORY;
1574 (attId->name)[-1] = 2;
1580 nSpecifiedAtts = attIndex;
/* Add declared defaults for attributes that were not marked as seen and that
   have a default value. */
1583 for (j = 0; j < nDefaultAtts; j++) {
1584 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1585 if (!(da->id->name)[-1] && da->value) {
1586 if (da->id->prefix) {
1587 if (da->id->xmlns) {
1588 if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1589 return XML_ERROR_NO_MEMORY;
1592 (da->id->name)[-1] = 2;
1594 appAtts[attIndex++] = da->id->name;
1595 appAtts[attIndex++] = da->value;
1599 (da->id->name)[-1] = 1;
1600 appAtts[attIndex++] = da->id->name;
1601 appAtts[attIndex++] = da->value;
/* Terminate the name/value list. */
1605 appAtts[attIndex] = 0;
/* Expansion pass over the names (every second slot): entries marked 2 get
   their marker cleared and, if their prefix is bound, are replaced by a
   tempPool string built from the binding's uri followed by characters of the
   name (the code that positions s within the name is not visible here). */
1609 for (; i < attIndex; i += 2) {
1610 if (appAtts[i][-1] == 2) {
1612 ((XML_Char *)(appAtts[i]))[-1] = 0;
1613 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1614 if (id->prefix->binding) {
1616 const BINDING *b = id->prefix->binding;
1617 const XML_Char *s = appAtts[i];
1618 for (j = 0; j < b->uriLen; j++) {
1619 if (!poolAppendChar(&tempPool, b->uri[j]))
1620 return XML_ERROR_NO_MEMORY;
1625 if (!poolAppendChar(&tempPool, *s))
1626 return XML_ERROR_NO_MEMORY;
1628 appAtts[i] = poolStart(&tempPool);
1629 poolFinish(&tempPool);
/* Clear the markers of the remaining names so the ids are clean for the
   next tag. */
1635 ((XML_Char *)(appAtts[i]))[-1] = 0;
1638 for (; i < attIndex; i += 2)
1639 ((XML_Char *)(appAtts[i]))[-1] = 0;
1641 return XML_ERROR_NONE;
/* Also clear the markers of attribute ids referenced by this tag's bindings. */
1642 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1643 binding->attId->name[-1] = 0;
/* Choose the binding that applies to the element name: the element's own
   prefix if it has one (localPart then starts after the ':'), otherwise the
   default prefix's binding. */
1644 if (elementType->prefix) {
1645 binding = elementType->prefix->binding;
1647 return XML_ERROR_NONE;
1648 localPart = tagNamePtr->str;
1649 while (*localPart++ != XML_T(':'))
1652 else if (dtd.defaultPrefix.binding) {
1653 binding = dtd.defaultPrefix.binding;
1654 localPart = tagNamePtr->str;
1657 return XML_ERROR_NONE;
/* Append the local part (including its terminator) to the binding's uri
   buffer, starting at index uriLen, and make the tag name point at it.
   NOTE(review): the buffer is grown to uriAlloc * 2 with no sizeof(XML_Char)
   factor and the realloc result overwrites binding->uri directly -- confirm
   the units for builds where XML_Char is wider than char. */
1658 tagNamePtr->localPart = localPart;
1659 tagNamePtr->uriLen = binding->uriLen;
1660 i = binding->uriLen;
1662 if (i == binding->uriAlloc) {
1663 binding->uri = realloc(binding->uri, binding->uriAlloc *= 2);
1665 return XML_ERROR_NO_MEMORY;
1667 binding->uri[i++] = *localPart;
1668 } while (*localPart++);
1669 tagNamePtr->str = binding->uri;
1670 return XML_ERROR_NONE;
/* Binds prefix to the namespace name uri for the current tag.  A BINDING is
   taken from freeBindingList when one is available, otherwise allocated; it
   records the prefix's previous binding, is linked in front of *bindingsPtr,
   and startNamespaceDeclHandler (if set) is notified.  Callers in this file
   treat a zero result as out of memory.  (The return statements and the
   allocation checks are not visible in this listing.) */
1674 int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
/* len counts the XML_Chars in uri up to its terminator. */
1678 for (len = 0; uri[len]; len++)
1680 if (namespaceSeparator)
/* Reuse a recycled binding, enlarging its uri buffer when it is too small.
   NOTE(review): this realloc size has no sizeof(XML_Char) factor although the
   memcpy below copies len * sizeof(XML_Char) bytes, and the result overwrites
   b->uri directly -- confirm for builds where XML_Char is wider than char. */
1682 if (freeBindingList) {
1683 b = freeBindingList;
1684 if (len > b->uriAlloc) {
1685 b->uri = realloc(b->uri, len + EXPAND_SPARE);
1688 b->uriAlloc = len + EXPAND_SPARE;
1690 freeBindingList = b->nextTagBinding;
/* No recycled binding available: allocate a new one.
   NOTE(review): EXPAND_SPARE is added in bytes here, outside the
   sizeof(XML_Char) multiplication. */
1693 b = malloc(sizeof(BINDING));
1696 b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE);
1704 memcpy(b->uri, uri, len * sizeof(XML_Char));
/* With a separator configured, the last stored XML_Char is the separator. */
1705 if (namespaceSeparator)
1706 b->uri[len - 1] = namespaceSeparator;
/* Remember the binding being shadowed so it can be restored when this one
   is released. */
1709 b->prevPrefixBinding = prefix->binding;
/* An empty uri on the default prefix leaves that prefix unbound. */
1710 if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1711 prefix->binding = 0;
1713 prefix->binding = b;
1714 b->nextTagBinding = *bindingsPtr;
/* The handler receives a null uri when the prefix ended up unbound. */
1716 if (startNamespaceDeclHandler)
1717 startNamespaceDeclHandler(handlerArg, prefix->name,
1718 prefix->binding ? uri : 0);
1722 /* The idea here is to avoid using stack for each CDATA section when
1723 the whole file is parsed with one call. */
/* Processor used while the parser is inside a CDATA section: it continues
   the section with doCdataSection using the document encoding.  The visible
   code then installs contentProcessor and resumes content parsing at start;
   the condition guarding that hand-off is not visible in this listing. */
1726 enum XML_Error cdataSectionProcessor(XML_Parser parser,
1729 const char **endPtr)
1731 enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1733 processor = contentProcessor;
1734 return contentProcessor(parser, start, end, endPtr);
1739 /* startPtr gets set to non-null if the section is closed, and to null if
1740 the section is not yet closed. */
/* Scans the body of a CDATA section starting at *startPtr, tokenizing with
   XmlCdataSectionTok and dispatching each token to the character-data,
   end-CDATA or default handlers.  Returns XML_ERROR_NONE when the section is
   closed, or when the visible partial-token branches decide to stop without
   error; otherwise returns the error code shown in the matching case.
   (The enclosing loop, some conditions and the updates of *startPtr and
   *nextPtr are not visible in this listing.) */
1743 enum XML_Error doCdataSection(XML_Parser parser,
1744 const ENCODING *enc,
1745 const char **startPtr,
1747 const char **nextPtr)
1749 const char *s = *startPtr;
1750 const char **eventPP;
1751 const char **eventEndPP;
/* Event positions are tracked in the parser itself when scanning with the
   document encoding, otherwise in the innermost open internal entity. */
1752 if (enc == encoding) {
1753 eventPP = &eventPtr;
1755 eventEndPP = &eventEndPtr;
1758 eventPP = &(openInternalEntities->internalEventPtr);
1759 eventEndPP = &(openInternalEntities->internalEventEndPtr);
1765 int tok = XmlCdataSectionTok(enc, s, end, &next);
/* End of the section: notify the handler and report success. */
1768 case XML_TOK_CDATA_SECT_CLOSE:
1769 if (endCdataSectionHandler)
1770 endCdataSectionHandler(handlerArg);
1772 /* see comment under XML_TOK_CDATA_SECT_OPEN */
1773 else if (characterDataHandler)
1774 characterDataHandler(handlerArg, dataBuf, 0);
1776 else if (defaultHandler)
1777 reportDefault(parser, enc, s, next);
1779 return XML_ERROR_NONE;
/* A newline token is delivered to the character-data handler as one
   XML_Char (the declaration of c is not visible here). */
1780 case XML_TOK_DATA_NEWLINE:
1781 if (characterDataHandler) {
1783 characterDataHandler(handlerArg, &c, 1);
1785 else if (defaultHandler)
1786 reportDefault(parser, enc, s, next);
/* Character data: when the input must be converted, XmlConvert fills dataBuf
   and the handler receives the converted XML_Chars; otherwise the handler is
   called with a length computed directly from s and next. */
1788 case XML_TOK_DATA_CHARS:
1789 if (characterDataHandler) {
1790 if (MUST_CONVERT(enc, s)) {
1792 ICHAR *dataPtr = (ICHAR *)dataBuf;
1793 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1795 characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1802 characterDataHandler(handlerArg,
1804 (XML_Char *)next - (XML_Char *)s);
1806 else if (defaultHandler)
1807 reportDefault(parser, enc, s, next);
1809 case XML_TOK_INVALID:
1811 return XML_ERROR_INVALID_TOKEN;
/* Incomplete input: either XML_ERROR_NONE or a specific error is returned;
   the test that chooses between them is not visible in this listing. */
1812 case XML_TOK_PARTIAL_CHAR:
1815 return XML_ERROR_NONE;
1817 return XML_ERROR_PARTIAL_CHAR;
1818 case XML_TOK_PARTIAL:
1822 return XML_ERROR_NONE;
1824 return XML_ERROR_UNCLOSED_CDATA_SECTION;
/* Advance to the next token and record it as the current event position. */
1828 *eventPP = s = next;
/* Selects the initial encoding from protocolEncodingName by calling
   XmlInitEncoding (or XmlInitEncodingNS when ns is set).  A non-zero result
   from that call yields XML_ERROR_NONE; otherwise the name is passed on to
   handleUnknownEncoding.
   NOTE(review): the listing shows both a narrowing copy of the name into
   encodingBuf and a direct "s = protocolEncodingName"; which of the two
   applies is presumably chosen by a preprocessor conditional that is not
   visible here. */
1833 static enum XML_Error
1834 initializeEncoding(XML_Parser parser)
1838 char encodingBuf[128];
1839 if (!protocolEncodingName)
/* Copy the name one character at a time into a char buffer.  If it does not
   fit, or a character is >= 0x80 or negative, the buffer is emptied. */
1843 for (i = 0; protocolEncodingName[i]; i++) {
1844 if (i == sizeof(encodingBuf) - 1
1845 || protocolEncodingName[i] >= 0x80
1846 || protocolEncodingName[i] < 0) {
1847 encodingBuf[0] = '\0';
1850 encodingBuf[i] = (char)protocolEncodingName[i];
1852 encodingBuf[i] = '\0';
1856 s = protocolEncodingName;
1858 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1859 return XML_ERROR_NONE;
1860 return handleUnknownEncoding(parser, protocolEncodingName);
/* Handles an XML declaration (or a text declaration when isGeneralTextEntity
   is non-zero) spanning s..next.  The declaration is parsed with
   XmlParseXmlDecl; XML_ERROR_SYNTAX is returned from the branch following
   that call.  When no protocolEncodingName was supplied, the encoding named
   in the declaration is adopted, or handed to handleUnknownEncoding if it is
   not a built-in one.  Returns XML_ERROR_NONE on success.
   (Several conditions and call arguments are not visible in this listing.) */
1863 static enum XML_Error
1864 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
1865 const char *s, const char *next)
1867 const char *encodingName = 0;
1868 const ENCODING *newEncoding = 0;
1869 const char *version;
/* -1 presumably means "no standalone declaration seen" -- confirm against
   XmlParseXmlDecl. */
1870 int standalone = -1;
1873 : XmlParseXmlDecl)(isGeneralTextEntity,
1882 return XML_ERROR_SYNTAX;
1883 if (!isGeneralTextEntity && standalone == 1)
1886 reportDefault(parser, encoding, s, next);
/* Only consider the declared encoding when the application did not supply
   one. */
1887 if (!protocolEncodingName) {
/* The declared encoding must use the same minimum character width as the
   encoding currently in use; otherwise point the event at the name and fail. */
1889 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
1890 eventPtr = encodingName;
1891 return XML_ERROR_INCORRECT_ENCODING;
1893 encoding = newEncoding;
/* A name was given but not handled above: copy it into tempPool and ask
   handleUnknownEncoding; the temporary copy is discarded afterwards. */
1895 else if (encodingName) {
1896 enum XML_Error result;
1897 const XML_Char *s = poolStoreString(&tempPool,
1901 + XmlNameLength(encoding, encodingName));
1903 return XML_ERROR_NO_MEMORY;
1904 result = handleUnknownEncoding(parser, s);
1905 poolDiscard(&tempPool);
1906 if (result == XML_ERROR_UNKNOWN_ENCODING)
1907 eventPtr = encodingName;
1911 return XML_ERROR_NONE;
/* Gives the application's unknownEncodingHandler a chance to describe
   encodingName.  If the handler accepts, memory of XmlSizeOfUnknownEncoding()
   bytes is allocated, initialized through XmlInitUnknownEncoding (or the NS
   variant), and the handler's data/release pair is recorded in the parser;
   XML_ERROR_NONE is then returned.  Returns XML_ERROR_NO_MEMORY if the
   allocation fails and XML_ERROR_UNKNOWN_ENCODING otherwise.
   (Some lines, including guards around info.release, are not visible.) */
1914 static enum XML_Error
1915 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
1917 if (unknownEncodingHandler) {
/* Per-byte initialization of the info structure over all 256 byte values
   (the loop body is not visible in this listing). */
1920 for (i = 0; i < 256; i++)
1925 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
1927 unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
/* Allocation failed: release the handler's data before reporting it. */
1928 if (!unknownEncodingMem) {
1930 info.release(info.data);
1931 return XML_ERROR_NO_MEMORY;
1934 ? XmlInitUnknownEncodingNS
1935 : XmlInitUnknownEncoding)(unknownEncodingMem,
/* Keep the handler's data and release callback in the parser. */
1940 unknownEncodingData = info.data;
1941 unknownEncodingRelease = info.release;
1943 return XML_ERROR_NONE;
1947 info.release(info.data);
1949 return XML_ERROR_UNKNOWN_ENCODING;
/* First processor run on a document: it initializes the encoding, then
   installs prologProcessor and delegates the same input to it.  The action
   taken when initializeEncoding fails is not visible in this listing. */
1952 static enum XML_Error
1953 prologInitProcessor(XML_Parser parser,
1956 const char **nextPtr)
1958 enum XML_Error result = initializeEncoding(parser);
1959 if (result != XML_ERROR_NONE)
1961 processor = prologProcessor;
1962 return prologProcessor(parser, s, end, nextPtr);
/* Processor for the document prolog.  Each token from XmlPrologTok is
   classified with XmlTokenRole and handled by role: XML declaration, DOCTYPE
   identifiers, ATTLIST/ENTITY/NOTATION declarations, content-model groups,
   processing instructions and comments.  XML_ROLE_INSTANCE_START switches to
   contentProcessor.  Returns an XML_Error code; XML_ERROR_NONE appears on the
   path taken when nextPtr is supplied and the token is not XML_TOK_INVALID.
   (The enclosing loop and several conditions are not visible in this
   listing.) */
1965 static enum XML_Error
1966 prologProcessor(XML_Parser parser,
1969 const char **nextPtr)
1973 int tok = XmlPrologTok(encoding, s, end, &next);
/* Handling of tokens that did not yield a complete prolog token. */
1975 if (nextPtr != 0 && tok != XML_TOK_INVALID) {
1977 return XML_ERROR_NONE;
1980 case XML_TOK_INVALID:
1982 return XML_ERROR_INVALID_TOKEN;
1984 return XML_ERROR_NO_ELEMENTS;
1985 case XML_TOK_PARTIAL:
1986 return XML_ERROR_UNCLOSED_TOKEN;
1987 case XML_TOK_PARTIAL_CHAR:
1988 return XML_ERROR_PARTIAL_CHAR;
1989 case XML_TOK_TRAILING_CR:
1990 eventPtr = s + encoding->minBytesPerChar;
1991 return XML_ERROR_NO_ELEMENTS;
/* Dispatch on the grammatical role of the token. */
1996 switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
1997 case XML_ROLE_XML_DECL:
1999 enum XML_Error result = processXmlDecl(parser, 0, s, next);
2000 if (result != XML_ERROR_NONE)
/* External DOCTYPE: the application may veto via notStandaloneHandler
   (a zero result yields XML_ERROR_NOT_STANDALONE). */
2004 case XML_ROLE_DOCTYPE_SYSTEM_ID:
2006 && notStandaloneHandler
2007 && !notStandaloneHandler(handlerArg))
2008 return XML_ERROR_NOT_STANDALONE;
2009 hadExternalDoctype = 1;
/* Public identifiers are validated, then stored in dtd.pool without the
   first and last minimum-size character of the token (presumably the
   quotes) and normalized before being attached to the entity. */
2011 case XML_ROLE_DOCTYPE_PUBLIC_ID:
2012 case XML_ROLE_ENTITY_PUBLIC_ID:
2013 if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2014 return XML_ERROR_SYNTAX;
2016 XML_Char *tem = poolStoreString(&dtd.pool,
2018 s + encoding->minBytesPerChar,
2019 next - encoding->minBytesPerChar);
2021 return XML_ERROR_NO_MEMORY;
2022 normalizePublicId(tem);
2023 declEntity->publicId = tem;
2024 poolFinish(&dtd.pool);
/* Start of the document element: hand the rest of the input to
   contentProcessor. */
2027 case XML_ROLE_INSTANCE_START:
2028 processor = contentProcessor;
2029 if (hadExternalDoctype)
2031 return contentProcessor(parser, s, end, nextPtr);
/* <!ATTLIST name ...: find or create the element type.  The pooled name is
   kept only when the table entry adopted this exact string. */
2032 case XML_ROLE_ATTLIST_ELEMENT_NAME:
2034 const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
2036 return XML_ERROR_NO_MEMORY;
2037 declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2038 if (!declElementType)
2039 return XML_ERROR_NO_MEMORY;
2040 if (declElementType->name != name)
2041 poolDiscard(&dtd.pool);
2043 poolFinish(&dtd.pool);
2044 if (!setElementTypePrefix(parser, declElementType))
2045 return XML_ERROR_NO_MEMORY;
/* Attribute name inside an ATTLIST; the type defaults to non-CDATA until a
   CDATA type token is seen. */
2049 case XML_ROLE_ATTRIBUTE_NAME:
2050 declAttributeId = getAttributeId(parser, encoding, s, next);
2051 if (!declAttributeId)
2052 return XML_ERROR_NO_MEMORY;
2053 declAttributeIsCdata = 0;
2055 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2056 declAttributeIsCdata = 1;
/* #IMPLIED / #REQUIRED: record the attribute with no default value. */
2058 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2059 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2061 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2062 return XML_ERROR_NO_MEMORY;
/* Default or #FIXED value: normalize it and record it as the default. */
2064 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2065 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2067 const XML_Char *attVal;
2068 enum XML_Error result
2069 = storeAttributeValue(parser, encoding, declAttributeIsCdata,
2070 s + encoding->minBytesPerChar,
2071 next - encoding->minBytesPerChar,
2075 attVal = poolStart(&dtd.pool);
2076 poolFinish(&dtd.pool);
2078 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2079 return XML_ERROR_NO_MEMORY;
2082 case XML_ROLE_ENTITY_VALUE:
2084 enum XML_Error result = storeEntityValue(parser, s, next);
2085 if (result != XML_ERROR_NONE)
/* System identifier of an entity: stored without its first and last
   minimum-size character, together with the current base. */
2089 case XML_ROLE_ENTITY_SYSTEM_ID:
2091 declEntity->systemId = poolStoreString(&dtd.pool, encoding,
2092 s + encoding->minBytesPerChar,
2093 next - encoding->minBytesPerChar);
2094 if (!declEntity->systemId)
2095 return XML_ERROR_NO_MEMORY;
2096 declEntity->base = dtd.base;
2097 poolFinish(&dtd.pool);
/* NDATA name: the entity is unparsed; report it to the application. */
2100 case XML_ROLE_ENTITY_NOTATION_NAME:
2102 declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
2103 if (!declEntity->notation)
2104 return XML_ERROR_NO_MEMORY;
2105 poolFinish(&dtd.pool);
2106 if (unparsedEntityDeclHandler) {
2107 eventPtr = eventEndPtr = s;
2108 unparsedEntityDeclHandler(handlerArg,
2111 declEntity->systemId,
2112 declEntity->publicId,
2113 declEntity->notation);
/* General entity name: find or create the entity.  The pooled name is
   discarded when the table entry does not use this exact string. */
2118 case XML_ROLE_GENERAL_ENTITY_NAME:
2120 const XML_Char *name;
2121 if (XmlPredefinedEntityName(encoding, s, next)) {
2125 name = poolStoreString(&dtd.pool, encoding, s, next);
2127 return XML_ERROR_NO_MEMORY;
2129 declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2131 return XML_ERROR_NO_MEMORY;
2132 if (declEntity->name != name) {
2133 poolDiscard(&dtd.pool);
2137 poolFinish(&dtd.pool);
2140 poolDiscard(&dtd.pool);
2145 case XML_ROLE_PARAM_ENTITY_NAME:
/* Notation declarations are only collected (in tempPool) when a
   notationDeclHandler is installed. */
2148 case XML_ROLE_NOTATION_NAME:
2149 declNotationPublicId = 0;
2150 declNotationName = 0;
2151 if (notationDeclHandler) {
2152 declNotationName = poolStoreString(&tempPool, encoding, s, next);
2153 if (!declNotationName)
2154 return XML_ERROR_NO_MEMORY;
2155 poolFinish(&tempPool);
2158 case XML_ROLE_NOTATION_PUBLIC_ID:
2159 if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2160 return XML_ERROR_SYNTAX;
2161 if (declNotationName) {
2162 XML_Char *tem = poolStoreString(&tempPool,
2164 s + encoding->minBytesPerChar,
2165 next - encoding->minBytesPerChar);
2167 return XML_ERROR_NO_MEMORY;
2168 normalizePublicId(tem);
2169 declNotationPublicId = tem;
2170 poolFinish(&tempPool);
/* Notation with a system id: report it, then drop the temporary strings. */
2173 case XML_ROLE_NOTATION_SYSTEM_ID:
2174 if (declNotationName && notationDeclHandler) {
2175 const XML_Char *systemId
2176 = poolStoreString(&tempPool, encoding,
2177 s + encoding->minBytesPerChar,
2178 next - encoding->minBytesPerChar);
2180 return XML_ERROR_NO_MEMORY;
2181 eventPtr = eventEndPtr = s;
2182 notationDeclHandler(handlerArg,
2186 declNotationPublicId);
2188 poolClear(&tempPool);
/* Notation with only a public id. */
2190 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2191 if (declNotationPublicId && notationDeclHandler) {
2192 eventPtr = eventEndPtr = s;
2193 notationDeclHandler(handlerArg,
2197 declNotationPublicId);
2199 poolClear(&tempPool);
/* Grammar error: the specific error code depends on the offending token. */
2201 case XML_ROLE_ERROR:
2204 case XML_TOK_PARAM_ENTITY_REF:
2205 return XML_ERROR_PARAM_ENTITY_REF;
2206 case XML_TOK_XML_DECL:
2207 return XML_ERROR_MISPLACED_XML_PI;
2209 return XML_ERROR_SYNTAX;
/* Content-model group opened: groupConnector has one byte per nesting level
   and is doubled (or first allocated with 32 bytes) when the level reaches
   groupSize.
   NOTE(review): the realloc result overwrites groupConnector directly, so the
   old buffer is unreachable through it if the call fails. */
2211 case XML_ROLE_GROUP_OPEN:
2212 if (prologState.level >= groupSize) {
2214 groupConnector = realloc(groupConnector, groupSize *= 2);
2216 groupConnector = malloc(groupSize = 32);
2217 if (!groupConnector)
2218 return XML_ERROR_NO_MEMORY;
2220 groupConnector[prologState.level] = 0;
/* A group may not mix ',' and '|' connectors at the same level. */
2222 case XML_ROLE_GROUP_SEQUENCE:
2223 if (groupConnector[prologState.level] == '|') {
2225 return XML_ERROR_SYNTAX;
2227 groupConnector[prologState.level] = ',';
2229 case XML_ROLE_GROUP_CHOICE:
2230 if (groupConnector[prologState.level] == ',') {
2232 return XML_ERROR_SYNTAX;
2234 groupConnector[prologState.level] = '|';
2236 case XML_ROLE_PARAM_ENTITY_REF:
2238 && notStandaloneHandler
2239 && !notStandaloneHandler(handlerArg))
2240 return XML_ERROR_NOT_STANDALONE;
/* Processing instructions and comments are reported through their helpers;
   a zero result from a helper is treated as out of memory. */
2248 if (!reportProcessingInstruction(parser, encoding, s, next))
2249 return XML_ERROR_NO_MEMORY;
2251 case XML_TOK_COMMENT:
2254 if (!reportComment(parser, encoding, s, next))
2255 return XML_ERROR_NO_MEMORY;
/* Tokens are also passed to the default handler, with some token kinds
   singled out (the details of that switch are not visible here). */
2260 if (defaultHandler) {
2263 case XML_TOK_COMMENT:
2265 case XML_TOK_XML_DECL:
2270 reportDefault(parser, encoding, s, next);
/* Processor for everything after the document element.  It installs itself
   as the current processor and tokenizes with XmlPrologTok.  Whitespace is
   passed to the default handler; processing instructions and comments go
   through their report helpers; the remaining visible branches return
   XML_ERROR_JUNK_AFTER_DOC_ELEMENT or the error shown in the matching case.
   (The loop and the tests that choose between XML_ERROR_NONE and an error for
   partial tokens are not visible in this listing.) */
2279 enum XML_Error epilogProcessor(XML_Parser parser,
2282 const char **nextPtr)
2284 processor = epilogProcessor;
2288 int tok = XmlPrologTok(encoding, s, end, &next);
/* A trailing carriage return is reported up to end rather than next. */
2291 case XML_TOK_TRAILING_CR:
2292 if (defaultHandler) {
2294 reportDefault(parser, encoding, s, end);
2300 return XML_ERROR_NONE;
2301 case XML_TOK_PROLOG_S:
2303 reportDefault(parser, encoding, s, next);
/* A zero result from the report helpers is treated as out of memory. */
2306 if (!reportProcessingInstruction(parser, encoding, s, next))
2307 return XML_ERROR_NO_MEMORY;
2309 case XML_TOK_COMMENT:
2310 if (!reportComment(parser, encoding, s, next))
2311 return XML_ERROR_NO_MEMORY;
2313 case XML_TOK_INVALID:
2315 return XML_ERROR_INVALID_TOKEN;
2316 case XML_TOK_PARTIAL:
2319 return XML_ERROR_NONE;
2321 return XML_ERROR_UNCLOSED_TOKEN;
2322 case XML_TOK_PARTIAL_CHAR:
2325 return XML_ERROR_NONE;
2327 return XML_ERROR_PARTIAL_CHAR;
2329 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
/* Move on to the next token and record it as the event position. */
2331 eventPtr = s = next;
/* Builds the normalized value of an attribute from ptr..end in pool by
   calling appendAttributeValue, then appends a terminating null character.
   Returns XML_ERROR_NONE, or XML_ERROR_NO_MEMORY if the terminator cannot be
   appended.  (The pool parameter and the handling of a failed append call
   are not visible in this listing.) */
2335 static enum XML_Error
2336 storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2337 const char *ptr, const char *end,
2340 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
/* For non-CDATA values, detect a trailing 0x20 in a non-empty pool (the
   action taken is not visible here; presumably the space is removed). */
2343 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
2345 if (!poolAppendChar(pool, XML_T('\0')))
2346 return XML_ERROR_NO_MEMORY;
2347 return XML_ERROR_NONE;
/* Appends the text of an attribute value (ptr..end) to pool, token by token
   via XmlAttributeValueTok: character references are encoded with XmlEncode,
   whitespace and newlines become 0x20, predefined entities are replaced by
   their character, and internal general entities are expanded recursively.
   Returns XML_ERROR_NONE at the end of the value or the error shown in the
   failing branch.  (The loop, several conditions and the eventPtr updates
   under "enc == encoding" are not visible in this listing.) */
2350 static enum XML_Error
2351 appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2352 const char *ptr, const char *end,
/* Encoding used to re-scan entity replacement text in the recursive call. */
2355 const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
2358 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2361 return XML_ERROR_NONE;
2362 case XML_TOK_INVALID:
2363 if (enc == encoding)
2365 return XML_ERROR_INVALID_TOKEN;
/* A partial token inside an attribute value is also reported as invalid. */
2366 case XML_TOK_PARTIAL:
2367 if (enc == encoding)
2369 return XML_ERROR_INVALID_TOKEN;
/* Character reference: decode the number, then encode it into buf and
   append the resulting XML_Chars. */
2370 case XML_TOK_CHAR_REF:
2372 XML_Char buf[XML_ENCODE_MAX];
2374 int n = XmlCharRefNumber(enc, ptr);
2376 if (enc == encoding)
2378 return XML_ERROR_BAD_CHAR_REF;
2381 && n == 0x20 /* space */
2382 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2384 n = XmlEncode(n, (ICHAR *)buf);
2386 if (enc == encoding)
2388 return XML_ERROR_BAD_CHAR_REF;
2390 for (i = 0; i < n; i++) {
2391 if (!poolAppendChar(pool, buf[i]))
2392 return XML_ERROR_NO_MEMORY;
2396 case XML_TOK_DATA_CHARS:
2397 if (!poolAppend(pool, enc, ptr, next))
2398 return XML_ERROR_NO_MEMORY;
/* A trailing CR consumes exactly one minimum-size character. */
2401 case XML_TOK_TRAILING_CR:
2402 next = ptr + enc->minBytesPerChar;
/* Whitespace and newlines are appended as a single 0x20.  For non-CDATA
   values the test below detects a leading or repeated space (its action is
   not visible in this listing). */
2404 case XML_TOK_ATTRIBUTE_VALUE_S:
2405 case XML_TOK_DATA_NEWLINE:
2406 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
2408 if (!poolAppendChar(pool, 0x20))
2409 return XML_ERROR_NO_MEMORY;
/* Entity reference: the name is the token without its first and last
   minimum-size character.  Predefined entities yield a single character. */
2411 case XML_TOK_ENTITY_REF:
2413 const XML_Char *name;
2415 XML_Char ch = XmlPredefinedEntityName(enc,
2416 ptr + enc->minBytesPerChar,
2417 next - enc->minBytesPerChar);
2419 if (!poolAppendChar(pool, ch))
2420 return XML_ERROR_NO_MEMORY;
/* Otherwise look the name up among the general entities; the temporary copy
   of the name in temp2Pool is discarded right after the lookup. */
2423 name = poolStoreString(&temp2Pool, enc,
2424 ptr + enc->minBytesPerChar,
2425 next - enc->minBytesPerChar);
2427 return XML_ERROR_NO_MEMORY;
2428 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2429 poolDiscard(&temp2Pool);
2432 if (enc == encoding)
2434 return XML_ERROR_UNDEFINED_ENTITY;
/* Reject entities that are already being expanded, that have a notation, or
   that have no internal replacement text. */
2437 else if (entity->open) {
2438 if (enc == encoding)
2440 return XML_ERROR_RECURSIVE_ENTITY_REF;
2442 else if (entity->notation) {
2443 if (enc == encoding)
2445 return XML_ERROR_BINARY_ENTITY_REF;
2447 else if (!entity->textPtr) {
2448 if (enc == encoding)
2450 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
/* Internal entity: append its replacement text by recursing with the
   internal encoding. */
2453 enum XML_Error result;
2454 const XML_Char *textEnd = entity->textPtr + entity->textLen;
2456 result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
/* Stores the literal value of the entity being declared (declEntity) in
   dtd.pool.  The value is tokenized with XmlEntityValueTok using the document
   encoding: entity references and data are copied as-is, newlines become 0xA,
   character references are encoded with XmlEncode, and parameter-entity
   references are rejected with XML_ERROR_SYNTAX.  When the value is complete,
   declEntity->textPtr and textLen describe the stored text and XML_ERROR_NONE
   is returned.  (The loop and some conditions are not visible in this
   listing.) */
2472 enum XML_Error storeEntityValue(XML_Parser parser,
2473 const char *entityTextPtr,
2474 const char *entityTextEnd)
2476 STRING_POOL *pool = &(dtd.pool);
/* Narrow the range by one minimum-size character at each end (presumably
   the surrounding quotes -- confirm against the caller). */
2477 entityTextPtr += encoding->minBytesPerChar;
2478 entityTextEnd -= encoding->minBytesPerChar;
2481 int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
2483 case XML_TOK_PARAM_ENTITY_REF:
2484 eventPtr = entityTextPtr;
2485 return XML_ERROR_SYNTAX;
/* End of the value: publish the accumulated pool contents as the entity's
   replacement text. */
2488 declEntity->textPtr = pool->start;
2489 declEntity->textLen = pool->ptr - pool->start;
2494 return XML_ERROR_NONE;
/* General entity references are copied literally, like ordinary data. */
2495 case XML_TOK_ENTITY_REF:
2496 case XML_TOK_DATA_CHARS:
2497 if (!poolAppend(pool, encoding, entityTextPtr, next))
2498 return XML_ERROR_NO_MEMORY;
2500 case XML_TOK_TRAILING_CR:
2501 next = entityTextPtr + encoding->minBytesPerChar;
/* Line ends are stored as a single 0xA; the pool is grown first if full. */
2503 case XML_TOK_DATA_NEWLINE:
2504 if (pool->end == pool->ptr && !poolGrow(pool))
2505 return XML_ERROR_NO_MEMORY;
2506 *(pool->ptr)++ = 0xA;
/* Character reference: decode, encode into buf, and append each XML_Char. */
2508 case XML_TOK_CHAR_REF:
2510 XML_Char buf[XML_ENCODE_MAX];
2512 int n = XmlCharRefNumber(encoding, entityTextPtr);
2514 eventPtr = entityTextPtr;
2515 return XML_ERROR_BAD_CHAR_REF;
2517 n = XmlEncode(n, (ICHAR *)buf);
2519 eventPtr = entityTextPtr;
2520 return XML_ERROR_BAD_CHAR_REF;
2522 for (i = 0; i < n; i++) {
2523 if (pool->end == pool->ptr && !poolGrow(pool))
2524 return XML_ERROR_NO_MEMORY;
2525 *(pool->ptr)++ = buf[i];
2529 case XML_TOK_PARTIAL:
2530 eventPtr = entityTextPtr;
2531 return XML_ERROR_INVALID_TOKEN;
2532 case XML_TOK_INVALID:
2534 return XML_ERROR_INVALID_TOKEN;
2538 entityTextPtr = next;
/* Operates in place on the null-terminated string s.  Only the signature and
   the end-of-string test are visible in this listing; judging by the name it
   presumably normalizes line endings -- confirm against the full body.  It
   is applied to processing-instruction data and comment text before they are
   handed to the application. */
2544 normalizeLines(XML_Char *s)
2548 if (*s == XML_T('\0'))
/* Reports the processing instruction spanning start..end.  Without a
   processingInstructionHandler the raw text goes to reportDefault instead.
   Otherwise the target name and the data are copied into tempPool, the data
   is passed through normalizeLines, the handler is called, and tempPool is
   cleared.  Callers treat a zero result as out of memory.  (The return
   statements and allocation checks are not visible in this listing.) */
2567 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2569 const XML_Char *target;
2572 if (!processingInstructionHandler) {
2574 reportDefault(parser, enc, start, end);
/* Skip two minimum-size characters (presumably the "<?" opener), then take
   the name that follows as the target. */
2577 start += enc->minBytesPerChar * 2;
2578 tem = start + XmlNameLength(enc, start);
2579 target = poolStoreString(&tempPool, enc, start, tem);
2582 poolFinish(&tempPool);
/* The data ends two minimum-size characters before end (presumably "?>"). */
2583 data = poolStoreString(&tempPool, enc,
2585 end - enc->minBytesPerChar*2);
2588 normalizeLines(data);
2589 processingInstructionHandler(handlerArg, target, data);
2590 poolClear(&tempPool);
/* Reports the comment spanning start..end.  Without a commentHandler the raw
   text goes to reportDefault instead.  Otherwise the text between the first
   four and the last three minimum-size characters (presumably the comment
   delimiters) is copied into tempPool, passed through normalizeLines, given to
   the handler, and tempPool is cleared.  Callers treat a zero result as out
   of memory.  (The return statements are not visible in this listing.) */
2595 reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2598 if (!commentHandler) {
2600 reportDefault(parser, enc, start, end);
2603 data = poolStoreString(&tempPool,
2605 start + enc->minBytesPerChar * 4,
2606 end - enc->minBytesPerChar * 3);
2609 normalizeLines(data);
2610 commentHandler(handlerArg, data);
2611 poolClear(&tempPool);
/* Passes the raw input s..end to defaultHandler.  When MUST_CONVERT says the
   bytes cannot be handed over directly, they are converted through dataBuf
   with XmlConvert before the handler is called; otherwise the handler
   receives s itself with the length in XML_Chars.  (The loop around the
   conversion and the use of eventPP/eventEndPP are not visible in this
   listing.) */
2616 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
2618 if (MUST_CONVERT(enc, s)) {
2619 const char **eventPP;
2620 const char **eventEndPP;
/* Event positions live in the parser for the document encoding, otherwise
   in the innermost open internal entity. */
2621 if (enc == encoding) {
2622 eventPP = &eventPtr;
2623 eventEndPP = &eventEndPtr;
2626 eventPP = &(openInternalEntities->internalEventPtr);
2627 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2630 ICHAR *dataPtr = (ICHAR *)dataBuf;
2631 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2633 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2638 defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
/* Appends a defaulted-attribute entry (attId, isCdata, value) to the element
   type's defaultAtts array, growing the array when it is full.  Callers treat
   a zero result as out of memory.  (The return statements and some
   assignments are not visible in this listing.) */
2643 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
2645 DEFAULT_ATTRIBUTE *att;
/* Array full: start with 8 entries, then double on each growth.
   NOTE(review): the realloc result overwrites type->defaultAtts directly, so
   the previous array is unreachable through it if the call fails. */
2646 if (type->nDefaultAtts == type->allocDefaultAtts) {
2647 if (type->allocDefaultAtts == 0) {
2648 type->allocDefaultAtts = 8;
2649 type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2652 type->allocDefaultAtts *= 2;
2653 type->defaultAtts = realloc(type->defaultAtts,
2654 type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2656 if (!type->defaultAtts)
/* Fill in the next free slot. */
2659 att = type->defaultAtts + type->nDefaultAtts;
2662 att->isCdata = isCdata;
/* Flag the attribute id as possibly tokenized (the condition guarding this
   assignment is not visible here). */
2664 attId->maybeTokenized = 1;
2665 type->nDefaultAtts += 1;
/* If the element type's name contains a ':', looks up (creating if needed)
   the PREFIX for the characters before it and stores it in
   elementType->prefix.  Callers treat a zero result as out of memory.
   (The return statements are not visible in this listing.) */
2669 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
2671 const XML_Char *name;
2672 for (name = elementType->name; *name; name++) {
2673 if (*name == XML_T(':')) {
/* Build a null-terminated copy of the prefix in dtd.pool for the lookup. */
2676 for (s = elementType->name; s != name; s++) {
2677 if (!poolAppendChar(&dtd.pool, *s))
2680 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2682 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
/* Keep the pooled string only if the table entry adopted it as its name;
   otherwise the prefix already existed and the copy is dropped. */
2685 if (prefix->name == poolStart(&dtd.pool))
2686 poolFinish(&dtd.pool);
2688 poolDiscard(&dtd.pool);
2689 elementType->prefix = prefix;
/* Returns the ATTRIBUTE_ID for the attribute name in start..end, creating it
   in dtd.attributeIds if necessary, and for new ids determines the namespace
   prefix from the name.  Callers treat a null result as out of memory.
   (The return statements and several conditions are not visible in this
   listing.) */
2696 static ATTRIBUTE_ID *
2697 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2700 const XML_Char *name;
/* Reserve one XML_Char in front of the name; storeAtts reads and writes
   name[-1] as a per-tag marker. */
2701 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2703 name = poolStoreString(&dtd.pool, enc, start, end);
2707 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
/* Keep the pooled name only if the table entry adopted this exact string. */
2710 if (id->name != name)
2711 poolDiscard(&dtd.pool);
2713 poolFinish(&dtd.pool);
/* Names recognized by this test (starting with 'x' and having either the end
   of the string or ':' at index 5 -- the comparisons for the characters in
   between are not visible here) are namespace declarations: the default
   prefix when the name ends at index 5, otherwise the prefix named by the
   text from index 6 on. */
2716 else if (name[0] == 'x'
2721 && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
2722 if (name[5] == '\0')
2723 id->prefix = &dtd.defaultPrefix;
2725 id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
/* Any other name containing ':' gets the prefix made of the characters
   before the colon, copied into dtd.pool for the lookup.
   NOTE(review): id->prefix is dereferenced on the line right after lookup()
   with no null check, although other callers in this file treat a null
   lookup() result as out of memory -- confirm. */
2730 for (i = 0; name[i]; i++) {
2731 if (name[i] == XML_T(':')) {
2733 for (j = 0; j < i; j++) {
2734 if (!poolAppendChar(&dtd.pool, name[j]))
2737 if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2739 id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
2740 if (id->prefix->name == poolStart(&dtd.pool))
2741 poolFinish(&dtd.pool);
2743 poolDiscard(&dtd.pool);
/* Character (form feed) written between items of the context string by
   getContext and recognised as the item separator by setContext. */
2752 #define CONTEXT_SEP XML_T('\f')
/* Build the parser's context string in tempPool and return its start.
   As far as the visible lines show, the string consists of:
     - "=" followed by the default prefix's binding URI, when the default
       prefix has a binding;
     - for each entry of dtd.prefixes that has a binding, the prefix name,
       "=", and the binding URI;
     - names taken from dtd.generalEntities;
   with CONTEXT_SEP written before an item when needSep is set, and a
   terminating NUL.
   NOTE(review): this listing omits short lines, so the failure returns,
   the statement guarded by the namespaceSeparator tests (presumably an
   adjustment of len), the needSep updates and the entity filter are not
   visible -- confirm against the full source. */
2755 const XML_Char *getContext(XML_Parser parser)
2757 HASH_TABLE_ITER iter;
/* Default namespace first: written with an empty prefix, i.e. "=uri". */
2760 if (dtd.defaultPrefix.binding) {
2763 if (!poolAppendChar(&tempPool, XML_T('=')))
2765 len = dtd.defaultPrefix.binding->uriLen;
2766 if (namespaceSeparator != XML_T('\0'))
2768 for (i = 0; i < len; i++)
2769 if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
/* Then every prefix in the table; the check on prefix->binding guards
   a statement that is not shown (presumably skipping unbound prefixes). */
2774 hashTableIterInit(&iter, &(dtd.prefixes));
2779 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
2782 if (!prefix->binding)
2784 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
2786 for (s = prefix->name; *s; s++)
2787 if (!poolAppendChar(&tempPool, *s))
2789 if (!poolAppendChar(&tempPool, XML_T('=')))
2791 len = prefix->binding->uriLen;
2792 if (namespaceSeparator != XML_T('\0'))
2794 for (i = 0; i < len; i++)
2795 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
/* Finally entity names from the general entity table. */
2801 hashTableIterInit(&iter, &(dtd.generalEntities));
2804 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
2809 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
2811 for (s = e->name; *s; s++)
2812 if (!poolAppendChar(&tempPool, *s))
/* Terminate the string; the result lives in tempPool. */
2817 if (!poolAppendChar(&tempPool, XML_T('\0')))
2819 return tempPool.start;
2823 int setContext(XML_Parser parser, const XML_Char *context)
2825 const XML_Char *s = context;
2827 while (*context != XML_T('\0')) {
2828 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
2830 if (!poolAppendChar(&tempPool, XML_T('\0')))
2832 e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
2835 if (*s != XML_T('\0'))
2838 poolDiscard(&tempPool);
2840 else if (*s == '=') {
2842 if (poolLength(&tempPool) == 0)
2843 prefix = &dtd.defaultPrefix;
2845 if (!poolAppendChar(&tempPool, XML_T('\0')))
2847 prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
2850 if (prefix->name == poolStart(&tempPool))
2851 poolFinish(&tempPool);
2853 poolDiscard(&tempPool);
2855 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
2856 if (!poolAppendChar(&tempPool, *context))
2858 if (!poolAppendChar(&tempPool, XML_T('\0')))
2860 if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
2862 poolDiscard(&tempPool);
2863 if (*context != XML_T('\0'))
2868 if (!poolAppendChar(&tempPool, *s))
2878 void normalizePublicId(XML_Char *publicId)
2880 XML_Char *p = publicId;
2882 for (s = publicId; *s; s++) {
2887 if (p != publicId && p[-1] != 0x20)
2894 if (p != publicId && p[-1] == 0x20)
/* Put a DTD into its initial empty state: initialise its string pool and
   its four hash tables (general entities, element types, attribute ids,
   prefixes) and clear the default prefix's name and binding.
   NOTE(review): this listing omits short lines between the table
   initialisation and the defaultPrefix assignments (presumably the
   remaining scalar fields) as well as the return statement -- confirm
   against the full source. */
2899 static int dtdInit(DTD *p)
2901 poolInit(&(p->pool));
2902 hashTableInit(&(p->generalEntities));
2903 hashTableInit(&(p->elementTypes));
2904 hashTableInit(&(p->attributeIds));
2905 hashTableInit(&(p->prefixes));
/* The default prefix is a member of the DTD, not a table entry. */
2909 p->defaultPrefix.name = 0;
2910 p->defaultPrefix.binding = 0;
2914 static void dtdDestroy(DTD *p)
2916 HASH_TABLE_ITER iter;
2917 hashTableIterInit(&iter, &(p->elementTypes));
2919 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2922 if (e->allocDefaultAtts != 0)
2923 free(e->defaultAtts);
2925 hashTableDestroy(&(p->generalEntities));
2926 hashTableDestroy(&(p->elementTypes));
2927 hashTableDestroy(&(p->attributeIds));
2928 hashTableDestroy(&(p->prefixes));
2929 poolDestroy(&(p->pool));
/* NOTE(review): this listing omits short lines (loop heads, NULL checks,
   `return 0;`, braces, short declarations).  The comments below describe
   only what the visible lines do. */
2932 /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
2933 The new DTD has already been initialized. */
2935 static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
2937 HASH_TABLE_ITER iter;
/* Copy the base string into the new DTD's pool. */
2940 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
2946 /* Copy the prefix table. */
2948 hashTableIterInit(&iter, &(oldDtd->prefixes));
2950 const XML_Char *name;
2951 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
/* Each prefix name is duplicated into the new pool and entered into the
   new table; only the name is carried over in the visible lines. */
2954 name = poolCopyString(&(newDtd->pool), oldP->name);
2957 if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
2961 hashTableIterInit(&iter, &(oldDtd->attributeIds));
2963 /* Copy the attribute id table. */
2967 const XML_Char *name;
2968 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
2972 /* Remember to allocate the scratch byte before the name. */
2973 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
2975 name = poolCopyString(&(newDtd->pool), oldA->name);
2979 newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
2982 newA->maybeTokenized = oldA->maybeTokenized;
2984 newA->xmlns = oldA->xmlns;
/* Prefix pointers must refer to the new DTD: the default prefix maps to
   the new default prefix, any other prefix is found by name in the new
   prefix table (lookup with size 0). */
2985 if (oldA->prefix == &oldDtd->defaultPrefix)
2986 newA->prefix = &newDtd->defaultPrefix;
2988 newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
2992 /* Copy the element type table. */
2994 hashTableIterInit(&iter, &(oldDtd->elementTypes));
2999 const XML_Char *name;
3000 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3003 name = poolCopyString(&(newDtd->pool), oldE->name);
3006 newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
/* The new default-attribute array is sized to exactly the number of
   attributes in use, so allocated and used counts are equal afterwards. */
3009 if (oldE->nDefaultAtts) {
3010 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3011 if (!newE->defaultAtts)
3014 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
/* Re-point the element's prefix and each attribute id at the new tables,
   found by name; the guard on oldE->prefix, if any, is not shown here. */
3016 newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
3017 for (i = 0; i < newE->nDefaultAtts; i++) {
3018 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3019 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
/* Default values are strings and are duplicated; a null value stays null. */
3020 if (oldE->defaultAtts[i].value) {
3021 newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3022 if (!newE->defaultAtts[i].value)
3026 newE->defaultAtts[i].value = 0;
3030 /* Copy the entity table. */
3032 hashTableIterInit(&iter, &(oldDtd->generalEntities));
3036 const XML_Char *name;
3037 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3040 name = poolCopyString(&(newDtd->pool), oldE->name);
3043 newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
/* An entity with a system id gets a copy of that id in the new pool. */
3046 if (oldE->systemId) {
3047 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
3050 newE->systemId = tem;
/* NOTE(review): when the entity shares the DTD's base, newE->base is
   pointed at the new DTD's base, yet the next visible line copies
   oldE->base anyway -- check whether the copy is meant to be skipped in
   that case. */
3052 if (oldE->base == oldDtd->base)
3053 newE->base = newDtd->base;
3054 tem = poolCopyString(&(newDtd->pool), oldE->base);
/* This branch copies the entity's replacement text by explicit length
   (textPtr / textLen) using poolCopyStringN. */
3061 const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
3064 newE->textPtr = tem;
3065 newE->textLen = oldE->textLen;
3067 if (oldE->notation) {
3068 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
3071 newE->notation = tem;
/* Scalar flags are copied as they are. */
3075 newDtd->complete = oldDtd->complete;
3076 newDtd->standalone = oldDtd->standalone;
3081 void poolInit(STRING_POOL *pool)
3084 pool->freeBlocks = 0;
3091 void poolClear(STRING_POOL *pool)
3093 if (!pool->freeBlocks)
3094 pool->freeBlocks = pool->blocks;
3096 BLOCK *p = pool->blocks;
3098 BLOCK *tem = p->next;
3099 p->next = pool->freeBlocks;
3100 pool->freeBlocks = p;
3111 void poolDestroy(STRING_POOL *pool)
3113 BLOCK *p = pool->blocks;
3115 BLOCK *tem = p->next;
3120 p = pool->freeBlocks;
3122 BLOCK *tem = p->next;
3126 pool->freeBlocks = 0;
3133 XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3134 const char *ptr, const char *end)
3136 if (!pool->ptr && !poolGrow(pool))
3139 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3142 if (!poolGrow(pool))
3148 static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3151 if (!poolAppendChar(pool, *s))
3159 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3161 if (!pool->ptr && !poolGrow(pool))
3163 for (; n > 0; --n, s++) {
3164 if (!poolAppendChar(pool, *s))
3174 XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3175 const char *ptr, const char *end)
3177 if (!poolAppend(pool, enc, ptr, end))
3179 if (pool->ptr == pool->end && !poolGrow(pool))
3186 int poolGrow(STRING_POOL *pool)
3188 if (pool->freeBlocks) {
3189 if (pool->start == 0) {
3190 pool->blocks = pool->freeBlocks;
3191 pool->freeBlocks = pool->freeBlocks->next;
3192 pool->blocks->next = 0;
3193 pool->start = pool->blocks->s;
3194 pool->end = pool->start + pool->blocks->size;
3195 pool->ptr = pool->start;
3198 if (pool->end - pool->start < pool->freeBlocks->size) {
3199 BLOCK *tem = pool->freeBlocks->next;
3200 pool->freeBlocks->next = pool->blocks;
3201 pool->blocks = pool->freeBlocks;
3202 pool->freeBlocks = tem;
3203 memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3204 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3205 pool->start = pool->blocks->s;
3206 pool->end = pool->start + pool->blocks->size;
3210 if (pool->blocks && pool->start == pool->blocks->s) {
3211 int blockSize = (pool->end - pool->start)*2;
3212 pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3215 pool->blocks->size = blockSize;
3216 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3217 pool->start = pool->blocks->s;
3218 pool->end = pool->start + blockSize;
3222 int blockSize = pool->end - pool->start;
3223 if (blockSize < INIT_BLOCK_SIZE)
3224 blockSize = INIT_BLOCK_SIZE;
3227 tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3230 tem->size = blockSize;
3231 tem->next = pool->blocks;
3233 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3234 pool->ptr = tem->s + (pool->ptr - pool->start);
3235 pool->start = tem->s;
3236 pool->end = tem->s + blockSize;