w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

xmlparse.c
Go to the documentation of this file.
1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2  See the file COPYING for copying permission.
3 */
4 
5 #include <stddef.h>
6 #include <string.h> /* memset(), memcpy() */
7 #include <assert.h>
8 
9 #define XML_BUILDING_EXPAT 1
10 
11 #ifdef COMPILED_FROM_DSP
12 #include "winconfig.h"
13 #elif defined(MACOS_CLASSIC)
14 #include "macconfig.h"
15 #elif defined(__amigaos4__)
16 #include "amigaconfig.h"
17 #elif defined(__WATCOMC__)
18 #include "watcomconfig.h"
19 #elif defined(HAVE_EXPAT_CONFIG_H)
20 #include <expat_config.h>
21 #endif /* COMPILED_FROM_DSP: end of platform configuration header selection */
22 
23 #include "ascii.h"
24 #include "expat.h"
25 
26 #ifdef XML_UNICODE
27 #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
28 #define XmlConvert XmlUtf16Convert
29 #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
30 #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
31 #define XmlEncode XmlUtf16Encode
32 /* Using pointer subtraction to convert to integer type. */
33 #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
34 typedef unsigned short ICHAR;
35 #else
36 #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
37 #define XmlConvert XmlUtf8Convert
38 #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
39 #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
40 #define XmlEncode XmlUtf8Encode
41 #define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
42 typedef char ICHAR;
43 #endif
44 
45 
46 #ifndef XML_NS
47 
48 #define XmlInitEncodingNS XmlInitEncoding
49 #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
50 #undef XmlGetInternalEncodingNS
51 #define XmlGetInternalEncodingNS XmlGetInternalEncoding
52 #define XmlParseXmlDeclNS XmlParseXmlDecl
53 
54 #endif
55 
56 #ifdef XML_UNICODE
57 /* Unicode build: XML_T(x) casts x to the wide character type; the argument is not parenthesized, so pass only a simple constant or identifier. */
58 #ifdef XML_UNICODE_WCHAR_T
59 #define XML_T(x) (const wchar_t)x
60 #define XML_L(x) L ## x
61 #else
62 #define XML_T(x) (const unsigned short)x
63 #define XML_L(x) x
64 #endif
65 
66 #else
67 /* Non-Unicode build: both macros expand to x unchanged. */
68 #define XML_T(x) x
69 #define XML_L(x) x
70 
71 #endif
72 
73 /* Round up n to be a multiple of sz, where sz is a power of 2. sz is expanded twice, so it must be free of side effects. */
74 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
75 
76 /* Handle the case where memmove() doesn't exist. */
77 #ifndef HAVE_MEMMOVE
78 #ifdef HAVE_BCOPY
79 #define memmove(d,s,l) bcopy((s),(d),(l))
80 #else
81 #error memmove does not exist on this platform, nor is a substitute available
82 #endif /* HAVE_BCOPY */
83 #endif /* HAVE_MEMMOVE */
84 
85 #include "internal.h"
86 #include "xmltok.h"
87 #include "xmlrole.h"
88 
89 typedef const XML_Char *KEY;
90 
91 typedef struct {
93 } NAMED;
94 
95 typedef struct {
96  NAMED **v;
97  unsigned char power;
98  size_t size;
99  size_t used;
101 } HASH_TABLE;
102 
103 /* Basic character hash algorithm, taken from Python's string hash:
104  h = h * 1000003 ^ character, the constant being a prime number.
105  (0xF4243 below is 1000003 in hexadecimal; the unsigned cast keeps a negative c from being sign-extended before the XOR.)
106 */
107 #ifdef XML_UNICODE
108 #define CHAR_HASH(h, c) \
109  (((h) * 0xF4243) ^ (unsigned short)(c))
110 #else
111 #define CHAR_HASH(h, c) \
112  (((h) * 0xF4243) ^ (unsigned char)(c))
113 #endif
114 
115 /* For probing (after a collision) we need a step size relatively prime
116  to the hash table size, which is a power of 2. We use double-hashing,
117  since we can calculate a second hash value cheaply by taking those bits
118  of the first hash value that were discarded (masked out) when the table
119  index was calculated: index = hash & mask, where mask = table->size - 1.
120  We limit the maximum step size to table->size / 4 (mask >> 2) and make
121  it odd, since odd numbers are always relatively prime to a power of 2.
122  Note that PROBE_STEP narrows its result to unsigned char. */
123 #define SECOND_HASH(hash, mask, power) \
124  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
125 #define PROBE_STEP(hash, mask, power) \
126  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
127 
128 typedef struct {
129  NAMED **p;
132 
133 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
134 #define INIT_DATA_BUF_SIZE 1024
135 #define INIT_ATTS_SIZE 16
136 #define INIT_ATTS_VERSION 0xFFFFFFFF
137 #define INIT_BLOCK_SIZE 1024
138 #define INIT_BUFFER_SIZE 1024
139 
140 #define EXPAND_SPARE 24
141 
142 typedef struct binding {
143  struct prefix *prefix;
146  const struct attribute_id *attId;
148  int uriLen;
149  int uriAlloc;
151 
152 typedef struct prefix {
153  const XML_Char *name;
156 
157 typedef struct {
158  const XML_Char *str;
160  const XML_Char *prefix;
161  int strLen;
162  int uriLen;
164 } TAG_NAME;
165 
166 /* TAG represents an open element.
167  The name of the element is stored in both the document and API
168  encodings. The memory buffer 'buf' is a separately-allocated
169  memory area which stores the name. During the XML_Parse()/
170  XML_ParseBuffer() call when the element is open, the memory for the 'raw'
171  version of the name (in the document encoding) is shared with the
172  document buffer. If the element is open across calls to
173  XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
174  contain the 'raw' name as well.
175 
176  A parser re-uses these structures, maintaining a list of allocated
177  TAG objects in a free list.
178 */
179 typedef struct tag {
180  struct tag *parent; /* parent of this element */
181  const char *rawName; /* tagName in the original encoding */
183  TAG_NAME name; /* tagName in the API encoding */
184  char *buf; /* buffer for name components */
185  char *bufEnd; /* end of the buffer */
187 } TAG;
188 
189 typedef struct {
190  const XML_Char *name;
192  int textLen; /* length in XML_Chars */
193  int processed; /* # of processed bytes - when suspended */
195  const XML_Char *base;
200  XML_Bool is_internal; /* true if declared in internal subset outside PE */
201 } ENTITY;
202 
203 typedef struct {
204  enum XML_Content_Type type;
205  enum XML_Content_Quant quant;
206  const XML_Char * name;
209  int childcnt;
210  int nextsib;
212 
213 #define INIT_SCAFFOLD_ELEMENTS 32
214 
215 typedef struct block {
216  struct block *next;
217  int size;
220 
221 typedef struct {
224  const XML_Char *end;
228 } STRING_POOL;
229 
230 /* The XML_Char before the name is used to determine whether
231  an attribute has been specified. */
232 typedef struct attribute_id {
238 
239 typedef struct {
240  const ATTRIBUTE_ID *id;
242  const XML_Char *value;
244 
245 typedef struct {
246  unsigned long version;
247  unsigned long hash;
249 } NS_ATT;
250 
251 typedef struct {
252  const XML_Char *name;
258 } ELEMENT_TYPE;
259 
260 typedef struct {
267  /* false once a parameter entity reference has been skipped */
269  /* true once an internal or external PE reference has been encountered;
270  this includes the reference to an external subset */
273 #ifdef XML_DTD
274  /* indicates if external PE has been read */
275  XML_Bool paramEntityRead;
276  HASH_TABLE paramEntities;
277 #endif /* XML_DTD */
279  /* === scaffolding for building content model === */
283  unsigned scaffSize;
284  unsigned scaffCount;
287 } DTD;
288 
289 typedef struct open_internal_entity {
290  const char *internalEventPtr;
291  const char *internalEventEndPtr;
295  XML_Bool betweenDecl; /* WFC: PE Between Declarations */
297 
299  const char *start,
300  const char *end,
301  const char **endPtr);
302 
307 #ifdef XML_DTD
308 static Processor ignoreSectionProcessor;
309 static Processor externalParEntProcessor;
310 static Processor externalParEntInitProcessor;
311 static Processor entityValueProcessor;
312 static Processor entityValueInitProcessor;
313 #endif /* XML_DTD */
321 
322 static enum XML_Error
323 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
324 static enum XML_Error
325 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
326  const char *s, const char *next);
327 static enum XML_Error
329 static enum XML_Error
330 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
331  const char *end, int tok, const char *next, const char **nextPtr,
332  XML_Bool haveMore);
333 static enum XML_Error
336 static enum XML_Error
338  const char *start, const char *end, const char **endPtr,
339  XML_Bool haveMore);
340 static enum XML_Error
341 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
342  const char *end, const char **nextPtr, XML_Bool haveMore);
343 #ifdef XML_DTD
344 static enum XML_Error
345 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
346  const char *end, const char **nextPtr, XML_Bool haveMore);
347 #endif /* XML_DTD */
348 
349 static enum XML_Error
350 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
351  TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
352 static enum XML_Error
354  const XML_Char *uri, BINDING **bindingsPtr);
355 static int
357  XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
358 static enum XML_Error
360  const char *, const char *, STRING_POOL *);
361 static enum XML_Error
363  const char *, const char *, STRING_POOL *);
364 static ATTRIBUTE_ID *
365 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
366  const char *end);
367 static int
369 static enum XML_Error
370 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
371  const char *end);
372 static int
374  const char *start, const char *end);
375 static int
376 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
377  const char *end);
378 static void
379 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
380  const char *end);
381 
382 static const XML_Char * getContext(XML_Parser parser);
383 static XML_Bool
385 
386 static void FASTCALL normalizePublicId(XML_Char *s);
387 
388 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
389 /* do not call if parentParser != NULL */
390 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
391 static void
392 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
393 static int
394 dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
395 static int
397 
398 static NAMED *
399 lookup(HASH_TABLE *table, KEY name, size_t createSize);
400 static void FASTCALL
402 static void FASTCALL hashTableClear(HASH_TABLE *);
403 static void FASTCALL hashTableDestroy(HASH_TABLE *);
404 static void FASTCALL
407 
408 static void FASTCALL
410 static void FASTCALL poolClear(STRING_POOL *);
411 static void FASTCALL poolDestroy(STRING_POOL *);
412 static XML_Char *
414  const char *ptr, const char *end);
415 static XML_Char *
417  const char *ptr, const char *end);
419 static const XML_Char * FASTCALL
421 static const XML_Char *
422 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
423 static const XML_Char * FASTCALL
425 
428 static ELEMENT_TYPE *
430  const char *ptr, const char *end);
431 
432 static XML_Parser
433 parserCreate(const XML_Char *encodingName,
434  const XML_Memory_Handling_Suite *memsuite,
435  const XML_Char *nameSep,
436  DTD *dtd);
437 static void
438 parserInit(XML_Parser parser, const XML_Char *encodingName);
439 /* String-pool helper macros. Each takes a pointer to a pool and may expand that argument more than once, so pass a side-effect-free expression. */
440 #define poolStart(pool) ((pool)->start) /* the pool's start pointer */
441 #define poolEnd(pool) ((pool)->ptr) /* the pool's current position (ptr), not its 'end' field */
442 #define poolLength(pool) ((pool)->ptr - (pool)->start) /* elements between start and ptr */
443 #define poolChop(pool) ((void)--((pool)->ptr)) /* step ptr back by one element */
444 #define poolLastChar(pool) (((pool)->ptr)[-1]) /* the element just before ptr */
445 #define poolDiscard(pool) ((pool)->ptr = (pool)->start) /* reset ptr back to start */
446 #define poolFinish(pool) ((pool)->start = (pool)->ptr) /* move start up to ptr */
447 #define poolAppendChar(pool, c) /* store c at ptr and advance; yields 0 when ptr == end and poolGrow() fails, otherwise 1 */ \
448  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
449  ? 0 \
450  : ((*((pool)->ptr)++ = (c)), 1))
451 
453  /* The first member must be userData so that the XML_GetUserData
454  macro works. */
455  void *m_userData;
457  char *m_buffer;
459  /* first character to be parsed */
460  const char *m_bufferPtr;
461  /* past last character to be parsed */
462  char *m_bufferEnd;
463  /* allocated end of buffer */
464  const char *m_bufferLim;
466  const char *m_parseEndPtr;
504  enum XML_Error m_errorCode;
505  const char *m_eventPtr;
506  const char *m_eventEndPtr;
507  const char *m_positionPtr;
534  unsigned long m_nsAttsVersion;
535  unsigned char m_nsAttsPower;
540  unsigned int m_groupSize;
544 #ifdef XML_DTD
545  XML_Bool m_isParamEntity;
546  XML_Bool m_useForeignDTD;
547  enum XML_ParamEntityParsing m_paramEntityParsing;
548 #endif
549 };
550 /* Allocation shorthands: each calls the matching handler stored in m_mem of whatever variable named 'parser' is in scope at the point of use. */
551 #define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
552 #define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
553 #define FREE(p) (parser->m_mem.free_fcn((p)))
554 /* Field-access shorthands: each name expands to a member of the variable named 'parser' in the enclosing scope. From here on, common words such as buffer, encoding, position, ns and atts are macros and cannot be used as ordinary identifiers. */
555 #define userData (parser->m_userData)
556 #define handlerArg (parser->m_handlerArg)
557 #define startElementHandler (parser->m_startElementHandler)
558 #define endElementHandler (parser->m_endElementHandler)
559 #define characterDataHandler (parser->m_characterDataHandler)
560 #define processingInstructionHandler \
561  (parser->m_processingInstructionHandler)
562 #define commentHandler (parser->m_commentHandler)
563 #define startCdataSectionHandler \
564  (parser->m_startCdataSectionHandler)
565 #define endCdataSectionHandler (parser->m_endCdataSectionHandler)
566 #define defaultHandler (parser->m_defaultHandler)
567 #define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
568 #define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
569 #define unparsedEntityDeclHandler \
570  (parser->m_unparsedEntityDeclHandler)
571 #define notationDeclHandler (parser->m_notationDeclHandler)
572 #define startNamespaceDeclHandler \
573  (parser->m_startNamespaceDeclHandler)
574 #define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
575 #define notStandaloneHandler (parser->m_notStandaloneHandler)
576 #define externalEntityRefHandler \
577  (parser->m_externalEntityRefHandler)
578 #define externalEntityRefHandlerArg \
579  (parser->m_externalEntityRefHandlerArg)
580 #define internalEntityRefHandler \
581  (parser->m_internalEntityRefHandler)
582 #define skippedEntityHandler (parser->m_skippedEntityHandler)
583 #define unknownEncodingHandler (parser->m_unknownEncodingHandler)
584 #define elementDeclHandler (parser->m_elementDeclHandler)
585 #define attlistDeclHandler (parser->m_attlistDeclHandler)
586 #define entityDeclHandler (parser->m_entityDeclHandler)
587 #define xmlDeclHandler (parser->m_xmlDeclHandler)
588 #define encoding (parser->m_encoding)
589 #define initEncoding (parser->m_initEncoding)
590 #define internalEncoding (parser->m_internalEncoding)
591 #define unknownEncodingMem (parser->m_unknownEncodingMem)
592 #define unknownEncodingData (parser->m_unknownEncodingData)
593 #define unknownEncodingHandlerData \
594  (parser->m_unknownEncodingHandlerData)
595 #define unknownEncodingRelease (parser->m_unknownEncodingRelease)
596 #define protocolEncodingName (parser->m_protocolEncodingName)
597 #define ns (parser->m_ns)
598 #define ns_triplets (parser->m_ns_triplets)
599 #define prologState (parser->m_prologState)
600 #define processor (parser->m_processor)
601 #define errorCode (parser->m_errorCode)
602 #define eventPtr (parser->m_eventPtr)
603 #define eventEndPtr (parser->m_eventEndPtr)
604 #define positionPtr (parser->m_positionPtr)
605 #define position (parser->m_position)
606 #define openInternalEntities (parser->m_openInternalEntities)
607 #define freeInternalEntities (parser->m_freeInternalEntities)
608 #define defaultExpandInternalEntities \
609  (parser->m_defaultExpandInternalEntities)
610 #define tagLevel (parser->m_tagLevel)
611 #define buffer (parser->m_buffer)
612 #define bufferPtr (parser->m_bufferPtr)
613 #define bufferEnd (parser->m_bufferEnd)
614 #define parseEndByteIndex (parser->m_parseEndByteIndex)
615 #define parseEndPtr (parser->m_parseEndPtr)
616 #define bufferLim (parser->m_bufferLim)
617 #define dataBuf (parser->m_dataBuf)
618 #define dataBufEnd (parser->m_dataBufEnd)
619 #define _dtd (parser->m_dtd)
620 #define curBase (parser->m_curBase)
621 #define declEntity (parser->m_declEntity)
622 #define doctypeName (parser->m_doctypeName)
623 #define doctypeSysid (parser->m_doctypeSysid)
624 #define doctypePubid (parser->m_doctypePubid)
625 #define declAttributeType (parser->m_declAttributeType)
626 #define declNotationName (parser->m_declNotationName)
627 #define declNotationPublicId (parser->m_declNotationPublicId)
628 #define declElementType (parser->m_declElementType)
629 #define declAttributeId (parser->m_declAttributeId)
630 #define declAttributeIsCdata (parser->m_declAttributeIsCdata)
631 #define declAttributeIsId (parser->m_declAttributeIsId)
632 #define freeTagList (parser->m_freeTagList)
633 #define freeBindingList (parser->m_freeBindingList)
634 #define inheritedBindings (parser->m_inheritedBindings)
635 #define tagStack (parser->m_tagStack)
636 #define atts (parser->m_atts)
637 #define attsSize (parser->m_attsSize)
638 #define nSpecifiedAtts (parser->m_nSpecifiedAtts)
639 #define idAttIndex (parser->m_idAttIndex)
640 #define nsAtts (parser->m_nsAtts)
641 #define nsAttsVersion (parser->m_nsAttsVersion)
642 #define nsAttsPower (parser->m_nsAttsPower)
643 #define tempPool (parser->m_tempPool)
644 #define temp2Pool (parser->m_temp2Pool)
645 #define groupConnector (parser->m_groupConnector)
646 #define groupSize (parser->m_groupSize)
647 #define namespaceSeparator (parser->m_namespaceSeparator)
648 #define parentParser (parser->m_parentParser)
649 #define ps_parsing (parser->m_parsingStatus.parsing)
650 #define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
651 #ifdef XML_DTD
652 #define isParamEntity (parser->m_isParamEntity)
653 #define useForeignDTD (parser->m_useForeignDTD)
654 #define paramEntityParsing (parser->m_paramEntityParsing)
655 #endif /* XML_DTD */
656 
658 XML_ParserCreate(const XML_Char *encodingName)
659 {
660  return XML_ParserCreate_MM(encodingName, NULL, NULL);
661 }
662 
664 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
665 {
666  XML_Char tmp[2];
667  *tmp = nsSep;
668  return XML_ParserCreate_MM(encodingName, NULL, tmp);
669 }
670 
671 static const XML_Char implicitContext[] = {
678 };
679 
681 XML_ParserCreate_MM(const XML_Char *encodingName,
682  const XML_Memory_Handling_Suite *memsuite,
683  const XML_Char *nameSep)
684 {
685  XML_Parser parser = parserCreate(encodingName, memsuite, nameSep, NULL);
686  if (parser != NULL && ns) {
687  /* implicit context only set for root parser, since child
688  parsers (i.e. external entity parsers) will inherit it
689  */
692  return NULL;
693  }
694  }
695  return parser;
696 }
697 
698 static XML_Parser
699 parserCreate(const XML_Char *encodingName,
700  const XML_Memory_Handling_Suite *memsuite,
701  const XML_Char *nameSep,
702  DTD *dtd)
703 {
705 
706  if (memsuite) {
708  parser = (XML_Parser)
709  memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
710  if (parser != NULL) {
711  mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
712  mtemp->malloc_fcn = memsuite->malloc_fcn;
713  mtemp->realloc_fcn = memsuite->realloc_fcn;
714  mtemp->free_fcn = memsuite->free_fcn;
715  }
716  }
717  else {
719  parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
720  if (parser != NULL) {
721  mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
722  mtemp->malloc_fcn = malloc;
723  mtemp->realloc_fcn = realloc;
724  mtemp->free_fcn = free;
725  }
726  }
727 
728  if (!parser)
729  return parser;
730 
731  buffer = NULL;
732  bufferLim = NULL;
733 
735  atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
736  if (atts == NULL) {
737  FREE(parser);
738  return NULL;
739  }
741  if (dataBuf == NULL) {
742  FREE(atts);
743  FREE(parser);
744  return NULL;
745  }
747 
748  if (dtd)
749  _dtd = dtd;
750  else {
751  _dtd = dtdCreate(&parser->m_mem);
752  if (_dtd == NULL) {
753  FREE(dataBuf);
754  FREE(atts);
755  FREE(parser);
756  return NULL;
757  }
758  }
759 
761  freeTagList = NULL;
763 
764  groupSize = 0;
766 
769 
771  ns = XML_FALSE;
773 
774  nsAtts = NULL;
775  nsAttsVersion = 0;
776  nsAttsPower = 0;
777 
778  poolInit(&tempPool, &(parser->m_mem));
779  poolInit(&temp2Pool, &(parser->m_mem));
780  parserInit(parser, encodingName);
781 
782  if (encodingName && !protocolEncodingName) {
784  return NULL;
785  }
786 
787  if (nameSep) {
788  ns = XML_TRUE;
790  namespaceSeparator = *nameSep;
791  }
792  else {
794  }
795 
796  return parser;
797 }
798 
799 static void
800 parserInit(XML_Parser parser, const XML_Char *encodingName)
801 {
804  protocolEncodingName = (encodingName != NULL
805  ? poolCopyString(&tempPool, encodingName)
806  : NULL);
807  curBase = NULL;
809  userData = NULL;
810  handlerArg = NULL;
833  bufferPtr = buffer;
834  bufferEnd = buffer;
835  parseEndByteIndex = 0;
836  parseEndPtr = NULL;
839  declEntity = NULL;
840  doctypeName = NULL;
841  doctypeSysid = NULL;
842  doctypePubid = NULL;
848  memset(&position, 0, sizeof(POSITION));
850  eventPtr = NULL;
851  eventEndPtr = NULL;
852  positionPtr = NULL;
855  tagLevel = 0;
856  tagStack = NULL;
858  nSpecifiedAtts = 0;
862  parentParser = NULL;
864 #ifdef XML_DTD
865  isParamEntity = XML_FALSE;
866  useForeignDTD = XML_FALSE;
867  paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
868 #endif
869 }
870 
871 /* moves list of bindings to freeBindingList */
872 static void FASTCALL
874 {
875  while (bindings) {
876  BINDING *b = bindings;
877  bindings = bindings->nextTagBinding;
878  b->nextTagBinding = freeBindingList;
879  freeBindingList = b;
880  }
881 }
882 
885 {
886  TAG *tStk;
887  OPEN_INTERNAL_ENTITY *openEntityList;
888  if (parentParser)
889  return XML_FALSE;
890  /* move tagStack to freeTagList */
891  tStk = tagStack;
892  while (tStk) {
893  TAG *tag = tStk;
894  tStk = tStk->parent;
897  tag->bindings = NULL;
898  freeTagList = tag;
899  }
900  /* move openInternalEntities to freeInternalEntities */
901  openEntityList = openInternalEntities;
902  while (openEntityList) {
903  OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
904  openEntityList = openEntity->next;
905  openEntity->next = freeInternalEntities;
906  freeInternalEntities = openEntity;
907  }
914  parserInit(parser, encodingName);
915  dtdReset(_dtd, &parser->m_mem);
917 }
918 
919 enum XML_Status XMLCALL
920 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
921 {
922  /* Block after XML_Parse()/XML_ParseBuffer() has been called.
923  XXX There's no way for the caller to determine which of the
924  XXX possible error cases caused the XML_STATUS_ERROR return.
925  */
927  return XML_STATUS_ERROR;
928  if (encodingName == NULL)
930  else {
931  protocolEncodingName = poolCopyString(&tempPool, encodingName);
933  return XML_STATUS_ERROR;
934  }
935  return XML_STATUS_OK;
936 }
937 
940  const XML_Char *context,
941  const XML_Char *encodingName)
942 {
943  XML_Parser parser = oldParser;
944  DTD *newDtd = NULL;
945  DTD *oldDtd = _dtd;
946  XML_StartElementHandler oldStartElementHandler = startElementHandler;
947  XML_EndElementHandler oldEndElementHandler = endElementHandler;
948  XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
949  XML_ProcessingInstructionHandler oldProcessingInstructionHandler
951  XML_CommentHandler oldCommentHandler = commentHandler;
952  XML_StartCdataSectionHandler oldStartCdataSectionHandler
954  XML_EndCdataSectionHandler oldEndCdataSectionHandler
956  XML_DefaultHandler oldDefaultHandler = defaultHandler;
957  XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
959  XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
960  XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
962  XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
964  XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
965  XML_ExternalEntityRefHandler oldExternalEntityRefHandler
967  XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler;
968  XML_UnknownEncodingHandler oldUnknownEncodingHandler
970  XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
971  XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
972  XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
973  XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
974  ELEMENT_TYPE * oldDeclElementType = declElementType;
975 
976  void *oldUserData = userData;
977  void *oldHandlerArg = handlerArg;
978  XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
979  XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
980 #ifdef XML_DTD
981  enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing;
982  int oldInEntityValue = prologState.inEntityValue;
983 #endif
984  XML_Bool oldns_triplets = ns_triplets;
985 
986 #ifdef XML_DTD
987  if (!context)
988  newDtd = oldDtd;
989 #endif /* XML_DTD */
990 
991  /* Note that the magical uses of the pre-processor to make field
992  access look more like C++ require that `parser' be overwritten
993  here. This makes this function more painful to follow than it
994  would be otherwise.
995  */
996  if (ns) {
997  XML_Char tmp[2];
998  *tmp = namespaceSeparator;
999  parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1000  }
1001  else {
1002  parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1003  }
1004 
1005  if (!parser)
1006  return NULL;
1007 
1008  startElementHandler = oldStartElementHandler;
1009  endElementHandler = oldEndElementHandler;
1010  characterDataHandler = oldCharacterDataHandler;
1011  processingInstructionHandler = oldProcessingInstructionHandler;
1012  commentHandler = oldCommentHandler;
1013  startCdataSectionHandler = oldStartCdataSectionHandler;
1014  endCdataSectionHandler = oldEndCdataSectionHandler;
1015  defaultHandler = oldDefaultHandler;
1016  unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1017  notationDeclHandler = oldNotationDeclHandler;
1018  startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1019  endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1020  notStandaloneHandler = oldNotStandaloneHandler;
1021  externalEntityRefHandler = oldExternalEntityRefHandler;
1022  skippedEntityHandler = oldSkippedEntityHandler;
1023  unknownEncodingHandler = oldUnknownEncodingHandler;
1024  elementDeclHandler = oldElementDeclHandler;
1025  attlistDeclHandler = oldAttlistDeclHandler;
1026  entityDeclHandler = oldEntityDeclHandler;
1027  xmlDeclHandler = oldXmlDeclHandler;
1028  declElementType = oldDeclElementType;
1029  userData = oldUserData;
1030  if (oldUserData == oldHandlerArg)
1031  handlerArg = userData;
1032  else
1033  handlerArg = parser;
1034  if (oldExternalEntityRefHandlerArg != oldParser)
1035  externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1036  defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1037  ns_triplets = oldns_triplets;
1038  parentParser = oldParser;
1039 #ifdef XML_DTD
1040  paramEntityParsing = oldParamEntityParsing;
1041  prologState.inEntityValue = oldInEntityValue;
1042  if (context) {
1043 #endif /* XML_DTD */
1044  if (!dtdCopy(_dtd, oldDtd, &parser->m_mem)
1045  || !setContext(parser, context)) {
1047  return NULL;
1048  }
1050 #ifdef XML_DTD
1051  }
1052  else {
1053  /* The DTD instance referenced by _dtd is shared between the document's
1054  root parser and external PE parsers, therefore one does not need to
1055  call setContext. In addition, one also *must* not call setContext,
1056  because this would overwrite existing prefix->binding pointers in
1057  _dtd with ones that get destroyed with the external PE parser.
1058  This would leave those prefixes with dangling pointers.
1059  */
1060  isParamEntity = XML_TRUE;
1061  XmlPrologStateInitExternalEntity(&prologState);
1062  processor = externalParEntInitProcessor;
1063  }
1064 #endif /* XML_DTD */
1065  return parser;
1066 }
1067 
1068 static void FASTCALL
1070 {
1071  for (;;) {
1072  BINDING *b = bindings;
1073  if (!b)
1074  break;
1075  bindings = b->nextTagBinding;
1076  FREE(b->uri);
1077  FREE(b);
1078  }
1079 }
1080 
1081 void XMLCALL
1083 {
1084  TAG *tagList;
1085  OPEN_INTERNAL_ENTITY *entityList;
1086  if (parser == NULL)
1087  return;
1088  /* free tagStack and freeTagList */
1089  tagList = tagStack;
1090  for (;;) {
1091  TAG *p;
1092  if (tagList == NULL) {
1093  if (freeTagList == NULL)
1094  break;
1095  tagList = freeTagList;
1096  freeTagList = NULL;
1097  }
1098  p = tagList;
1099  tagList = tagList->parent;
1100  FREE(p->buf);
1101  destroyBindings(p->bindings, parser);
1102  FREE(p);
1103  }
1104  /* free openInternalEntities and freeInternalEntities */
1105  entityList = openInternalEntities;
1106  for (;;) {
1107  OPEN_INTERNAL_ENTITY *openEntity;
1108  if (entityList == NULL) {
1109  if (freeInternalEntities == NULL)
1110  break;
1111  entityList = freeInternalEntities;
1113  }
1114  openEntity = entityList;
1115  entityList = entityList->next;
1116  FREE(openEntity);
1117  }
1118 
1123 #ifdef XML_DTD
1124  /* external parameter entity parsers share the DTD structure
1125  parser->m_dtd with the root parser, so we must not destroy it
1126  */
1127  if (!isParamEntity && _dtd)
1128 #else
1129  if (_dtd)
1130 #endif /* XML_DTD */
1132  FREE((void *)atts);
1134  FREE(buffer);
1135  FREE(dataBuf);
1136  FREE(nsAtts);
1140  FREE(parser);
1141 }
1142 
1143 void XMLCALL
1145 {
1146  handlerArg = parser;
1147 }
1148 
1149 enum XML_Error XMLCALL
1151 {
1152 #ifdef XML_DTD
1153  /* block after XML_Parse()/XML_ParseBuffer() has been called */
1156  useForeignDTD = useDTD;
1157  return XML_ERROR_NONE;
1158 #else
1160 #endif
1161 }
1162 
1163 void XMLCALL
1165 {
1166  /* block after XML_Parse()/XML_ParseBuffer() has been called */
1168  return;
1169  ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1170 }
1171 
1172 void XMLCALL
1174 {
1175  if (handlerArg == userData)
1176  handlerArg = userData = p;
1177  else
1178  userData = p;
1179 }
1180 
1181 enum XML_Status XMLCALL
1183 {
1184  if (p) {
1185  p = poolCopyString(&_dtd->pool, p);
1186  if (!p)
1187  return XML_STATUS_ERROR;
1188  curBase = p;
1189  }
1190  else
1191  curBase = NULL;
1192  return XML_STATUS_OK;
1193 }
1194 
1195 const XML_Char * XMLCALL
1197 {
1198  return curBase;
1199 }
1200 
1201 int XMLCALL
1203 {
1204  return nSpecifiedAtts;
1205 }
1206 
1207 int XMLCALL
1209 {
1210  return idAttIndex;
1211 }
1212 
1213 void XMLCALL
1217 {
1220 }
1221 
1222 void XMLCALL
1226 }
1227 
1228 void XMLCALL
1232 }
1233 
1234 void XMLCALL
1236  XML_CharacterDataHandler handler)
1237 {
1238  characterDataHandler = handler;
1239 }
1240 
1241 void XMLCALL
1244 {
1245  processingInstructionHandler = handler;
1246 }
1247 
1248 void XMLCALL
1250  XML_CommentHandler handler)
1251 {
1252  commentHandler = handler;
1253 }
1254 
1255 void XMLCALL
1259 {
1262 }
1263 
1264 void XMLCALL
1268 }
1269 
1270 void XMLCALL
1274 }
1275 
1276 void XMLCALL
1278  XML_DefaultHandler handler)
1279 {
1280  defaultHandler = handler;
1282 }
1283 
1284 void XMLCALL
1286  XML_DefaultHandler handler)
1287 {
1288  defaultHandler = handler;
1290 }
1291 
1292 void XMLCALL
1296 {
1299 }
1300 
1301 void XMLCALL
1305 }
1306 
1307 void XMLCALL
1311 }
1312 
1313 void XMLCALL
1316 {
1317  unparsedEntityDeclHandler = handler;
1318 }
1319 
1320 void XMLCALL
1322  XML_NotationDeclHandler handler)
1323 {
1324  notationDeclHandler = handler;
1325 }
1326 
1327 void XMLCALL
1331 {
1334 }
1335 
1336 void XMLCALL
1340 }
1341 
1342 void XMLCALL
1346 }
1347 
1348 void XMLCALL
1350  XML_NotStandaloneHandler handler)
1351 {
1352  notStandaloneHandler = handler;
1353 }
1354 
1355 void XMLCALL
1358 {
1359  externalEntityRefHandler = handler;
1360 }
1361 
1362 void XMLCALL
1364 {
1365  if (arg)
1367  else
1369 }
1370 
1371 void XMLCALL
1373  XML_SkippedEntityHandler handler)
1374 {
1375  skippedEntityHandler = handler;
1376 }
1377 
/* Stores handler in unknownEncodingHandler.
   NOTE(review): the leading signature lines and one body statement are
   absent from this listing.  The visible `data` parameter is not used by
   any surviving line, so the missing statement presumably saves it for
   the handler -- confirm against the real source. */
1378 void XMLCALL
1381  void *data)
1382 {
1383  unknownEncodingHandler = handler;
1385 }
1386 
/* Stores eldecl in elementDeclHandler, replacing any previous value.
   NOTE(review): the first signature line is absent from this listing;
   presumably XML_SetElementDeclHandler -- confirm. */
1387 void XMLCALL
1389  XML_ElementDeclHandler eldecl)
1390 {
1391  elementDeclHandler = eldecl;
1392 }
1393 
/* Stores attdecl in attlistDeclHandler, replacing any previous value.
   NOTE(review): the first signature line is absent from this listing;
   presumably XML_SetAttlistDeclHandler -- confirm. */
1394 void XMLCALL
1396  XML_AttlistDeclHandler attdecl)
1397 {
1398  attlistDeclHandler = attdecl;
1399 }
1400 
/* Stores handler in entityDeclHandler, replacing any previous value.
   NOTE(review): the first signature line is absent from this listing;
   presumably XML_SetEntityDeclHandler -- confirm. */
1401 void XMLCALL
1403  XML_EntityDeclHandler handler)
1404 {
1405  entityDeclHandler = handler;
1406 }
1407 
/* Stores handler in xmlDeclHandler, replacing any previous value.
   NOTE(review): the first signature line is absent from this listing;
   presumably XML_SetXmlDeclHandler -- confirm. */
1408 void XMLCALL
1410  XML_XmlDeclHandler handler) {
1411  xmlDeclHandler = handler;
1412 }
1413 
/* Selects the parameter-entity parsing mode.
   - Built with XML_DTD: stores peParsing in paramEntityParsing and
     returns 1.
   - Built without XML_DTD: nothing is stored; the return value is 1 only
     when the caller asked for XML_PARAM_ENTITY_PARSING_NEVER, else 0.
   NOTE(review): the first signature line and the condition guarding the
   early `return 0` are absent from this listing.  Going by the comment in
   the body, that guard rejects the call once parsing has begun -- confirm
   against the real source. */
1414 int XMLCALL
1416  enum XML_ParamEntityParsing peParsing)
1417 {
1418  /* block after XML_Parse()/XML_ParseBuffer() has been called */
1420  return 0;
1421 #ifdef XML_DTD
1422  paramEntityParsing = peParsing;
1423  return 1;
1424 #else
1425  return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1426 #endif
1427 }
1428 
/* Feeds len bytes starting at s to the parser; a non-zero isFinal marks
   the last chunk of the document.

   The call is refused with XML_STATUS_ERROR while ps_parsing is
   XML_SUSPENDED or XML_FINISHED.  After that one of three paths is taken:

   1. len == 0: isFinal is recorded in ps_finalBuffer.  A non-final empty
      chunk returns XML_STATUS_OK immediately; a final one re-checks the
      data left over from the previous buffer (see the comment below).
   2. Only when XML_CONTEXT_BYTES is not defined, and the internal buffer
      holds no pending data (bufferPtr == bufferEnd): the position is
      anchored at s itself, and whatever lies between `end` and s + len
      afterwards is copied into `buffer`, which is (re)allocated to
      len * 2 bytes when it is missing or too small.
   3. Otherwise: the input is copied into the area returned by
      XML_GetBuffer() and XML_ParseBuffer() does the work.

   NOTE(review): this listing has dropped several statements, among them
   the assignments in the error branches and the call that produces `end`
   and `result` in path 2.  The notes here describe only the surviving
   lines. */
1429 enum XML_Status XMLCALL
1430 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1431 {
1432  switch (ps_parsing) {
1433  case XML_SUSPENDED:
1435  return XML_STATUS_ERROR;
1436  case XML_FINISHED:
1438  return XML_STATUS_ERROR;
1439  default:
1441  }
1442 
1443  if (len == 0) {
1444  ps_finalBuffer = (XML_Bool)isFinal;
1445  if (!isFinal)
1446  return XML_STATUS_OK;
1449 
1450  /* If data are left over from last buffer, and we now know that these
1451  data are the final chunk of input, then we have to check them again
1452  to detect errors based on that fact.
1453  */
1455 
1456  if (errorCode == XML_ERROR_NONE) {
1457  switch (ps_parsing) {
1458  case XML_SUSPENDED:
1461  return XML_STATUS_SUSPENDED;
1462  case XML_INITIALIZED:
1463  case XML_PARSING:
1465  /* fall through */
1466  default:
1467  return XML_STATUS_OK;
1468  }
1469  }
1472  return XML_STATUS_ERROR;
1473  }
1474 #ifndef XML_CONTEXT_BYTES
1475  else if (bufferPtr == bufferEnd) {
1476  const char *end;
1477  int nLeftOver;
1478  enum XML_Error result;
1480  positionPtr = s;
1481  ps_finalBuffer = (XML_Bool)isFinal;
1482 
1484 
1485  if (errorCode != XML_ERROR_NONE) {
1488  return XML_STATUS_ERROR;
1489  }
1490  else {
1491  switch (ps_parsing) {
1492  case XML_SUSPENDED:
1494  break;
1495  case XML_INITIALIZED:
1496  case XML_PARSING:
1498  if (isFinal) {
1500  return result;
1501  }
1502  }
1503  }
1504 
  /* Bytes of this chunk that were not consumed; they are saved in
     `buffer` so that the next call can continue from them. */
1506  nLeftOver = s + len - end;
1507  if (nLeftOver) {
1508  if (buffer == NULL || nLeftOver > bufferLim - buffer) {
1509  /* FIXME avoid integer overflow */
  /* NOTE(review): len * 2 is evaluated in int, so a len above
     INT_MAX / 2 overflows (undefined behaviour) before the allocator
     is even called. */
1510  char *temp;
1511  temp = (buffer == NULL
1512  ? (char *)MALLOC(len * 2)
1513  : (char *)REALLOC(buffer, len * 2));
1514  if (temp == NULL) {
1516  return XML_STATUS_ERROR;
1517  }
1518  buffer = temp;
  /* NOTE(review): buffer was just assigned the non-NULL temp, so the
     check below can never be true. */
1519  if (!buffer) {
1523  return XML_STATUS_ERROR;
1524  }
1525  bufferLim = buffer + len * 2;
1526  }
1527  memcpy(buffer, end, nLeftOver);
1528  }
1529  bufferPtr = buffer;
1530  bufferEnd = buffer + nLeftOver;
1533  eventPtr = bufferPtr;
1535  return result;
1536  }
1537 #endif /* not defined XML_CONTEXT_BYTES */
1538  else {
1539  void *buff = XML_GetBuffer(parser, len);
1540  if (buff == NULL)
1541  return XML_STATUS_ERROR;
1542  else {
1543  memcpy(buff, s, len);
1544  return XML_ParseBuffer(parser, len, isFinal);
1545  }
1546  }
1547 }
1548 
/* Parses len bytes that the caller has placed at the current end of the
   internal buffer (XML_Parse does this through XML_GetBuffer + memcpy).
   A non-zero isFinal marks the last chunk and is recorded in
   ps_finalBuffer.

   The call is refused with XML_STATUS_ERROR while ps_parsing is
   XML_SUSPENDED or XML_FINISHED.  Otherwise parsing is anchored at
   bufferPtr (also stored in positionPtr) and bufferEnd is advanced by
   len.  A non-zero errorCode afterwards yields XML_STATUS_ERROR; in every
   other case `result` is returned.

   NOTE(review): this listing has dropped the declaration and assignments
   of `result`, the call that actually runs the processor, and the
   statements in the error branches; only the surviving lines are
   described. */
1549 enum XML_Status XMLCALL
1550 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1551 {
1552  const char *start;
1554 
1555  switch (ps_parsing) {
1556  case XML_SUSPENDED:
1558  return XML_STATUS_ERROR;
1559  case XML_FINISHED:
1561  return XML_STATUS_ERROR;
1562  default:
1564  }
1565 
1566  start = bufferPtr;
1567  positionPtr = start;
1568  bufferEnd += len;
1571  ps_finalBuffer = (XML_Bool)isFinal;
1572 
1574 
1575  if (errorCode != XML_ERROR_NONE) {
1578  return XML_STATUS_ERROR;
1579  }
1580  else {
1581  switch (ps_parsing) {
1582  case XML_SUSPENDED:
1584  break;
1585  case XML_INITIALIZED:
1586  case XML_PARSING:
  /* A non-final chunk drops out of the `if` into the empty default. */
1587  if (isFinal) {
1589  return result;
1590  }
1591  default: ; /* should not happen */
1592  }
1593  }
1594 
1597  return result;
1598 }
1599 
/* Returns a pointer (the current bufferEnd) at which the caller may store
   up to len bytes of input, or NULL when ps_parsing is XML_SUSPENDED or
   XML_FINISHED or when a larger buffer cannot be allocated.

   If fewer than len bytes are free after bufferEnd, room is made:
   - neededSize is len plus the pending bytes (bufferEnd - bufferPtr),
     plus, with XML_CONTEXT_BYTES, the `keep` bytes retained in front of
     bufferPtr;
   - when neededSize fits in the existing allocation the pointers are
     moved back towards the start of `buffer`;
   - otherwise a new buffer is allocated whose size is obtained by
     doubling (starting from bufferLim - bufferPtr, or INIT_BUFFER_SIZE
     when that is 0) until it reaches neededSize; the pending data is
     copied over and the old buffer freed.

   NOTE(review): the signature line (presumably XML_GetBuffer, called by
   XML_Parse above with (parser, len)), the statements that clamp `keep`,
   the data moves in the "fits" branch and the error-branch assignments
   are absent from this listing. */
1600 void * XMLCALL
1602 {
1603  switch (ps_parsing) {
1604  case XML_SUSPENDED:
1606  return NULL;
1607  case XML_FINISHED:
1609  return NULL;
1610  default: ;
1611  }
1612 
1613  if (len > bufferLim - bufferEnd) {
1614  /* FIXME avoid integer overflow */
  /* NOTE(review): the sum below is computed in int; a large len can
     overflow it.  len is also not checked for being negative in any
     surviving line. */
1615  int neededSize = len + (int)(bufferEnd - bufferPtr);
1616 #ifdef XML_CONTEXT_BYTES
1617  int keep = (int)(bufferPtr - buffer);
1618 
1619  if (keep > XML_CONTEXT_BYTES)
1621  neededSize += keep;
1622 #endif /* defined XML_CONTEXT_BYTES */
1623  if (neededSize <= bufferLim - buffer) {
1624 #ifdef XML_CONTEXT_BYTES
1625  if (keep < bufferPtr - buffer) {
1626  int offset = (int)(bufferPtr - buffer) - keep;
1628  bufferEnd -= offset;
1629  bufferPtr -= offset;
1630  }
1631 #else
1634  bufferPtr = buffer;
1635 #endif /* not defined XML_CONTEXT_BYTES */
1636  }
1637  else {
1638  char *newBuf;
1639  int bufferSize = (int)(bufferLim - bufferPtr);
1640  if (bufferSize == 0)
1641  bufferSize = INIT_BUFFER_SIZE;
  /* NOTE(review): bufferSize is an int; for a neededSize close to
     INT_MAX the doubling overflows before the loop condition can
     become false. */
1642  do {
1643  bufferSize *= 2;
1644  } while (bufferSize < neededSize);
1645  newBuf = (char *)MALLOC(bufferSize);
1646  if (newBuf == 0) {
1648  return NULL;
1649  }
1650  bufferLim = newBuf + bufferSize;
1651 #ifdef XML_CONTEXT_BYTES
1652  if (bufferPtr) {
  /* This `keep` shadows the one declared at the top of the outer
     block. */
1653  int keep = (int)(bufferPtr - buffer);
1654  if (keep > XML_CONTEXT_BYTES)
1656  memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1657  FREE(buffer);
1658  buffer = newBuf;
1660  bufferPtr = buffer + keep;
1661  }
1662  else {
1663  bufferEnd = newBuf + (bufferEnd - bufferPtr);
1664  bufferPtr = buffer = newBuf;
1665  }
1666 #else
1667  if (bufferPtr) {
1668  memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1669  FREE(buffer);
1670  }
1671  bufferEnd = newBuf + (bufferEnd - bufferPtr);
1672  bufferPtr = buffer = newBuf;
1673 #endif /* not defined XML_CONTEXT_BYTES */
1674  }
1675  }
1676  return bufferEnd;
1677 }
1678 
/* Requests that parsing stop; `resumable` selects between a suspend and a
   final stop.  Returns XML_STATUS_ERROR when
   - the parser is already XML_SUSPENDED and a resumable stop is asked for,
   - the parser is XML_FINISHED, or
   - (XML_DTD builds) a resumable stop is asked for while isParamEntity is
     set;
   and XML_STATUS_OK in all other cases.
   NOTE(review): the signature line (presumably XML_StopParser) and every
   statement that records the error code or changes ps_parsing are absent
   from this listing -- confirm against the real source. */
1679 enum XML_Status XMLCALL
1681 {
1682  switch (ps_parsing) {
1683  case XML_SUSPENDED:
1684  if (resumable) {
1686  return XML_STATUS_ERROR;
1687  }
1689  break;
1690  case XML_FINISHED:
1692  return XML_STATUS_ERROR;
1693  default:
1694  if (resumable) {
1695 #ifdef XML_DTD
1696  if (isParamEntity) {
1698  return XML_STATUS_ERROR;
1699  }
1700 #endif
1702  }
1703  else
1705  }
1706  return XML_STATUS_OK;
1707 }
1708 
/* Continues a parse that was suspended.  Returns XML_STATUS_ERROR
   when ps_parsing is not XML_SUSPENDED, or when errorCode is set after
   the (not visible) processing step; otherwise returns `result`.
   NOTE(review): the signature line (presumably XML_ResumeParser), the
   declaration and assignments of `result`, the processor call and the
   error-branch statements are absent from this listing. */
1709 enum XML_Status XMLCALL
1711 {
1713 
1714  if (ps_parsing != XML_SUSPENDED) {
1716  return XML_STATUS_ERROR;
1717  }
1719 
1721 
1722  if (errorCode != XML_ERROR_NONE) {
1725  return XML_STATUS_ERROR;
1726  }
1727  else {
1728  switch (ps_parsing) {
1729  case XML_SUSPENDED:
1731  break;
1732  case XML_INITIALIZED:
1733  case XML_PARSING:
  /* When ps_finalBuffer is not set, control drops into the empty
     default. */
1734  if (ps_finalBuffer) {
1736  return result;
1737  }
1738  default: ;
1739  }
1740  }
1741 
1744  return result;
1745 }
1746 
/* Copies the parser's m_parsingStatus structure into *status.  status
   must not be NULL; this is enforced only by assert(), so it is unchecked
   in NDEBUG builds.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_GetParsingStatus) -- confirm. */
1747 void XMLCALL
1749 {
1750  assert(status != NULL);
1751  *status = parser->m_parsingStatus;
1752 }
1753 
/* Returns the parser's current errorCode.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_GetErrorCode) -- confirm. */
1754 enum XML_Error XMLCALL
1756 {
1757  return errorCode;
1758 }
1759 
1762 {
  /* Byte index of the current event: the index of the end of the parsed
     data (parseEndByteIndex) minus the distance from eventPtr to
     parseEndPtr.  Returns -1 when eventPtr is not set.
     NOTE(review): the return-type and signature lines are absent from
     this listing (presumably XML_GetCurrentByteIndex) -- confirm. */
1763  if (eventPtr)
1764  return parseEndByteIndex - (parseEndPtr - eventPtr);
1765  return -1;
1766 }
1767 
/* Returns the length in bytes of the current event
   (eventEndPtr - eventPtr), or 0 when either pointer is unset.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_GetCurrentByteCount) -- confirm. */
1768 int XMLCALL
1770 {
1771  if (eventEndPtr && eventPtr)
1772  return (int)(eventEndPtr - eventPtr);
1773  return 0;
1774 }
1775 
/* Exposes the internal input buffer around the current event.
   With XML_CONTEXT_BYTES defined and both eventPtr and buffer set, stores
   the event's offset within the buffer in *offset, the number of buffered
   bytes in *size, and returns buffer.  In every other case (including
   builds without XML_CONTEXT_BYTES) it returns a null pointer and leaves
   *offset and *size untouched.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_GetInputContext) -- confirm. */
1776 const char * XMLCALL
1778 {
1779 #ifdef XML_CONTEXT_BYTES
1780  if (eventPtr && buffer) {
1781  *offset = (int)(eventPtr - buffer);
1782  *size = (int)(bufferEnd - buffer);
1783  return buffer;
1784  }
1785 #endif /* defined XML_CONTEXT_BYTES */
1786  return (char *) 0;
1787 }
1788 
1791 {
  /* Returns position.lineNumber + 1.  When eventPtr is set and not
     behind positionPtr, the (missing) body of the `if` runs first.
     NOTE(review): the signature lines and the two statements inside the
     `if` are absent from this listing; presumably they advance
     `position` up to eventPtr (function presumably
     XML_GetCurrentLineNumber) -- confirm. */
1792  if (eventPtr && eventPtr >= positionPtr) {
1795  }
1796  return position.lineNumber + 1;
1797 }
1798 
1801 {
  /* Returns position.columnNumber as stored (no +1 adjustment).  When
     eventPtr is set and not behind positionPtr, the (missing) body of
     the `if` runs first.
     NOTE(review): the signature lines and the two statements inside the
     `if` are absent from this listing; presumably they advance
     `position` up to eventPtr (function presumably
     XML_GetCurrentColumnNumber) -- confirm. */
1802  if (eventPtr && eventPtr >= positionPtr) {
1805  }
1806  return position.columnNumber;
1807 }
1808 
/* Releases `model` through the parser's FREE macro.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_FreeContentModel) -- confirm. */
1809 void XMLCALL
1811 {
1812  FREE(model);
1813 }
1814 
/* Allocates `size` bytes through the parser's MALLOC macro and returns
   the result unchanged (no NULL check is done here).
   NOTE(review): the signature line is absent from this listing
   (presumably XML_MemMalloc) -- confirm. */
1815 void * XMLCALL
1817 {
1818  return MALLOC(size);
1819 }
1820 
/* Resizes `ptr` to `size` bytes through the parser's REALLOC macro and
   returns the result unchanged (no NULL check is done here).
   NOTE(review): the signature line is absent from this listing
   (presumably XML_MemRealloc) -- confirm. */
1821 void * XMLCALL
1823 {
1824  return REALLOC(ptr, size);
1825 }
1826 
/* Releases `ptr` through the parser's FREE macro.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_MemFree) -- confirm. */
1827 void XMLCALL
1829 {
1830  FREE(ptr);
1831 }
1832 
/* Does nothing unless defaultHandler is set.  The surviving fragments
   show a two-way choice inside that guard, one arm of which passes the
   internalEventPtr / internalEventEndPtr of openInternalEntities as
   arguments.
   NOTE(review): the signature line (presumably XML_DefaultCurrent), the
   inner condition and both calls are absent from this listing, so the
   exact behaviour cannot be stated from this text -- confirm against the
   real source. */
1833 void XMLCALL
1835 {
1836  if (defaultHandler) {
1840  openInternalEntities->internalEventPtr,
1841  openInternalEntities->internalEventEndPtr);
1842  else
1844  }
1845 }
1846 
/* Maps an error code to a static, human-readable message.
   The table is indexed directly by `code`; slot 0 deliberately holds no
   message.  Returns NULL for code <= 0 and for codes at or beyond the end
   of the table, so callers must be prepared for a NULL result.  The
   returned strings are static and must not be freed or modified.
   NOTE(review): the signature line is absent from this listing
   (presumably XML_ErrorString, taking the error code as `code`) --
   confirm. */
1847 const XML_LChar * XMLCALL
1849 {
1850  static const XML_LChar* const message[] = {
1851  0,
1852  XML_L("out of memory"),
1853  XML_L("syntax error"),
1854  XML_L("no element found"),
1855  XML_L("not well-formed (invalid token)"),
1856  XML_L("unclosed token"),
1857  XML_L("partial character"),
1858  XML_L("mismatched tag"),
1859  XML_L("duplicate attribute"),
1860  XML_L("junk after document element"),
1861  XML_L("illegal parameter entity reference"),
1862  XML_L("undefined entity"),
1863  XML_L("recursive entity reference"),
1864  XML_L("asynchronous entity"),
1865  XML_L("reference to invalid character number"),
1866  XML_L("reference to binary entity"),
1867  XML_L("reference to external entity in attribute"),
1868  XML_L("XML or text declaration not at start of entity"),
1869  XML_L("unknown encoding"),
1870  XML_L("encoding specified in XML declaration is incorrect"),
1871  XML_L("unclosed CDATA section"),
1872  XML_L("error in processing external entity reference"),
1873  XML_L("document is not standalone"),
1874  XML_L("unexpected parser state - please send a bug report"),
1875  XML_L("entity declared in parameter entity"),
1876  XML_L("requested feature requires XML_DTD support in Expat"),
1877  XML_L("cannot change setting once parsing has begun"),
1878  XML_L("unbound prefix"),
1879  XML_L("must not undeclare prefix"),
1880  XML_L("incomplete markup in parameter entity"),
1881  XML_L("XML declaration not well-formed"),
1882  XML_L("text declaration not well-formed"),
1883  XML_L("illegal character(s) in public id"),
1884  XML_L("parser suspended"),
1885  XML_L("parser not suspended"),
1886  XML_L("parsing aborted"),
1887  XML_L("parsing finished"),
1888  XML_L("cannot suspend in external parameter entity"),
1889  XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
1890  XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
1891  XML_L("prefix must not be bound to one of the reserved namespace names")
1892  };
  /* code > 0 is tested first, so the signed/unsigned comparison against
     the element count is only reached with a positive code. */
1893  if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1894  return message[code];
1895  return NULL;
1896 }
1897 
/* Builds the library version string at compile time.  V1 stringizes three
   numbers into "a.b.c"; V2 prefixes the result with "expat_".  Both
   helper macros are local to this function and are undefined again before
   it ends.
   NOTE(review): the signature line, the opening brace and the statement
   that uses V2 (presumably the `return`) are absent from this listing
   (function presumably XML_ExpatVersion) -- confirm. */
1898 const XML_LChar * XMLCALL
1900 
1901  /* V1 is used to string-ize the version number. However, it would
1902  string-ize the actual version macro *names* unless we get them
1903  substituted before being passed to V1. CPP is defined to expand
1904  a macro, then rescan for more expansions. Thus, we use V2 to expand
1905  the version macros, then CPP will expand the resulting V1() macro
1906  with the correct numerals. */
1907  /* ### I'm assuming cpp is portable in this respect... */
1908 
1909 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
1910 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
1911 
1913 
1914 #undef V1
1915 #undef V2
1916 }
1917 
1920 {
  /* Fills a local `version` record with the compile-time
     XML_MAJOR_VERSION / XML_MINOR_VERSION / XML_MICRO_VERSION values and
     returns it by value.
     NOTE(review): the signature lines and the declaration of `version`
     are absent from this listing (function presumably
     XML_ExpatVersionInfo) -- confirm. */
1922 
1923  version.major = XML_MAJOR_VERSION;
1924  version.minor = XML_MINOR_VERSION;
1925  version.micro = XML_MICRO_VERSION;
1926 
1927  return version;
1928 }
1929 
/* Returns a pointer to a static table describing how the library was
   compiled.  The two sizeof entries are always present; each further
   entry appears only when its configuration macro is defined.  The table
   is terminated by an entry whose feature is XML_FEATURE_END, and callers
   should iterate until they reach it.  The table is static storage and
   must not be modified or freed.
   NOTE(review): the signature line and the value line of the
   XML_CONTEXT_BYTES entry are absent from this listing (function
   presumably XML_GetFeatureList) -- confirm. */
1930 const XML_Feature * XMLCALL
1932 {
1933  static const XML_Feature features[] = {
1934  {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
1935  sizeof(XML_Char)},
1936  {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
1937  sizeof(XML_LChar)},
1938 #ifdef XML_UNICODE
1939  {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
1940 #endif
1941 #ifdef XML_UNICODE_WCHAR_T
1942  {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
1943 #endif
1944 #ifdef XML_DTD
1945  {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
1946 #endif
1947 #ifdef XML_CONTEXT_BYTES
1948  {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
1950 #endif
1951 #ifdef XML_MIN_SIZE
1952  {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
1953 #endif
1954 #ifdef XML_NS
1955  {XML_FEATURE_NS, XML_L("XML_NS"), 0},
1956 #endif
1957 #ifdef XML_LARGE_SIZE
1958  {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
1959 #endif
1960  {XML_FEATURE_END, NULL, 0}
1961  };
1962 
1963  return features;
1964 }
1965 
1966 /* Initially tag->rawName always points into the parse buffer;
1967  for those TAG instances opened while the current parse buffer was
1968  processed, and not yet closed, we need to store tag->rawName in a more
1969  permanent location, since the parse buffer is about to be discarded.
1970 */
/* Walks tagStack from the innermost open tag outwards.  For each tag the
   raw name is copied into tag->buf directly behind the converted name
   (nameLen bytes, terminator included), growing tag->buf with REALLOC
   when it is too small.
   Returns XML_TRUE on success and XML_FALSE when a reallocation fails;
   tags already processed in that call keep their copied names.
   NOTE(review): the signature line and the first half of the statement
   that re-bases tag->name.localPart are absent from this listing. */
1971 static XML_Bool
1973 {
1974  TAG *tag = tagStack;
1975  while (tag) {
1976  int bufSize;
1977  int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
1978  char *rawNameBuf = tag->buf + nameLen;
1979  /* Stop if already stored. Since tagStack is a stack, we can stop
1980  at the first entry that has already been copied; everything
1981  below it in the stack has already been accounted for in a
1982  previous call to this function.
1983  */
1984  if (tag->rawName == rawNameBuf)
1985  break;
1986  /* For re-use purposes we need to ensure that the
1987  size of tag->buf is a multiple of sizeof(XML_Char).
1988  */
1989  bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
1990  if (bufSize > tag->bufEnd - tag->buf) {
1991  char *temp = (char *)REALLOC(tag->buf, bufSize);
1992  if (temp == NULL)
1993  return XML_FALSE;
1994  /* if tag->name.str points to tag->buf (only when namespace
1995  processing is off) then we have to update it
1996  */
1997  if (tag->name.str == (XML_Char *)tag->buf)
1998  tag->name.str = (XML_Char *)temp;
1999  /* if tag->name.localPart is set (when namespace processing is on)
2000  then update it as well, since it will always point into tag->buf
2001  */
2002  if (tag->name.localPart)
2004  (XML_Char *)tag->buf);
2005  tag->buf = temp;
2006  tag->bufEnd = temp + bufSize;
  /* The block may have moved, so recompute the destination. */
2007  rawNameBuf = temp + nameLen;
2008  }
2009  memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2010  tag->rawName = rawNameBuf;
2011  tag = tag->parent;
2012  }
2013  return XML_TRUE;
2014 }
2015 
/* Processor wrapper: after the (not visible) parsing call has produced
   `result`, a successful result is followed by storeRawNames() so that
   the raw names of still-open tags survive the parse buffer being
   reused.  A failure there is reported as XML_ERROR_NO_MEMORY; otherwise
   `result` is returned unchanged.
   NOTE(review): the line with the function name (presumably
   contentProcessor) and the first line of the call that assigns `result`
   are absent from this listing; only its trailing arguments
   (endPtr, !ps_finalBuffer) survive. */
2016 static enum XML_Error PTRCALL
2018  const char *start,
2019  const char *end,
2020  const char **endPtr)
2021 {
2023  endPtr, (XML_Bool)!ps_finalBuffer);
2024  if (result == XML_ERROR_NONE) {
2025  if (!storeRawNames(parser))
2026  return XML_ERROR_NO_MEMORY;
2027  }
2028  return result;
2029 }
2030 
/* First stage of the external-entity processor chain: a non-zero
   `result` from the (not visible) initial step is returned as is;
   otherwise control passes straight to externalEntityInitProcessor2 with
   the same arguments.
   NOTE(review): the line with the function name (presumably
   externalEntityInitProcessor), the statement that produces `result` and
   one statement before the final call are absent from this listing. */
2031 static enum XML_Error PTRCALL
2033  const char *start,
2034  const char *end,
2035  const char **endPtr)
2036 {
2038  if (result != XML_ERROR_NONE)
2039  return result;
2041  return externalEntityInitProcessor2(parser, start, end, endPtr);
2042 }
2043 
/* Second stage of the external-entity processor chain: looks at the first
   token of [start, end) to deal with a byte order mark and with input
   that is still incomplete.
   - XML_TOK_BOM: the mark is skipped.  If it is all the data there is and
     more input is expected, *endPtr is set past it and XML_ERROR_NONE is
     returned so that the next stage sees real content (see the comment
     in the case).
   - XML_TOK_PARTIAL / XML_TOK_PARTIAL_CHAR: with more input expected,
     nothing is consumed (*endPtr = start) and XML_ERROR_NONE is returned;
     on the final buffer eventPtr is set to start and
     XML_ERROR_UNCLOSED_TOKEN / XML_ERROR_PARTIAL_CHAR is returned.
   Any other token falls out of the switch and control passes to
   externalEntityInitProcessor3.
   NOTE(review): the line with the function name and one statement before
   the final call are absent from this listing. */
2044 static enum XML_Error PTRCALL
2046  const char *start,
2047  const char *end,
2048  const char **endPtr)
2049 {
2050  const char *next = start; /* XmlContentTok doesn't always set the last arg */
2051  int tok = XmlContentTok(encoding, start, end, &next);
2052  switch (tok) {
2053  case XML_TOK_BOM:
2054  /* If we are at the end of the buffer, this would cause the next stage,
2055  i.e. externalEntityInitProcessor3, to pass control directly to
2056  doContent (by detecting XML_TOK_NONE) without processing any xml text
2057  declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2058  */
2059  if (next == end && !ps_finalBuffer) {
2060  *endPtr = next;
2061  return XML_ERROR_NONE;
2062  }
2063  start = next;
2064  break;
2065  case XML_TOK_PARTIAL:
2066  if (!ps_finalBuffer) {
2067  *endPtr = start;
2068  return XML_ERROR_NONE;
2069  }
2070  eventPtr = start;
2071  return XML_ERROR_UNCLOSED_TOKEN;
2072  case XML_TOK_PARTIAL_CHAR:
2073  if (!ps_finalBuffer) {
2074  *endPtr = start;
2075  return XML_ERROR_NONE;
2076  }
2077  eventPtr = start;
2078  return XML_ERROR_PARTIAL_CHAR;
2079  }
2081  return externalEntityInitProcessor3(parser, start, end, endPtr);
2082 }
2083 
/* Third stage of the external-entity processor chain: handles an optional
   declaration at the start of the entity and then enters content
   processing.
   - XML_TOK_XML_DECL: the (not visible) declaration step produces
     `result`; an error is returned as is.  Afterwards a suspended parser
     returns XML_ERROR_NONE with *endPtr just past the declaration, a
     finished parser returns XML_ERROR_ABORTED, and otherwise parsing
     continues behind the declaration.
   - XML_TOK_PARTIAL / XML_TOK_PARTIAL_CHAR: with more input expected,
     nothing is consumed and XML_ERROR_NONE is returned; on the final
     buffer XML_ERROR_UNCLOSED_TOKEN / XML_ERROR_PARTIAL_CHAR is returned.
   In all remaining cases tagLevel is set to 1 and
   externalEntityContentProcessor takes over.
   NOTE(review): the line with the function name, the statement that
   assigns `tok`, the call that assigns `result` and one statement before
   `tagLevel = 1` are absent from this listing. */
2084 static enum XML_Error PTRCALL
2086  const char *start,
2087  const char *end,
2088  const char **endPtr)
2089 {
2090  int tok;
2091  const char *next = start; /* XmlContentTok doesn't always set the last arg */
2092  eventPtr = start;
2094  eventEndPtr = next;
2095 
2096  switch (tok) {
2097  case XML_TOK_XML_DECL:
2098  {
2099  enum XML_Error result;
2101  if (result != XML_ERROR_NONE)
2102  return result;
2103  switch (ps_parsing) {
2104  case XML_SUSPENDED:
2105  *endPtr = next;
2106  return XML_ERROR_NONE;
2107  case XML_FINISHED:
2108  return XML_ERROR_ABORTED;
2109  default:
2110  start = next;
2111  }
2112  }
2113  break;
2114  case XML_TOK_PARTIAL:
2115  if (!ps_finalBuffer) {
2116  *endPtr = start;
2117  return XML_ERROR_NONE;
2118  }
2119  return XML_ERROR_UNCLOSED_TOKEN;
2120  case XML_TOK_PARTIAL_CHAR:
2121  if (!ps_finalBuffer) {
2122  *endPtr = start;
2123  return XML_ERROR_NONE;
2124  }
2125  return XML_ERROR_PARTIAL_CHAR;
2126  }
2128  tagLevel = 1;
2129  return externalEntityContentProcessor(parser, start, end, endPtr);
2130 }
2131 
/* Processor wrapper for external-entity content: after the (not visible)
   parsing call has produced `result`, a successful result is followed by
   storeRawNames() so that the raw names of still-open tags survive the
   parse buffer being reused.  A failure there is reported as
   XML_ERROR_NO_MEMORY; otherwise `result` is returned unchanged.
   NOTE(review): the line with the function name (presumably
   externalEntityContentProcessor, which the preceding stage calls) and
   the first line of the call that assigns `result` are absent from this
   listing; only its trailing arguments (endPtr, !ps_finalBuffer)
   survive. */
2132 static enum XML_Error PTRCALL
2134  const char *start,
2135  const char *end,
2136  const char **endPtr)
2137 {
2139  endPtr, (XML_Bool)!ps_finalBuffer);
2140  if (result == XML_ERROR_NONE) {
2141  if (!storeRawNames(parser))
2142  return XML_ERROR_NO_MEMORY;
2143  }
2144  return result;
2145 }
2146 
/* Core content loop.  Repeatedly tokenizes [s, end) with XmlContentTok()
   and dispatches on the token until the input runs out, an error occurs,
   or a handler suspends or stops the parser.

   startTagLevel  tag nesting level at which this run started.  When the
                  input ends, a tagLevel different from it is reported as
                  XML_ERROR_ASYNC_ENTITY; a value of 0 makes running out
                  of input XML_ERROR_NO_ELEMENTS.
   enc            encoding of the input.  When it is not the parser's own
                  `encoding`, event positions are tracked in the current
                  openInternalEntities record instead of eventPtr /
                  eventEndPtr.
   s, end         input range.
   nextPtr        on an XML_ERROR_NONE return, receives the position at
                  which processing stopped.
   haveMore       true when more input may follow; trailing incomplete
                  tokens are then left unconsumed (*nextPtr = s) instead
                  of being reported as errors.

   After every token the parsing state is checked: XML_SUSPENDED returns
   XML_ERROR_NONE with *nextPtr at the next token, XML_FINISHED returns
   XML_ERROR_ABORTED.

   NOTE(review): this listing has dropped the line with the function name
   (presumably doContent), several `case` labels (for instance in front of
   the two tag-handling blocks and the CDATA block) and most handler
   calls.  Comments here describe only the surviving lines. */
2147 static enum XML_Error
2149  int startTagLevel,
2150  const ENCODING *enc,
2151  const char *s,
2152  const char *end,
2153  const char **nextPtr,
2154  XML_Bool haveMore)
2155 {
2156  /* save one level of indirection */
2157  DTD * const dtd = _dtd;
2158 
2159  const char **eventPP;
2160  const char **eventEndPP;
2161  if (enc == encoding) {
2162  eventPP = &eventPtr;
2163  eventEndPP = &eventEndPtr;
2164  }
2165  else {
2166  eventPP = &(openInternalEntities->internalEventPtr);
2167  eventEndPP = &(openInternalEntities->internalEventEndPtr);
2168  }
2169  *eventPP = s;
2170 
2171  for (;;) {
2172  const char *next = s; /* XmlContentTok doesn't always set the last arg */
2173  int tok = XmlContentTok(enc, s, end, &next);
2174  *eventEndPP = next;
2175  switch (tok) {
2176  case XML_TOK_TRAILING_CR:
2177  if (haveMore) {
2178  *nextPtr = s;
2179  return XML_ERROR_NONE;
2180  }
2181  *eventEndPP = end;
2182  if (characterDataHandler) {
2183  XML_Char c = 0xA;
2185  }
2186  else if (defaultHandler)
2188  /* We are at the end of the final buffer, should we check for
2189  XML_SUSPENDED, XML_FINISHED?
2190  */
2191  if (startTagLevel == 0)
2192  return XML_ERROR_NO_ELEMENTS;
2193  if (tagLevel != startTagLevel)
2194  return XML_ERROR_ASYNC_ENTITY;
2195  *nextPtr = end;
2196  return XML_ERROR_NONE;
2197  case XML_TOK_NONE:
2198  if (haveMore) {
2199  *nextPtr = s;
2200  return XML_ERROR_NONE;
2201  }
2202  if (startTagLevel > 0) {
2203  if (tagLevel != startTagLevel)
2204  return XML_ERROR_ASYNC_ENTITY;
2205  *nextPtr = s;
2206  return XML_ERROR_NONE;
2207  }
2208  return XML_ERROR_NO_ELEMENTS;
2209  case XML_TOK_INVALID:
2210  *eventPP = next;
2211  return XML_ERROR_INVALID_TOKEN;
2212  case XML_TOK_PARTIAL:
2213  if (haveMore) {
2214  *nextPtr = s;
2215  return XML_ERROR_NONE;
2216  }
2217  return XML_ERROR_UNCLOSED_TOKEN;
2218  case XML_TOK_PARTIAL_CHAR:
2219  if (haveMore) {
2220  *nextPtr = s;
2221  return XML_ERROR_NONE;
2222  }
2223  return XML_ERROR_PARTIAL_CHAR;
2224  case XML_TOK_ENTITY_REF:
2225  {
2226  const XML_Char *name;
2227  ENTITY *entity;
2229  s + enc->minBytesPerChar,
2230  next - enc->minBytesPerChar);
2231  if (ch) {
2234  else if (defaultHandler)
2236  break;
2237  }
2238  name = poolStoreString(&dtd->pool, enc,
2239  s + enc->minBytesPerChar,
2240  next - enc->minBytesPerChar);
2241  if (!name)
2242  return XML_ERROR_NO_MEMORY;
2243  entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0);
2244  poolDiscard(&dtd->pool);
2245  /* First, determine if a check for an existing declaration is needed;
2246  if yes, check that the entity exists, and that it is internal,
2247  otherwise call the skipped entity or default handler.
2248  */
2249  if (!dtd->hasParamEntityRefs || dtd->standalone) {
2250  if (!entity)
2252  else if (!entity->is_internal)
2254  }
2255  else if (!entity) {
2258  else if (defaultHandler)
2260  break;
2261  }
2262  if (entity->open)
2264  if (entity->notation)
2266  if (entity->textPtr) {
2267  enum XML_Error result;
2271  else if (defaultHandler)
2273  break;
2274  }
2276  if (result != XML_ERROR_NONE)
2277  return result;
2278  }
2279  else if (externalEntityRefHandler) {
2280  const XML_Char *context;
2281  entity->open = XML_TRUE;
2283  entity->open = XML_FALSE;
2284  if (!context)
2285  return XML_ERROR_NO_MEMORY;
2287  context,
2288  entity->base,
2289  entity->systemId,
2290  entity->publicId))
2293  }
2294  else if (defaultHandler)
2296  break;
2297  }
2299  /* fall through */
2301  {
2302  TAG *tag;
2303  enum XML_Error result;
2304  XML_Char *toPtr;
2305  if (freeTagList) {
2306  tag = freeTagList;
2307  freeTagList = freeTagList->parent;
2308  }
2309  else {
2310  tag = (TAG *)MALLOC(sizeof(TAG));
2311  if (!tag)
2312  return XML_ERROR_NO_MEMORY;
2313  tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2314  if (!tag->buf) {
2315  FREE(tag);
2316  return XML_ERROR_NO_MEMORY;
2317  }
2319  }
2320  tag->bindings = NULL;
2321  tag->parent = tagStack;
2322  tagStack = tag;
2323  tag->name.localPart = NULL;
2324  tag->name.prefix = NULL;
2325  tag->rawName = s + enc->minBytesPerChar;
2327  ++tagLevel;
2328  {
2329  const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2330  const char *fromPtr = tag->rawName;
2331  toPtr = (XML_Char *)tag->buf;
2332  for (;;) {
2333  int bufSize;
2334  int convLen;
2335  XmlConvert(enc,
2336  &fromPtr, rawNameEnd,
2337  (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2338  convLen = (int)(toPtr - (XML_Char *)tag->buf);
2339  if (fromPtr == rawNameEnd) {
2340  tag->name.strLen = convLen;
2341  break;
2342  }
2343  bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2344  {
2345  char *temp = (char *)REALLOC(tag->buf, bufSize);
2346  if (temp == NULL)
2347  return XML_ERROR_NO_MEMORY;
2348  tag->buf = temp;
2349  tag->bufEnd = temp + bufSize;
2350  toPtr = (XML_Char *)temp + convLen;
2351  }
2352  }
2353  }
2354  tag->name.str = (XML_Char *)tag->buf;
2355  *toPtr = XML_T('\0');
2356  result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2357  if (result)
2358  return result;
2359  if (startElementHandler)
2361  (const XML_Char **)atts);
2362  else if (defaultHandler)
2364  poolClear(&tempPool);
2365  break;
2366  }
2368  /* fall through */
2370  {
2371  const char *rawName = s + enc->minBytesPerChar;
2372  enum XML_Error result;
2373  BINDING *bindings = NULL;
2374  XML_Bool noElmHandlers = XML_TRUE;
2375  TAG_NAME name;
2376  name.str = poolStoreString(&tempPool, enc, rawName,
2377  rawName + XmlNameLength(enc, rawName));
2378  if (!name.str)
2379  return XML_ERROR_NO_MEMORY;
2380  poolFinish(&tempPool);
2381  result = storeAtts(parser, enc, s, &name, &bindings);
2382  if (result)
2383  return result;
2384  poolFinish(&tempPool);
2385  if (startElementHandler) {
2386  startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2387  noElmHandlers = XML_FALSE;
2388  }
2389  if (endElementHandler) {
2390  if (startElementHandler)
2391  *eventPP = *eventEndPP;
2393  noElmHandlers = XML_FALSE;
2394  }
2395  if (noElmHandlers && defaultHandler)
2397  poolClear(&tempPool);
2398  while (bindings) {
2399  BINDING *b = bindings;
2401  endNamespaceDeclHandler(handlerArg, b->prefix->name);
2402  bindings = bindings->nextTagBinding;
2403  b->nextTagBinding = freeBindingList;
2404  freeBindingList = b;
2405  b->prefix->binding = b->prevPrefixBinding;
2406  }
2407  }
2408  if (tagLevel == 0)
2409  return epilogProcessor(parser, next, end, nextPtr);
2410  break;
2411  case XML_TOK_END_TAG:
2412  if (tagLevel == startTagLevel)
2413  return XML_ERROR_ASYNC_ENTITY;
2414  else {
2415  int len;
2416  const char *rawName;
2417  TAG *tag = tagStack;
2418  tagStack = tag->parent;
2419  tag->parent = freeTagList;
2420  freeTagList = tag;
2421  rawName = s + enc->minBytesPerChar*2;
2422  len = XmlNameLength(enc, rawName);
2423  if (len != tag->rawNameLength
2424  || memcmp(tag->rawName, rawName, len) != 0) {
2425  *eventPP = rawName;
2426  return XML_ERROR_TAG_MISMATCH;
2427  }
2428  --tagLevel;
2429  if (endElementHandler) {
2430  const XML_Char *localPart;
2431  const XML_Char *prefix;
2432  XML_Char *uri;
2433  localPart = tag->name.localPart;
2434  if (ns && localPart) {
2435  /* localPart and prefix may have been overwritten in
2436  tag->name.str, since this points to the binding->uri
2437  buffer which gets re-used; so we have to add them again
2438  */
2439  uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2440  /* don't need to check for space - already done in storeAtts() */
2441  while (*localPart) *uri++ = *localPart++;
2442  prefix = (XML_Char *)tag->name.prefix;
2443  if (ns_triplets && prefix) {
2444  *uri++ = namespaceSeparator;
2445  while (*prefix) *uri++ = *prefix++;
2446  }
2447  *uri = XML_T('\0');
2448  }
2450  }
2451  else if (defaultHandler)
2453  while (tag->bindings) {
2454  BINDING *b = tag->bindings;
2456  endNamespaceDeclHandler(handlerArg, b->prefix->name);
2458  b->nextTagBinding = freeBindingList;
2459  freeBindingList = b;
2460  b->prefix->binding = b->prevPrefixBinding;
2461  }
2462  if (tagLevel == 0)
2463  return epilogProcessor(parser, next, end, nextPtr);
2464  }
2465  break;
2466  case XML_TOK_CHAR_REF:
2467  {
2468  int n = XmlCharRefNumber(enc, s);
2469  if (n < 0)
2470  return XML_ERROR_BAD_CHAR_REF;
2471  if (characterDataHandler) {
2474  }
2475  else if (defaultHandler)
2477  }
2478  break;
2479  case XML_TOK_XML_DECL:
2481  case XML_TOK_DATA_NEWLINE:
2482  if (characterDataHandler) {
2483  XML_Char c = 0xA;
2485  }
2486  else if (defaultHandler)
2488  break;
2490  {
2491  enum XML_Error result;
2494 #if 0
2495  /* Suppose you are doing a transformation on a document that involves
2496  changing only the character data. You set up a defaultHandler
2497  and a characterDataHandler. The defaultHandler simply copies
2498  characters through. The characterDataHandler does the
2499  transformation and writes the characters out escaping them as
2500  necessary. This case will fail to work if we leave out the
2501  following two lines (because & and < inside CDATA sections will
2502  be incorrectly escaped).
2503 
2504  However, now we have a start/endCdataSectionHandler, so it seems
2505  easier to let the user deal with this.
2506  */
2507  else if (characterDataHandler)
2509 #endif
2510  else if (defaultHandler)
2512  result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2513  if (result != XML_ERROR_NONE)
2514  return result;
2515  else if (!next) {
2517  return result;
2518  }
2519  }
2520  break;
2521  case XML_TOK_TRAILING_RSQB:
2522  if (haveMore) {
2523  *nextPtr = s;
2524  return XML_ERROR_NONE;
2525  }
2526  if (characterDataHandler) {
2527  if (MUST_CONVERT(enc, s)) {
2528  ICHAR *dataPtr = (ICHAR *)dataBuf;
2529  XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2531  (int)(dataPtr - (ICHAR *)dataBuf));
2532  }
2533  else
2535  (XML_Char *)s,
2536  (int)((XML_Char *)end - (XML_Char *)s));
2537  }
2538  else if (defaultHandler)
2540  /* We are at the end of the final buffer, should we check for
2541  XML_SUSPENDED, XML_FINISHED?
2542  */
2543  if (startTagLevel == 0) {
2544  *eventPP = end;
2545  return XML_ERROR_NO_ELEMENTS;
2546  }
2547  if (tagLevel != startTagLevel) {
2548  *eventPP = end;
2549  return XML_ERROR_ASYNC_ENTITY;
2550  }
2551  *nextPtr = end;
2552  return XML_ERROR_NONE;
2553  case XML_TOK_DATA_CHARS:
2554  {
2556  if (charDataHandler) {
2557  if (MUST_CONVERT(enc, s)) {
2558  for (;;) {
2559  ICHAR *dataPtr = (ICHAR *)dataBuf;
2560  XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
2561  *eventEndPP = s;
2562  charDataHandler(handlerArg, dataBuf,
2563  (int)(dataPtr - (ICHAR *)dataBuf));
2564  if (s == next)
2565  break;
2566  *eventPP = s;
2567  }
2568  }
2569  else
2570  charDataHandler(handlerArg,
2571  (XML_Char *)s,
2572  (int)((XML_Char *)next - (XML_Char *)s));
2573  }
2574  else if (defaultHandler)
2576  }
2577  break;
2578  case XML_TOK_PI:
2580  return XML_ERROR_NO_MEMORY;
2581  break;
2582  case XML_TOK_COMMENT:
2583  if (!reportComment(parser, enc, s, next))
2584  return XML_ERROR_NO_MEMORY;
2585  break;
2586  default:
2587  if (defaultHandler)
2589  break;
2590  }
2591  *eventPP = s = next;
2592  switch (ps_parsing) {
2593  case XML_SUSPENDED:
2594  *nextPtr = next;
2595  return XML_ERROR_NONE;
2596  case XML_FINISHED:
2597  return XML_ERROR_ABORTED;
2598  default: ;
2599  }
2600  }
2601  /* not reached */
2602 }
2603 
2604 /* Precondition: all arguments must be non-NULL;
2605  Purpose:
2606  - normalize attributes
2607  - check attributes for well-formedness
2608  - generate namespace aware attribute names (URI, prefix)
2609  - build list of attributes for startElementHandler
2610  - default attributes
2611  - process namespace declarations (check and report them)
2612  - generate namespace aware element name (URI, prefix)
2613 */
2614 static enum XML_Error
2616  const char *attStr, TAG_NAME *tagNamePtr,
2617  BINDING **bindingsPtr)
2618 {
2619  DTD * const dtd = _dtd; /* save one level of indirection */
2620  ELEMENT_TYPE *elementType;
2621  int nDefaultAtts;
2622  const XML_Char **appAtts; /* the attribute list for the application */
2623  int attIndex = 0;
2624  int prefixLen;
2625  int i;
2626  int n;
2627  XML_Char *uri;
2628  int nPrefixes = 0;
2629  BINDING *binding;
2630  const XML_Char *localPart;
2631 
2632  /* lookup the element type name */
2633  elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0);
2634  if (!elementType) {
2635  const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
2636  if (!name)
2637  return XML_ERROR_NO_MEMORY;
2638  elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name,
2639  sizeof(ELEMENT_TYPE));
2640  if (!elementType)
2641  return XML_ERROR_NO_MEMORY;
2642  if (ns && !setElementTypePrefix(parser, elementType))
2643  return XML_ERROR_NO_MEMORY;
2644  }
2645  nDefaultAtts = elementType->nDefaultAtts;
2646 
2647  /* get the attributes from the tokenizer */
2648  n = XmlGetAttributes(enc, attStr, attsSize, atts);
2649  if (n + nDefaultAtts > attsSize) {
2650  int oldAttsSize = attsSize;
2651  ATTRIBUTE *temp;
2652  attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
2653  temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
2654  if (temp == NULL)
2655  return XML_ERROR_NO_MEMORY;
2656  atts = temp;
2657  if (n > oldAttsSize)
2658  XmlGetAttributes(enc, attStr, n, atts);
2659  }
2660 
2661  appAtts = (const XML_Char **)atts;
2662  for (i = 0; i < n; i++) {
2663  /* add the name and value to the attribute list */
2665  atts[i].name
2666  + XmlNameLength(enc, atts[i].name));
2667  if (!attId)
2668  return XML_ERROR_NO_MEMORY;
2669  /* Detect duplicate attributes by their QNames. This does not work when
2670  namespace processing is turned on and different prefixes for the same
2671  namespace are used. For this case we have a check further down.
2672  */
2673  if ((attId->name)[-1]) {
2674  if (enc == encoding)
2675  eventPtr = atts[i].name;
2677  }
2678  (attId->name)[-1] = 1;
2679  appAtts[attIndex++] = attId->name;
2680  if (!atts[i].normalized) {
2681  enum XML_Error result;
2682  XML_Bool isCdata = XML_TRUE;
2683 
2684  /* figure out whether declared as other than CDATA */
2685  if (attId->maybeTokenized) {
2686  int j;
2687  for (j = 0; j < nDefaultAtts; j++) {
2688  if (attId == elementType->defaultAtts[j].id) {
2689  isCdata = elementType->defaultAtts[j].isCdata;
2690  break;
2691  }
2692  }
2693  }
2694 
2695  /* normalize the attribute value */
2696  result = storeAttributeValue(parser, enc, isCdata,
2697  atts[i].valuePtr, atts[i].valueEnd,
2698  &tempPool);
2699  if (result)
2700  return result;
2701  appAtts[attIndex] = poolStart(&tempPool);
2702  poolFinish(&tempPool);
2703  }
2704  else {
2705  /* the value did not need normalizing */
2706  appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
2707  atts[i].valueEnd);
2708  if (appAtts[attIndex] == 0)
2709  return XML_ERROR_NO_MEMORY;
2710  poolFinish(&tempPool);
2711  }
2712  /* handle prefixed attribute names */
2713  if (attId->prefix) {
2714  if (attId->xmlns) {
2715  /* deal with namespace declarations here */
2716  enum XML_Error result = addBinding(parser, attId->prefix, attId,
2717  appAtts[attIndex], bindingsPtr);
2718  if (result)
2719  return result;
2720  --attIndex;
2721  }
2722  else {
2723  /* deal with other prefixed names later */
2724  attIndex++;
2725  nPrefixes++;
2726  (attId->name)[-1] = 2;
2727  }
2728  }
2729  else
2730  attIndex++;
2731  }
2732 
2733  /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
2734  nSpecifiedAtts = attIndex;
2735  if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
2736  for (i = 0; i < attIndex; i += 2)
2737  if (appAtts[i] == elementType->idAtt->name) {
2738  idAttIndex = i;
2739  break;
2740  }
2741  }
2742  else
2743  idAttIndex = -1;
2744 
2745  /* do attribute defaulting */
2746  for (i = 0; i < nDefaultAtts; i++) {
2747  const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
2748  if (!(da->id->name)[-1] && da->value) {
2749  if (da->id->prefix) {
2750  if (da->id->xmlns) {
2751  enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
2752  da->value, bindingsPtr);
2753  if (result)
2754  return result;
2755  }
2756  else {
2757  (da->id->name)[-1] = 2;
2758  nPrefixes++;
2759  appAtts[attIndex++] = da->id->name;
2760  appAtts[attIndex++] = da->value;
2761  }
2762  }
2763  else {
2764  (da->id->name)[-1] = 1;
2765  appAtts[attIndex++] = da->id->name;
2766  appAtts[attIndex++] = da->value;
2767  }
2768  }
2769  }
2770  appAtts[attIndex] = 0;
2771 
2772  /* expand prefixed attribute names, check for duplicates,
2773  and clear flags that say whether attributes were specified */
2774  i = 0;
2775  if (nPrefixes) {
2776  int j; /* hash table index */
2777  unsigned long version = nsAttsVersion;
2778  int nsAttsSize = (int)1 << nsAttsPower;
2779  /* size of hash table must be at least 2 * (# of prefixed attributes) */
2780  if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
2781  NS_ATT *temp;
2782  /* hash table size must also be a power of 2 and >= 8 */
2783  while (nPrefixes >> nsAttsPower++);
2784  if (nsAttsPower < 3)
2785  nsAttsPower = 3;
2786  nsAttsSize = (int)1 << nsAttsPower;
2787  temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
2788  if (!temp)
2789  return XML_ERROR_NO_MEMORY;
2790  nsAtts = temp;
2791  version = 0; /* force re-initialization of nsAtts hash table */
2792  }
2793  /* using a version flag saves us from initializing nsAtts every time */
2794  if (!version) { /* initialize version flags when version wraps around */
2796  for (j = nsAttsSize; j != 0; )
2797  nsAtts[--j].version = version;
2798  }
2799  nsAttsVersion = --version;
2800 
2801  /* expand prefixed names and check for duplicates */
2802  for (; i < attIndex; i += 2) {
2803  const XML_Char *s = appAtts[i];
2804  if (s[-1] == 2) { /* prefixed */
2805  ATTRIBUTE_ID *id;
2806  const BINDING *b;
2807  unsigned long uriHash = 0;
2808  ((XML_Char *)s)[-1] = 0; /* clear flag */
2809  id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, s, 0);
2810  b = id->prefix->binding;
2811  if (!b)
2812  return XML_ERROR_UNBOUND_PREFIX;
2813 
2814  /* as we expand the name we also calculate its hash value */
2815  for (j = 0; j < b->uriLen; j++) {
2816  const XML_Char c = b->uri[j];
2817  if (!poolAppendChar(&tempPool, c))
2818  return XML_ERROR_NO_MEMORY;
2819  uriHash = CHAR_HASH(uriHash, c);
2820  }
2821  while (*s++ != XML_T(ASCII_COLON))
2822  ;
2823  do { /* copies null terminator */
2824  const XML_Char c = *s;
2825  if (!poolAppendChar(&tempPool, *s))
2826  return XML_ERROR_NO_MEMORY;
2827  uriHash = CHAR_HASH(uriHash, c);
2828  } while (*s++);
2829 
2830  { /* Check hash table for duplicate of expanded name (uriName).
2831  Derived from code in lookup(HASH_TABLE *table, ...).
2832  */
2833  unsigned char step = 0;
2834  unsigned long mask = nsAttsSize - 1;
2835  j = uriHash & mask; /* index into hash table */
2836  while (nsAtts[j].version == version) {
2837  /* for speed we compare stored hash values first */
2838  if (uriHash == nsAtts[j].hash) {
2839  const XML_Char *s1 = poolStart(&tempPool);
2840  const XML_Char *s2 = nsAtts[j].uriName;
2841  /* s1 is null terminated, but not s2 */
2842  for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
2843  if (*s1 == 0)
2845  }
2846  if (!step)
2847  step = PROBE_STEP(uriHash, mask, nsAttsPower);
2848  j < step ? (j += nsAttsSize - step) : (j -= step);
2849  }
2850  }
2851 
2852  if (ns_triplets) { /* append namespace separator and prefix */
2853  tempPool.ptr[-1] = namespaceSeparator;
2854  s = b->prefix->name;
2855  do {
2856  if (!poolAppendChar(&tempPool, *s))
2857  return XML_ERROR_NO_MEMORY;
2858  } while (*s++);
2859  }
2860 
2861  /* store expanded name in attribute list */
2862  s = poolStart(&tempPool);
2863  poolFinish(&tempPool);
2864  appAtts[i] = s;
2865 
2866  /* fill empty slot with new version, uriName and hash value */
2867  nsAtts[j].version = version;
2868  nsAtts[j].hash = uriHash;
2869  nsAtts[j].uriName = s;
2870 
2871  if (!--nPrefixes) {
2872  i += 2;
2873  break;
2874  }
2875  }
2876  else /* not prefixed */
2877  ((XML_Char *)s)[-1] = 0; /* clear flag */
2878  }
2879  }
2880  /* clear flags for the remaining attributes */
2881  for (; i < attIndex; i += 2)
2882  ((XML_Char *)(appAtts[i]))[-1] = 0;
2883  for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
2884  binding->attId->name[-1] = 0;
2885 
2886  if (!ns)
2887  return XML_ERROR_NONE;
2888 
2889  /* expand the element type name */
2890  if (elementType->prefix) {
2891  binding = elementType->prefix->binding;
2892  if (!binding)
2893  return XML_ERROR_UNBOUND_PREFIX;
2894  localPart = tagNamePtr->str;
2895  while (*localPart++ != XML_T(ASCII_COLON))
2896  ;
2897  }
2898  else if (dtd->defaultPrefix.binding) {
2899  binding = dtd->defaultPrefix.binding;
2900  localPart = tagNamePtr->str;
2901  }
2902  else
2903  return XML_ERROR_NONE;
2904  prefixLen = 0;
2905  if (ns_triplets && binding->prefix->name) {
2906  for (; binding->prefix->name[prefixLen++];)
2907  ; /* prefixLen includes null terminator */
2908  }
2909  tagNamePtr->localPart = localPart;
2910  tagNamePtr->uriLen = binding->uriLen;
2911  tagNamePtr->prefix = binding->prefix->name;
2912  tagNamePtr->prefixLen = prefixLen;
2913  for (i = 0; localPart[i++];)
2914  ; /* i includes null terminator */
2915  n = i + binding->uriLen + prefixLen;
2916  if (n > binding->uriAlloc) {
2917  TAG *p;
2918  uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
2919  if (!uri)
2920  return XML_ERROR_NO_MEMORY;
2922  memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
2923  for (p = tagStack; p; p = p->parent)
2924  if (p->name.str == binding->uri)
2925  p->name.str = uri;
2926  FREE(binding->uri);
2927  binding->uri = uri;
2928  }
2929  /* if namespaceSeparator != '\0' then uri includes it already */
2930  uri = binding->uri + binding->uriLen;
2931  memcpy(uri, localPart, i * sizeof(XML_Char));
2932  /* we always have a namespace separator between localPart and prefix */
2933  if (prefixLen) {
2934  uri += i - 1;
2935  *uri = namespaceSeparator; /* replace null terminator */
2936  memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
2937  }
2938  tagNamePtr->str = binding->uri;
2939  return XML_ERROR_NONE;
2940 }
2941 
2942 /* addBinding() overwrites the value of prefix->binding without checking.
2943  Therefore one must keep track of the old value outside of addBinding().
2944 */
/* addBinding: bind `prefix` to the namespace name `uri` and push the
   resulting BINDING onto the list headed by *bindingsPtr.

   NOTE(review): several lines of this function are missing from this
   listing (the line carrying its name and leading parameters, most of
   the two reserved-namespace tables, and some return statements); the
   notes here describe only what is visible.

   Visible behaviour:
   - a prefix name starting with "xml" is special: exactly "xml" sets
     mustBeXML, and "xmlns" is singled out as not bindable;
   - uri is scanned once to measure it and to compare it with
     xmlNamespace and xmlnsNamespace;
   - a BINDING is taken from freeBindingList or allocated with MALLOC,
     filled in, and made the current binding of the prefix, with the
     previous prefix->binding kept in prevPrefixBinding;
   - startNamespaceDeclHandler is consulted when attId is non-NULL.

   Returns XML_ERROR_NONE on success and XML_ERROR_NO_MEMORY when an
   allocation fails; XML_ERROR_RESERVED_PREFIX_XML is one of the results
   of the reserved-namespace check.
*/
2945 static enum XML_Error
2947  const XML_Char *uri, BINDING **bindingsPtr)
2948 {
2949  static const XML_Char xmlNamespace[] = {
2955  ASCII_e, '\0'
2956  };
2957  static const int xmlLen =
2958  (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
2959  static const XML_Char xmlnsNamespace[] = {
2964  ASCII_SLASH, '\0'
2965  };
2966  static const int xmlnsLen =
2967  (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
2968
2969  XML_Bool mustBeXML = XML_FALSE;
2970  XML_Bool isXML = XML_TRUE;
2971  XML_Bool isXMLNS = XML_TRUE;
2972
2973  BINDING *b;
2974  int len;
2975
2976  /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
2977  if (*uri == XML_T('\0') && prefix->name)
/* NOTE(review): the statement controlled by this `if` (listing line 2978)
   is missing from this listing. */
2979
2980  if (prefix->name
2981  && prefix->name[0] == XML_T(ASCII_x)
2982  && prefix->name[1] == XML_T(ASCII_m)
2983  && prefix->name[2] == XML_T(ASCII_l)) {
2984
2985  /* Not allowed to bind xmlns */
2986  if (prefix->name[3] == XML_T(ASCII_n)
2987  && prefix->name[4] == XML_T(ASCII_s)
2988  && prefix->name[5] == XML_T('\0'))
/* NOTE(review): the statement controlled by this `if` (listing line 2989)
   is missing from this listing. */
2990
2991  if (prefix->name[3] == XML_T('\0'))
2992  mustBeXML = XML_TRUE;
2993  }
2994
/* One pass over uri: counts its characters and compares it against both
   reserved names.  The xmlns comparison is not performed once mustBeXML
   is set. */
2995  for (len = 0; uri[len]; len++) {
2996  if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
2997  isXML = XML_FALSE;
2998
2999  if (!mustBeXML && isXMLNS
3000  && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3001  isXMLNS = XML_FALSE;
3002  }
/* A matching prefix is not enough: the lengths must agree as well. */
3003  isXML = isXML && len == xmlLen;
3004  isXMLNS = isXMLNS && len == xmlnsLen;
3005
3006  if (mustBeXML != isXML)
3007  return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
/* NOTE(review): the second operand of this conditional (listing line 3008)
   and the statement controlled by the following `if` (listing line 3011)
   are missing from this listing. */
3009
3010  if (isXMLNS)
3012
/* Make room for the separator.  With len incremented, the memcpy below
   also copies uri's terminating NUL, which is then overwritten with the
   separator at index len - 1. */
3013  if (namespaceSeparator)
3014  len++;
/* Reuse a recycled BINDING when one is available; it is unlinked from
   freeBindingList only after its uri buffer is known to be big enough,
   so a failed REALLOC leaves the free list untouched. */
3015  if (freeBindingList) {
3016  b = freeBindingList;
3017  if (len > b->uriAlloc) {
3018  XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3019  sizeof(XML_Char) * (len + EXPAND_SPARE));
3020  if (temp == NULL)
3021  return XML_ERROR_NO_MEMORY;
3022  b->uri = temp;
3023  b->uriAlloc = len + EXPAND_SPARE;
3024  }
3025  freeBindingList = b->nextTagBinding;
3026  }
3027  else {
3028  b = (BINDING *)MALLOC(sizeof(BINDING));
3029  if (!b)
3030  return XML_ERROR_NO_MEMORY;
3031  b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
3032  if (!b->uri) {
3033  FREE(b);
3034  return XML_ERROR_NO_MEMORY;
3035  }
3036  b->uriAlloc = len + EXPAND_SPARE;
3037  }
3038  b->uriLen = len;
3039  memcpy(b->uri, uri, len * sizeof(XML_Char));
3040  if (namespaceSeparator)
3041  b->uri[len - 1] = namespaceSeparator;
3042  b->prefix = prefix;
3043  b->attId = attId;
/* Remember the binding being shadowed; this function overwrites
   prefix->binding without any other bookkeeping. */
3044  b->prevPrefixBinding = prefix->binding;
3045  /* NULL binding when default namespace undeclared */
3046  if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3047  prefix->binding = NULL;
3048  else
3049  prefix->binding = b;
/* b is pushed onto the caller's list even when prefix->binding was set
   to NULL above. */
3050  b->nextTagBinding = *bindingsPtr;
3051  *bindingsPtr = b;
3052  /* if attId == NULL then we are not starting a namespace scope */
3053  if (attId && startNamespaceDeclHandler)
/* NOTE(review): the start of the handler call (listing line 3054) is
   missing; the visible last argument is uri, or 0 when the binding was
   set to NULL. */
3055  prefix->binding ? uri : 0);
3056  return XML_ERROR_NONE;
3057 }
3058 
3059 /* The idea here is to avoid using stack for each CDATA section when
3060  the whole file is parsed with one call.
3061 */
/* Processor-style entry point for input that lies inside a CDATA section.

   NOTE(review): the line carrying this function's name and first
   parameter (listing line 3063) is missing, as are listing lines 3068,
   3074 and 3078; presumably this is cdataSectionProcessor - confirm
   against the full source.

   Visible behaviour: a call whose last arguments are endPtr and
   "more input expected" (!ps_finalBuffer) yields `result`; any error is
   returned unchanged.  If `start` is non-null afterwards, the rest of the
   buffer is handed to externalEntityContentProcessor() when parentParser
   is set and to contentProcessor() otherwise.  If `start` is null the
   XML_ERROR_NONE result is returned.
*/
3062 static enum XML_Error PTRCALL
3064  const char *start,
3065  const char *end,
3066  const char **endPtr)
3067 {
3069  endPtr, (XML_Bool)!ps_finalBuffer);
3070  if (result != XML_ERROR_NONE)
3071  return result;
3072  if (start) {
3073  if (parentParser) { /* we are parsing an external entity */
3075  return externalEntityContentProcessor(parser, start, end, endPtr);
3076  }
3077  else {
3079  return contentProcessor(parser, start, end, endPtr);
3080  }
3081  }
3082  return result;
3083 }
3085 /* startPtr gets set to non-null if the section is closed, and to null if
3086  the section is not yet closed.
3087 */
/* Tokenizes the body of a CDATA section with XmlCdataSectionTok() and
   reports its content.

   NOTE(review): the line carrying this function's name and first
   parameter (listing line 3089) is missing, as are several case labels,
   handler calls and return statements inside the switch; presumably this
   is doCdataSection - confirm against the full source.

   enc       encoding used for tokenizing; when it is the parser's
             `encoding` the event pointers are eventPtr/eventEndPtr,
             otherwise those of openInternalEntities.
   startPtr  in: start of the data; out: set to NULL on entry and to the
             position after the section once it has been closed.
   end       end of the available data.
   nextPtr   out: where parsing should resume.
   haveMore  non-zero when more input may follow; partial tokens then end
             the call with XML_ERROR_NONE and *nextPtr at the token start.

   Returns XML_ERROR_NONE, XML_ERROR_ABORTED when ps_parsing is
   XML_FINISHED, or XML_ERROR_INVALID_TOKEN / XML_ERROR_PARTIAL_CHAR for
   the corresponding tokens.
*/
3088 static enum XML_Error
3090  const ENCODING *enc,
3091  const char **startPtr,
3092  const char *end,
3093  const char **nextPtr,
3094  XML_Bool haveMore)
3095 {
3096  const char *s = *startPtr;
3097  const char **eventPP;
3098  const char **eventEndPP;
3099  if (enc == encoding) {
3100  eventPP = &eventPtr;
3101  *eventPP = s;
3102  eventEndPP = &eventEndPtr;
3103  }
3104  else {
3105  eventPP = &(openInternalEntities->internalEventPtr);
3106  eventEndPP = &(openInternalEntities->internalEventEndPtr);
3107  }
3108  *eventPP = s;
/* NULL means "section not closed yet"; only the closing branch below
   stores a non-null value. */
3109  *startPtr = NULL;
3110
3111  for (;;) {
3112  const char *next;
3113  int tok = XmlCdataSectionTok(enc, s, end, &next);
3114  *eventEndPP = next;
3115  switch (tok) {
/* NOTE(review): listing lines 3116-3118, 3122 and 3125 (the case label
   and the handler calls of this branch) are missing from this listing. */
3119 #if 0
3120  /* see comment under XML_TOK_CDATA_SECT_OPEN */
3121  else if (characterDataHandler)
3123 #endif
3124  else if (defaultHandler)
3126  *startPtr = next;
3127  *nextPtr = next;
3128  if (ps_parsing == XML_FINISHED)
3129  return XML_ERROR_ABORTED;
3130  else
3131  return XML_ERROR_NONE;
3132  case XML_TOK_DATA_NEWLINE:
3133  if (characterDataHandler) {
/* The handler receives a single 0xA character for a newline token.
   NOTE(review): the call itself (listing line 3135) is missing. */
3134  XML_Char c = 0xA;
3136  }
3137  else if (defaultHandler)
3139  break;
3140  case XML_TOK_DATA_CHARS:
3141  {
/* NOTE(review): the declaration of charDataHandler (listing line 3142)
   is missing from this listing. */
3143  if (charDataHandler) {
3144  if (MUST_CONVERT(enc, s)) {
/* Convert through dataBuf piece by piece; each pass delivers what fitted
   and the loop ends once the whole token has been consumed. */
3145  for (;;) {
3146  ICHAR *dataPtr = (ICHAR *)dataBuf;
3147  XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
3148  *eventEndPP = next;
3149  charDataHandler(handlerArg, dataBuf,
3150  (int)(dataPtr - (ICHAR *)dataBuf));
3151  if (s == next)
3152  break;
3153  *eventPP = s;
3154  }
3155  }
3156  else
3157  charDataHandler(handlerArg,
3158  (XML_Char *)s,
3159  (int)((XML_Char *)next - (XML_Char *)s));
3160  }
3161  else if (defaultHandler)
3163  }
3164  break;
3165  case XML_TOK_INVALID:
3166  *eventPP = next;
3167  return XML_ERROR_INVALID_TOKEN;
3168  case XML_TOK_PARTIAL_CHAR:
3169  if (haveMore) {
3170  *nextPtr = s;
3171  return XML_ERROR_NONE;
3172  }
3173  return XML_ERROR_PARTIAL_CHAR;
3174  case XML_TOK_PARTIAL:
3175  case XML_TOK_NONE:
3176  if (haveMore) {
3177  *nextPtr = s;
3178  return XML_ERROR_NONE;
3179  }
/* NOTE(review): listing lines 3180 and 3183 (the statements ending this
   case and the default case) are missing from this listing. */
3181  default:
3182  *eventPP = next;
3184  }
3185
/* Token handled: advance, then honour a suspend/abort request that was
   recorded in ps_parsing. */
3186  *eventPP = s = next;
3187  switch (ps_parsing) {
3188  case XML_SUSPENDED:
3189  *nextPtr = next;
3190  return XML_ERROR_NONE;
3191  case XML_FINISHED:
3192  return XML_ERROR_ABORTED;
3193  default: ;
3194  }
3195  }
3196  /* not reached */
3197 }
3198 
3199 #ifdef XML_DTD
3200 
3201 /* The idea here is to avoid using stack for each IGNORE section when
3202  the whole file is parsed with one call.
3203 */
/* ignoreSectionProcessor: processor-style wrapper around doIgnoreSection().

   Runs doIgnoreSection() on [start, end) with the parser's `encoding`;
   more input is expected unless ps_finalBuffer is set.  An error result
   is returned unchanged.  When `start` is non-null afterwards the
   remaining input is passed on to prologProcessor(); when it is null the
   XML_ERROR_NONE result is returned.

   NOTE(review): listing line 3215, directly before the prologProcessor()
   call, is missing from this listing.
*/
3204 static enum XML_Error PTRCALL
3205 ignoreSectionProcessor(XML_Parser parser,
3206  const char *start,
3207  const char *end,
3208  const char **endPtr)
3209 {
3210  enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
3211  endPtr, (XML_Bool)!ps_finalBuffer);
3212  if (result != XML_ERROR_NONE)
3213  return result;
3214  if (start) {
3216  return prologProcessor(parser, start, end, endPtr);
3217  }
3218  return result;
3219 }
3220 
3221 /* startPtr gets set to non-null if the section is closed, and to null
3222  if the section is not yet closed.
3223 */
/* doIgnoreSection: tokenize one IGNORE section with XmlIgnoreSectionTok().

   enc       encoding used for tokenizing; when it is the parser's
             `encoding` the event pointers are eventPtr/eventEndPtr,
             otherwise those of openInternalEntities.
   startPtr  in: start of the data; out: set to NULL on entry and to the
             position after the section when XML_TOK_IGNORE_SECT is seen.
   end       end of the available data.
   nextPtr   out: where parsing should resume.
   haveMore  non-zero when more input may follow; a partial token then
             ends the call with XML_ERROR_NONE and *nextPtr == s.

   Returns XML_ERROR_NONE, XML_ERROR_ABORTED (ps_parsing is XML_FINISHED),
   XML_ERROR_INVALID_TOKEN, XML_ERROR_PARTIAL_CHAR, or XML_ERROR_SYNTAX
   when the section is still open and no more input is expected.

   NOTE(review): listing lines 3253 and 3278 are missing from this
   listing (the statement under `if (defaultHandler)` and the end of the
   default case).
*/
3224 static enum XML_Error
3225 doIgnoreSection(XML_Parser parser,
3226  const ENCODING *enc,
3227  const char **startPtr,
3228  const char *end,
3229  const char **nextPtr,
3230  XML_Bool haveMore)
3231 {
3232  const char *next;
3233  int tok;
3234  const char *s = *startPtr;
3235  const char **eventPP;
3236  const char **eventEndPP;
3237  if (enc == encoding) {
3238  eventPP = &eventPtr;
3239  *eventPP = s;
3240  eventEndPP = &eventEndPtr;
3241  }
3242  else {
3243  eventPP = &(openInternalEntities->internalEventPtr);
3244  eventEndPP = &(openInternalEntities->internalEventEndPtr);
3245  }
3246  *eventPP = s;
/* NULL means "section not closed yet"; only the XML_TOK_IGNORE_SECT
   branch stores a non-null value. */
3247  *startPtr = NULL;
3248  tok = XmlIgnoreSectionTok(enc, s, end, &next);
3249  *eventEndPP = next;
3250  switch (tok) {
3251  case XML_TOK_IGNORE_SECT:
3252  if (defaultHandler)
3254  *startPtr = next;
3255  *nextPtr = next;
3256  if (ps_parsing == XML_FINISHED)
3257  return XML_ERROR_ABORTED;
3258  else
3259  return XML_ERROR_NONE;
3260  case XML_TOK_INVALID:
3261  *eventPP = next;
3262  return XML_ERROR_INVALID_TOKEN;
3263  case XML_TOK_PARTIAL_CHAR:
3264  if (haveMore) {
3265  *nextPtr = s;
3266  return XML_ERROR_NONE;
3267  }
3268  return XML_ERROR_PARTIAL_CHAR;
3269  case XML_TOK_PARTIAL:
3270  case XML_TOK_NONE:
3271  if (haveMore) {
3272  *nextPtr = s;
3273  return XML_ERROR_NONE;
3274  }
3275  return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3276  default:
3277  *eventPP = next;
3279  }
3280  /* not reached */
3281 }
3282 
3283 #endif /* XML_DTD */
3284 
/* Prepares the encoding name `s` derived from protocolEncodingName.

   NOTE(review): the line carrying this function's name and parameters
   (listing line 3286) and listing lines 3307, 3309 and 3311 are missing;
   presumably this is initializeEncoding - confirm against the full
   source.  What is done with `s` is not visible here.

   Visible behaviour under XML_UNICODE: protocolEncodingName is narrowed
   character by character into the local 128-byte encodingBuf.  If the
   name does not fit, or contains a character with bits outside 0x7f,
   encodingBuf becomes the empty string.  A NULL protocolEncodingName
   gives s == NULL.
*/
3285 static enum XML_Error
3287 {
3288  const char *s;
3289 #ifdef XML_UNICODE
3290  char encodingBuf[128];
3291  if (!protocolEncodingName)
3292  s = NULL;
3293  else {
3294  int i;
3295  for (i = 0; protocolEncodingName[i]; i++) {
3296  if (i == sizeof(encodingBuf) - 1
3297  || (protocolEncodingName[i] & ~0x7f) != 0) {
/* Too long or not plain ASCII: discard what was copied so far. */
3298  encodingBuf[0] = '\0';
3299  break;
3300  }
3301  encodingBuf[i] = (char)protocolEncodingName[i];
3302  }
3303  encodingBuf[i] = '\0';
3304  s = encodingBuf;
3305  }
3306 #else
3308 #endif
3310  return XML_ERROR_NONE;
3312 }
3313 
/* processXmlDecl: handle an XML declaration or text declaration found in
   [s, next).

   isGeneralTextEntity  non-zero for a text declaration; selects the error
                        code on a parse failure and disables the
                        standalone handling.
   s, next              start and end of the declaration.

   The declaration is parsed into version, encoding name, a known
   ENCODING (if any) and the standalone flag.  xmlDeclHandler, when set,
   receives copies of the version and encoding name stored in temp2Pool.
   Unless protocolEncodingName is set, the parser's `encoding` is
   switched to the declared one, or handleUnknownEncoding() is asked to
   supply it.

   Returns XML_ERROR_NONE, XML_ERROR_TEXT_DECL / XML_ERROR_XML_DECL when
   the declaration cannot be parsed, XML_ERROR_NO_MEMORY, or the result
   of handleUnknownEncoding().

   NOTE(review): listing lines 3326, 3358, 3371, 3376 and 3391 are
   missing from this listing.
*/
3314 static enum XML_Error
3315 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3316  const char *s, const char *next)
3317 {
3318  const char *encodingName = NULL;
3319  const XML_Char *storedEncName = NULL;
3320  const ENCODING *newEncoding = NULL;
3321  const char *version = NULL;
3322  const char *versionend;
3323  const XML_Char *storedversion = NULL;
3324  int standalone = -1;
/* NOTE(review): the alternative chosen when `ns` is set (listing line
   3326) is missing from this listing. */
3325  if (!(ns
3327  : XmlParseXmlDecl)(isGeneralTextEntity,
3328  encoding,
3329  s,
3330  next,
3331  &eventPtr,
3332  &version,
3333  &versionend,
3334  &encodingName,
3335  &newEncoding,
3336  &standalone)) {
3337  if (isGeneralTextEntity)
3338  return XML_ERROR_TEXT_DECL;
3339  else
3340  return XML_ERROR_XML_DECL;
3341  }
3342  if (!isGeneralTextEntity && standalone == 1) {
3343  _dtd->standalone = XML_TRUE;
3344 #ifdef XML_DTD
3345  if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3346  paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3347 #endif /* XML_DTD */
3348  }
3349  if (xmlDeclHandler) {
3350  if (encodingName != NULL) {
3351  storedEncName = poolStoreString(&temp2Pool,
3352  encoding,
3353  encodingName,
3354  encodingName
3355  + XmlNameLength(encoding, encodingName));
3356  if (!storedEncName)
3357  return XML_ERROR_NO_MEMORY;
3359  }
3360  if (version) {
/* The stored version stops one character (minBytesPerChar bytes) short
   of versionend - presumably to exclude the closing quote; confirm. */
3361  storedversion = poolStoreString(&temp2Pool,
3362  encoding,
3363  version,
3364  versionend - encoding->minBytesPerChar);
3365  if (!storedversion)
3366  return XML_ERROR_NO_MEMORY;
3367  }
3368  xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3369  }
3370  else if (defaultHandler)
/* The declared encoding is ignored when the application supplied
   protocolEncodingName. */
3372  if (protocolEncodingName == NULL) {
3373  if (newEncoding) {
3374  if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
3375  eventPtr = encodingName;
3377  }
3378  encoding = newEncoding;
3379  }
3380  else if (encodingName) {
3381  enum XML_Error result;
/* The name may already have been stored above for xmlDeclHandler. */
3382  if (!storedEncName) {
3383  storedEncName = poolStoreString(
3384  &temp2Pool, encoding, encodingName,
3385  encodingName + XmlNameLength(encoding, encodingName));
3386  if (!storedEncName)
3387  return XML_ERROR_NO_MEMORY;
3388  }
3389  result = handleUnknownEncoding(parser, storedEncName);
3390  poolClear(&temp2Pool);
3392  eventPtr = encodingName;
3393  return result;
3394  }
3395  }
3396
3397  if (storedEncName || storedversion)
3398  poolClear(&temp2Pool);
3399
3400  return XML_ERROR_NONE;
3401 }
3402 
/* handleUnknownEncoding: try to obtain an ENCODING for `encodingName`
   through the application's unknownEncodingHandler.

   Nothing is attempted when unknownEncodingHandler is not set.
   Otherwise an `info` record is prepared with every map entry set to -1
   and convert, data and release set to NULL.  On success the record's
   data and release function are remembered in unknownEncodingData and
   unknownEncodingRelease, `encoding` is switched to the new ENCODING and
   XML_ERROR_NONE is returned.  On every visible failure path
   info.release, if set, is called with info.data.

   Returns XML_ERROR_NONE on success and XML_ERROR_NO_MEMORY when
   unknownEncodingMem is NULL.

   NOTE(review): listing lines 3407, 3414, 3417, 3424-3425 and 3439 are
   missing (the declaration of info, the handler call, the assignment of
   unknownEncodingMem, the function building `enc`, and the final
   return).
*/
3403 static enum XML_Error
3404 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3405 {
3406  if (unknownEncodingHandler) {
3408  int i;
3409  for (i = 0; i < 256; i++)
3410  info.map[i] = -1;
3411  info.convert = NULL;
3412  info.data = NULL;
3413  info.release = NULL;
3415  &info)) {
3416  ENCODING *enc;
3418  if (!unknownEncodingMem) {
3419  if (info.release)
3420  info.release(info.data);
3421  return XML_ERROR_NO_MEMORY;
3422  }
3423  enc = (ns
3426  info.map,
3427  info.convert,
3428  info.data);
3429  if (enc) {
/* From here on the parser keeps info.data and info.release, so the
   release call below is not reached on this path. */
3430  unknownEncodingData = info.data;
3431  unknownEncodingRelease = info.release;
3432  encoding = enc;
3433  return XML_ERROR_NONE;
3434  }
3435  }
3436  if (info.release != NULL)
3437  info.release(info.data);
3438  }
3440 }
3441 
/* Processor-style entry point that performs a one-off step and then
   continues with prologProcessor() on the same input.

   NOTE(review): the line carrying this function's name and first
   parameter (listing line 3443), the statement producing `result`
   (listing line 3448) and listing line 3451 are missing; presumably this
   is prologInitProcessor - confirm against the full source.

   Visible behaviour: a `result` other than XML_ERROR_NONE is returned
   unchanged; otherwise the result of prologProcessor(parser, s, end,
   nextPtr) is returned.
*/
3442 static enum XML_Error PTRCALL
3444  const char *s,
3445  const char *end,
3446  const char **nextPtr)
3447 {
3449  if (result != XML_ERROR_NONE)
3450  return result;
3452  return prologProcessor(parser, s, end, nextPtr);
3453 }
3454 
3455 #ifdef XML_DTD
3456 
/* externalParEntInitProcessor: first processor run on an external
   parameter entity.

   A `result` other than XML_ERROR_NONE is returned unchanged.  Otherwise
   _dtd->paramEntityRead is set, and `processor` is switched to
   entityValueInitProcessor when prologState.inEntityValue is set and to
   externalParEntProcessor when it is not; the chosen processor is then
   called with the same arguments and its result returned.

   NOTE(review): the statement producing `result` (listing line 3463) is
   missing from this listing.
*/
3457 static enum XML_Error PTRCALL
3458 externalParEntInitProcessor(XML_Parser parser,
3459  const char *s,
3460  const char *end,
3461  const char **nextPtr)
3462 {
3464  if (result != XML_ERROR_NONE)
3465  return result;
3466
3467  /* we know now that XML_Parse(Buffer) has been called,
3468  so we consider the external parameter entity read */
3469  _dtd->paramEntityRead = XML_TRUE;
3470
3471  if (prologState.inEntityValue) {
3472  processor = entityValueInitProcessor;
3473  return entityValueInitProcessor(parser, s, end, nextPtr);
3474  }
3475  else {
3476  processor = externalParEntProcessor;
3477  return externalParEntProcessor(parser, s, end, nextPtr);
3478  }
3479 }
3480 
/* entityValueInitProcessor: scan an entity value held in [s, end) token
   by token, looking for a leading text declaration.

   - Token value <= 0: when more input may follow (ps_finalBuffer not
     set) and the token is not invalid, nothing is consumed
     (*nextPtr = s) and XML_ERROR_NONE is returned.  On the final buffer
     an invalid or partial token gives the matching error; otherwise the
     whole range [s, end) is stored with storeEntityValue().
   - XML_TOK_XML_DECL: after the declaration has been processed the
     processor is switched to entityValueProcessor, which continues
     after the declaration.
   - XML_TOK_BOM ending exactly at `end` with more input to come: the
     BOM is consumed (*nextPtr = next) and XML_ERROR_NONE is returned.

   NOTE(review): listing lines 3493 and 3516 are missing (the statements
   assigning `tok` and `result`).
*/
3481 static enum XML_Error PTRCALL
3482 entityValueInitProcessor(XML_Parser parser,
3483  const char *s,
3484  const char *end,
3485  const char **nextPtr)
3486 {
3487  int tok;
3488  const char *start = s;
3489  const char *next = start;
3490  eventPtr = start;
3491
3492  for (;;) {
3494  eventEndPtr = next;
3495  if (tok <= 0) {
3496  if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
/* s, not start: tokens already scanned are offered again next time. */
3497  *nextPtr = s;
3498  return XML_ERROR_NONE;
3499  }
3500  switch (tok) {
3501  case XML_TOK_INVALID:
3502  return XML_ERROR_INVALID_TOKEN;
3503  case XML_TOK_PARTIAL:
3504  return XML_ERROR_UNCLOSED_TOKEN;
3505  case XML_TOK_PARTIAL_CHAR:
3506  return XML_ERROR_PARTIAL_CHAR;
3507  case XML_TOK_NONE: /* start == end */
3508  default:
3509  break;
3510  }
3511  /* found end of entity value - can store it now */
3512  return storeEntityValue(parser, encoding, s, end);
3513  }
3514  else if (tok == XML_TOK_XML_DECL) {
3515  enum XML_Error result;
3517  if (result != XML_ERROR_NONE)
3518  return result;
3519  switch (ps_parsing) {
3520  case XML_SUSPENDED:
3521  *nextPtr = next;
3522  return XML_ERROR_NONE;
3523  case XML_FINISHED:
3524  return XML_ERROR_ABORTED;
3525  default:
3526  *nextPtr = next;
3527  }
3528  /* stop scanning for text declaration - we found one */
3529  processor = entityValueProcessor;
3530  return entityValueProcessor(parser, next, end, nextPtr);
3531  }
3532  /* If we are at the end of the buffer, this would cause XmlPrologTok to
3533  return XML_TOK_NONE on the next call, which would then cause the
3534  function to exit with *nextPtr set to s - that is what we want for other
3535  tokens, but not for the BOM - we would rather like to skip it;
3536  then, when this routine is entered the next time, XmlPrologTok will
3537  return XML_TOK_INVALID, since the BOM is still in the buffer
3538  */
3539  else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
3540  *nextPtr = next;
3541  return XML_ERROR_NONE;
3542  }
3543  start = next;
3544  eventPtr = start;
3545  }
3546 }
3547 
/* externalParEntProcessor: read the first token of an external parameter
   entity and hand over to doProlog().

   A first token value <= 0 with more input expected (ps_finalBuffer not
   set) and not invalid consumes nothing (*nextPtr = s) and returns
   XML_ERROR_NONE.  On the final buffer an invalid or partial token
   returns the matching error; other values <= 0 go on to doProlog().  A
   leading XML_TOK_BOM is skipped by tokenizing again from behind it.

   Returns one of the errors above or the result of doProlog().

   NOTE(review): listing line 3584, directly before the doProlog() call,
   is missing from this listing.
*/
3548 static enum XML_Error PTRCALL
3549 externalParEntProcessor(XML_Parser parser,
3550  const char *s,
3551  const char *end,
3552  const char **nextPtr)
3553 {
3554  const char *next = s;
3555  int tok;
3556
3557  tok = XmlPrologTok(encoding, s, end, &next);
3558  if (tok <= 0) {
3559  if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3560  *nextPtr = s;
3561  return XML_ERROR_NONE;
3562  }
3563  switch (tok) {
3564  case XML_TOK_INVALID:
3565  return XML_ERROR_INVALID_TOKEN;
3566  case XML_TOK_PARTIAL:
3567  return XML_ERROR_UNCLOSED_TOKEN;
3568  case XML_TOK_PARTIAL_CHAR:
3569  return XML_ERROR_PARTIAL_CHAR;
3570  case XML_TOK_NONE: /* start == end */
3571  default:
3572  break;
3573  }
3574  }
3575  /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
3576  However, when parsing an external subset, doProlog will not accept a BOM
3577  as valid, and report a syntax error, so we have to skip the BOM
3578  */
3579  else if (tok == XML_TOK_BOM) {
3580  s = next;
3581  tok = XmlPrologTok(encoding, s, end, &next);
3582  }
3583
3585  return doProlog(parser, encoding, s, end, tok, next,
3586  nextPtr, (XML_Bool)!ps_finalBuffer);
3587 }
3588 
/* entityValueProcessor: scan an entity value held in [s, end) until the
   tokenizer reports a value <= 0, then store the value.

   `start` advances token by token while `s` keeps the beginning of the
   range.  When the stopping token is not invalid and more input may
   follow (ps_finalBuffer not set), nothing is consumed (*nextPtr = s)
   and XML_ERROR_NONE is returned.  On the final buffer:
     XML_TOK_INVALID       -> XML_ERROR_INVALID_TOKEN
     XML_TOK_PARTIAL       -> XML_ERROR_UNCLOSED_TOKEN
     XML_TOK_PARTIAL_CHAR  -> XML_ERROR_PARTIAL_CHAR
     anything else         -> result of storeEntityValue() on [s, end)
*/
3589 static enum XML_Error PTRCALL
3590 entityValueProcessor(XML_Parser parser,
3591  const char *s,
3592  const char *end,
3593  const char **nextPtr)
3594 {
3595  const char *start = s;
3596  const char *next = s;
3597  const ENCODING *enc = encoding;
3598  int tok;
3599
3600  for (;;) {
3601  tok = XmlPrologTok(enc, start, end, &next);
3602  if (tok <= 0) {
3603  if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
/* s, not start: the whole value is offered again with the next buffer. */
3604  *nextPtr = s;
3605  return XML_ERROR_NONE;
3606  }
3607  switch (tok) {
3608  case XML_TOK_INVALID:
3609  return XML_ERROR_INVALID_TOKEN;
3610  case XML_TOK_PARTIAL:
3611  return XML_ERROR_UNCLOSED_TOKEN;
3612  case XML_TOK_PARTIAL_CHAR:
3613  return XML_ERROR_PARTIAL_CHAR;
3614  case XML_TOK_NONE: /* start == end */
3615  default:
3616  break;
3617  }
3618  /* found end of entity value - can store it now */
3619  return storeEntityValue(parser, enc, s, end);
3620  }
3621  start = next;
3622  }
3623 }
3624 
3625 #endif /* XML_DTD */
3626 
/* Processor-style entry point for the prolog: fetches the first token of
   [s, end) with XmlPrologTok() and passes it, together with the parser's
   `encoding`, to doProlog().  More input is expected unless
   ps_finalBuffer is set.  Returns whatever doProlog() returns.

   NOTE(review): the line carrying this function's name and first
   parameter (listing line 3628) is missing; presumably this is
   prologProcessor, which other functions in this file call with the
   same argument list - confirm against the full source.
*/
3627 static enum XML_Error PTRCALL
3629  const char *s,
3630  const char *end,
3631  const char **nextPtr)
3632 {
3633  const char *next = s;
3634  int tok = XmlPrologTok(encoding, s, end, &next);
3635  return doProlog(parser, encoding, s, end, tok, next,
3636  nextPtr, (XML_Bool)!ps_finalBuffer);
3637 }
3638 
3639 static enum XML_Error
3641  const ENCODING *enc,
3642  const char *s,
3643  const char *end,
3644  int tok,
3645  const char *next,
3646  const char **nextPtr,
3647  XML_Bool haveMore)
3648 {
3649 #ifdef XML_DTD
3650  static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
3651 #endif /* XML_DTD */
3652  static const XML_Char atypeCDATA[] =
3653  { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
3654  static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
3655  static const XML_Char atypeIDREF[] =
3656  { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
3657  static const XML_Char atypeIDREFS[] =
3658  { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
3659  static const XML_Char atypeENTITY[] =
3660  { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
3661  static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
3663  static const XML_Char atypeNMTOKEN[] = {
3665  static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
3666  ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
3667  static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
3669  static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
3670  static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
3671 
3672  /* save one level of indirection */
3673  DTD * const dtd = _dtd;
3674 
3675  const char **eventPP;
3676  const char **eventEndPP;
3677  enum XML_Content_Quant quant;
3678 
3679  if (enc == encoding) {
3680  eventPP = &eventPtr;
3681  eventEndPP = &eventEndPtr;
3682  }
3683  else {
3684  eventPP = &(openInternalEntities->internalEventPtr);
3685  eventEndPP = &(openInternalEntities->internalEventEndPtr);
3686  }
3687 
3688  for (;;) {
3689  int role;
3690  XML_Bool handleDefault = XML_TRUE;
3691  *eventPP = s;
3692  *eventEndPP = next;
3693  if (tok <= 0) {
3694  if (haveMore && tok != XML_TOK_INVALID) {
3695  *nextPtr = s;
3696  return XML_ERROR_NONE;
3697  }
3698  switch (tok) {
3699  case XML_TOK_INVALID:
3700  *eventPP = next;
3701  return XML_ERROR_INVALID_TOKEN;
3702  case XML_TOK_PARTIAL:
3703  return XML_ERROR_UNCLOSED_TOKEN;
3704  case XML_TOK_PARTIAL_CHAR:
3705  return XML_ERROR_PARTIAL_CHAR;
3706  case XML_TOK_NONE:
3707 #ifdef XML_DTD
3708  /* for internal PE NOT referenced between declarations */
3709  if (enc != encoding && !openInternalEntities->betweenDecl) {
3710  *nextPtr = s;
3711  return XML_ERROR_NONE;
3712  }
3713  /* WFC: PE Between Declarations - must check that PE contains
3714  complete markup, not only for external PEs, but also for
3715  internal PEs if the reference occurs between declarations.
3716  */
3717  if (isParamEntity || enc != encoding) {
3719  == XML_ROLE_ERROR)
3720  return XML_ERROR_INCOMPLETE_PE;
3721  *nextPtr = s;
3722  return XML_ERROR_NONE;
3723  }
3724 #endif /* XML_DTD */
3725  return XML_ERROR_NO_ELEMENTS;
3726  default:
3727  tok = -tok;
3728  next = end;
3729  break;
3730  }
3731  }
3732  role = XmlTokenRole(&prologState, tok, s, next, enc);
3733  switch (role) {
3734  case XML_ROLE_XML_DECL:
3735  {
3736  enum XML_Error result = processXmlDecl(parser, 0, s, next);
3737  if (result != XML_ERROR_NONE)
3738  return result;
3739  enc = encoding;
3740  handleDefault = XML_FALSE;
3741  }
3742  break;
3743  case XML_ROLE_DOCTYPE_NAME:
3746  if (!doctypeName)
3747  return XML_ERROR_NO_MEMORY;
3748  poolFinish(&tempPool);
3749  doctypePubid = NULL;
3750  handleDefault = XML_FALSE;
3751  }
3752  doctypeSysid = NULL; /* always initialize to NULL */
3753  break;
3757  doctypePubid, 1);
3758  doctypeName = NULL;
3759  poolClear(&tempPool);
3760  handleDefault = XML_FALSE;
3761  }
3762  break;
3763 #ifdef XML_DTD
3764  case XML_ROLE_TEXT_DECL:
3765  {
3766  enum XML_Error result = processXmlDecl(parser, 1, s, next);
3767  if (result != XML_ERROR_NONE)
3768  return result;
3769  enc = encoding;
3770  handleDefault = XML_FALSE;
3771  }
3772  break;
3773 #endif /* XML_DTD */
3775 #ifdef XML_DTD
3776  useForeignDTD = XML_FALSE;
3777  declEntity = (ENTITY *)lookup(&dtd->paramEntities,
3778  externalSubsetName,
3779  sizeof(ENTITY));
3780  if (!declEntity)
3781  return XML_ERROR_NO_MEMORY;
3782 #endif /* XML_DTD */
3785  if (!XmlIsPublicId(enc, s, next, eventPP))
3786  return XML_ERROR_PUBLICID;
3788  s + enc->minBytesPerChar,
3789  next - enc->minBytesPerChar);
3790  if (!doctypePubid)
3791  return XML_ERROR_NO_MEMORY;
3793  poolFinish(&tempPool);
3794  handleDefault = XML_FALSE;
3795  goto alreadyChecked;
3796  }
3797  /* fall through */
3799  if (!XmlIsPublicId(enc, s, next, eventPP))
3800  return XML_ERROR_PUBLICID;
3801  alreadyChecked:
3802  if (dtd->keepProcessing && declEntity) {
3803  XML_Char *tem = poolStoreString(&dtd->pool,
3804  enc,
3805  s + enc->minBytesPerChar,
3806  next - enc->minBytesPerChar);
3807  if (!tem)
3808  return XML_ERROR_NO_MEMORY;
3809  normalizePublicId(tem);
3810  declEntity->publicId = tem;
3811  poolFinish(&dtd->pool);
3812  if (entityDeclHandler)
3813  handleDefault = XML_FALSE;
3814  }
3815  break;
3817  if (doctypeName) {
3820  poolClear(&tempPool);
3821  handleDefault = XML_FALSE;
3822  }
3823  /* doctypeSysid will be non-NULL in the case of a previous
3824  XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
3825  was not set, indicating an external subset
3826  */
3827 #ifdef XML_DTD
3828  if (doctypeSysid || useForeignDTD) {
3829  XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3831  if (paramEntityParsing && externalEntityRefHandler) {
3832  ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities,
3833  externalSubsetName,
3834  sizeof(ENTITY));
3835  if (!entity)
3836  return XML_ERROR_NO_MEMORY;
3837  if (useForeignDTD)
3838  entity->base = curBase;
3839  dtd->paramEntityRead = XML_FALSE;
3841  0,
3842  entity->base,
3843  entity->systemId,
3844  entity->publicId))
3846  if (dtd->paramEntityRead) {
3847  if (!dtd->standalone &&
3850  return XML_ERROR_NOT_STANDALONE;
3851  }
3852  /* if we didn't read the foreign DTD then this means that there
3853  is no external subset and we must reset dtd->hasParamEntityRefs
3854  */
3855  else if (!doctypeSysid)
3856  dtd->hasParamEntityRefs = hadParamEntityRefs;
3857  /* end of DTD - no need to update dtd->keepProcessing */
3858  }
3859  useForeignDTD = XML_FALSE;
3860  }
3861 #endif /* XML_DTD */
3862  if (endDoctypeDeclHandler) {
3864  handleDefault = XML_FALSE;
3865  }
3866  break;
3868 #ifdef XML_DTD
3869  /* if there is no DOCTYPE declaration then now is the
3870  last chance to read the foreign DTD
3871  */
3872  if (useForeignDTD) {
3873  XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3875  if (paramEntityParsing && externalEntityRefHandler) {
3876  ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities,
3877  externalSubsetName,
3878  sizeof(ENTITY));
3879  if (!entity)
3880  return XML_ERROR_NO_MEMORY;
3881  entity->base = curBase;
3882  dtd->paramEntityRead = XML_FALSE;
3884  0,
3885  entity->base,
3886  entity->systemId,
3887  entity->publicId))
3889  if (dtd->paramEntityRead) {
3890  if (!dtd->standalone &&
3893  return XML_ERROR_NOT_STANDALONE;
3894  }
3895  /* if we didn't read the foreign DTD then this means that there
3896  is no external subset and we must reset dtd->hasParamEntityRefs
3897  */
3898  else
3899  dtd->hasParamEntityRefs = hadParamEntityRefs;
3900  /* end of DTD - no need to update dtd->keepProcessing */
3901  }
3902  }
3903 #endif /* XML_DTD */
3905  return contentProcessor(parser, s, end, nextPtr);
3908  if (!declElementType)
3909  return XML_ERROR_NO_MEMORY;
3910  goto checkAttListDeclHandler;
3913  if (!declAttributeId)
3914  return XML_ERROR_NO_MEMORY;
3918  goto checkAttListDeclHandler;
3921  declAttributeType = atypeCDATA;
3922  goto checkAttListDeclHandler;
3925  declAttributeType = atypeID;
3926  goto checkAttListDeclHandler;
3928  declAttributeType = atypeIDREF;
3929  goto checkAttListDeclHandler;
3931  declAttributeType = atypeIDREFS;
3932  goto checkAttListDeclHandler;
3934  declAttributeType = atypeENTITY;
3935  goto checkAttListDeclHandler;
3937  declAttributeType = atypeENTITIES;
3938  goto checkAttListDeclHandler;
3940  declAttributeType = atypeNMTOKEN;
3941  goto checkAttListDeclHandler;
3943  declAttributeType = atypeNMTOKENS;
3944  checkAttListDeclHandler:
3945  if (dtd->keepProcessing && attlistDeclHandler)
3946  handleDefault = XML_FALSE;
3947  break;
3950  if (dtd->keepProcessing && attlistDeclHandler) {
3951  const XML_Char *prefix;
3952  if (declAttributeType) {
3953  prefix = enumValueSep;
3954  }
3955  else {
3957  ? notationPrefix
3958  : enumValueStart);
3959  }
3961  return XML_ERROR_NO_MEMORY;
3962  if (!poolAppend(&tempPool, enc, s, next))
3963  return XML_ERROR_NO_MEMORY;
3964  declAttributeType = tempPool.start;
3965  handleDefault = XML_FALSE;
3966  }
3967  break;
3970  if (dtd->keepProcessing) {
3973  0, parser))
3974  return XML_ERROR_NO_MEMORY;
3978  && declAttributeType[1] == XML_T(ASCII_O))) {
3979  /* Enumerated or Notation type */
3981  || !poolAppendChar(&tempPool, XML_T('\0')))
3982  return XML_ERROR_NO_MEMORY;
3983  declAttributeType = tempPool.start;
3984  poolFinish(&tempPool);
3985  }
3986  *eventEndPP = s;
3990  poolClear(&tempPool);
3991  handleDefault = XML_FALSE;
3992  }
3993  }
3994  break;
3997  if (dtd->keepProcessing) {
3998  const XML_Char *attVal;
3999  enum XML_Error result =
4001  s + enc->minBytesPerChar,
4002  next - enc->minBytesPerChar,
4003  &dtd->pool);
4004  if (result)
4005  return result;
4006  attVal = poolStart(&dtd->pool);
4007  poolFinish(&dtd->pool);
4008  /* ID attributes aren't allowed to have a default */
4011  return XML_ERROR_NO_MEMORY;
4015  && declAttributeType[1] == XML_T(ASCII_O))) {
4016  /* Enumerated or Notation type */
4018  || !poolAppendChar(&tempPool, XML_T('\0')))
4019  return XML_ERROR_NO_MEMORY;
4020  declAttributeType = tempPool.start;
4021  poolFinish(&tempPool);
4022  }
4023  *eventEndPP = s;
4026  attVal,
4028  poolClear(&tempPool);
4029  handleDefault = XML_FALSE;
4030  }
4031  }
4032  break;
4033  case XML_ROLE_ENTITY_VALUE:
4034  if (dtd->keepProcessing) {
4036  s + enc->minBytesPerChar,
4037  next - enc->minBytesPerChar);
4038  if (declEntity) {
4039  declEntity->textPtr = poolStart(&dtd->entityValuePool);
4040  declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4041  poolFinish(&dtd->entityValuePool);
4042  if (entityDeclHandler) {
4043  *eventEndPP = s;
4045  declEntity->name,
4046  declEntity->is_param,
4047  declEntity->textPtr,
4048  declEntity->textLen,
4049  curBase, 0, 0, 0);
4050  handleDefault = XML_FALSE;
4051  }
4052  }
4053  else
4055  if (result != XML_ERROR_NONE)
4056  return result;
4057  }
4058  break;
4060 #ifdef XML_DTD
4061  useForeignDTD = XML_FALSE;
4062 #endif /* XML_DTD */
4066  s + enc->minBytesPerChar,
4067  next - enc->minBytesPerChar);
4068  if (doctypeSysid == NULL)
4069  return XML_ERROR_NO_MEMORY;
4070  poolFinish(&tempPool);
4071  handleDefault = XML_FALSE;
4072  }
4073 #ifdef XML_DTD
4074  else
4075  /* use externalSubsetName to make doctypeSysid non-NULL
4076  for the case where no startDoctypeDeclHandler is set */
4077  doctypeSysid = externalSubsetName;
4078 #endif /* XML_DTD */
4079  if (!dtd->standalone
4080 #ifdef XML_DTD
4081  && !paramEntityParsing
4082 #endif /* XML_DTD */
4085  return XML_ERROR_NOT_STANDALONE;
4086 #ifndef XML_DTD
4087  break;
4088 #else /* XML_DTD */
4089  if (!declEntity) {
4090  declEntity = (ENTITY *)lookup(&dtd->paramEntities,
4091  externalSubsetName,
4092  sizeof(ENTITY));
4093  if (!declEntity)
4094  return XML_ERROR_NO_MEMORY;
4095  declEntity->publicId = NULL;
4096  }
4097  /* fall through */
4098 #endif /* XML_DTD */
4100  if (dtd->keepProcessing && declEntity) {
4101  declEntity->systemId = poolStoreString(&dtd->pool, enc,
4102  s + enc->minBytesPerChar,
4103  next - enc->minBytesPerChar);
4104  if (!declEntity->systemId)
4105  return XML_ERROR_NO_MEMORY;
4106  declEntity->base = curBase;
4107  poolFinish(&dtd->pool);
4108  if (entityDeclHandler)
4109  handleDefault = XML_FALSE;
4110  }
4111  break;
4113  if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4114  *eventEndPP = s;
4116  declEntity->name,
4117  declEntity->is_param,
4118  0,0,
4119  declEntity->base,
4120  declEntity->systemId,
4121  declEntity->publicId,
4122  0);
4123  handleDefault = XML_FALSE;
4124  }
4125  break;
4127  if (dtd->keepProcessing && declEntity) {
4128  declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4129  if (!declEntity->notation)
4130  return XML_ERROR_NO_MEMORY;
4131  poolFinish(&dtd->pool);
4133  *eventEndPP = s;
4135  declEntity->name,
4136  declEntity->base,
4137  declEntity->systemId,
4138  declEntity->publicId,
4139  declEntity->notation);
4140  handleDefault = XML_FALSE;
4141  }
4142  else if (entityDeclHandler) {
4143  *eventEndPP = s;
4145  declEntity->name,
4146  0,0,0,
4147  declEntity->base,
4148  declEntity->systemId,
4149  declEntity->publicId,
4150  declEntity->notation);
4151  handleDefault = XML_FALSE;
4152  }
4153  }
4154  break;
4156  {
4157  if (XmlPredefinedEntityName(enc, s, next)) {
4158  declEntity = NULL;
4159  break;
4160  }
4161  if (dtd->keepProcessing) {
4162  const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4163  if (!name)
4164  return XML_ERROR_NO_MEMORY;
4166  sizeof(ENTITY));
4167  if (!declEntity)
4168  return XML_ERROR_NO_MEMORY;
4169  if (declEntity->name != name) {
4170  poolDiscard(&dtd->pool);
4171  declEntity = NULL;
4172  }
4173  else {
4174  poolFinish(&dtd->pool);
4175  declEntity->publicId = NULL;
4176  declEntity->is_param = XML_FALSE;
4177  /* if we have a parent parser or are reading an internal parameter
4178  entity, then the entity declaration is not considered "internal"
4179  */
4180  declEntity->is_internal = !(parentParser || openInternalEntities);
4181  if (entityDeclHandler)
4182  handleDefault = XML_FALSE;
4183  }
4184  }
4185  else {
4186  poolDiscard(&dtd->pool);
4187  declEntity = NULL;
4188  }
4189  }
4190  break;
4192 #ifdef XML_DTD
4193  if (dtd->keepProcessing) {
4194  const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4195  if (!name)
4196  return XML_ERROR_NO_MEMORY;
4197  declEntity = (ENTITY *)lookup(&dtd->paramEntities,
4198  name, sizeof(ENTITY));
4199  if (!declEntity)
4200  return XML_ERROR_NO_MEMORY;
4201  if (declEntity->name != name) {
4202  poolDiscard(&dtd->pool);
4203  declEntity = NULL;
4204  }
4205  else {
4206  poolFinish(&dtd->pool);
4207  declEntity->publicId = NULL;
4208  declEntity->is_param = XML_TRUE;
4209  /* if we have a parent parser or are reading an internal parameter
4210  entity, then the entity declaration is not considered "internal"
4211  */
4212  declEntity->is_internal = !(parentParser || openInternalEntities);
4213  if (entityDeclHandler)
4214  handleDefault = XML_FALSE;
4215  }
4216  }
4217  else {
4218  poolDiscard(&dtd->pool);
4219  declEntity = NULL;
4220  }
4221 #else /* not XML_DTD */
4222  declEntity = NULL;
4223 #endif /* XML_DTD */
4224  break;
4228  if (notationDeclHandler) {
4230  if (!declNotationName)
4231  return XML_ERROR_NO_MEMORY;
4232  poolFinish(&tempPool);
4233  handleDefault = XML_FALSE;
4234  }
4235  break;
4237  if (!XmlIsPublicId(enc, s, next, eventPP))
4238  return XML_ERROR_PUBLICID;
4239  if (declNotationName) { /* means notationDeclHandler != NULL */
4241  enc,
4242  s + enc->minBytesPerChar,
4243  next - enc->minBytesPerChar);
4244  if (!tem)
4245  return XML_ERROR_NO_MEMORY;
4246  normalizePublicId(tem);
4247  declNotationPublicId = tem;
4248  poolFinish(&tempPool);
4249  handleDefault = XML_FALSE;
4250  }
4251  break;
4254  const XML_Char *systemId
4256  s + enc->minBytesPerChar,
4257  next - enc->minBytesPerChar);
4258  if (!systemId)
4259  return XML_ERROR_NO_MEMORY;
4260  *eventEndPP = s;
4263  curBase,
4264  systemId,
4266  handleDefault = XML_FALSE;
4267  }
4268  poolClear(&tempPool);
4269  break;
4272  *eventEndPP = s;
4275  curBase,
4276  0,
4278  handleDefault = XML_FALSE;
4279  }
4280  poolClear(&tempPool);
4281  break;
4282  case XML_ROLE_ERROR:
4283  switch (tok) {
4285  /* PE references in internal subset are
4286  not allowed within declarations. */
4288  case XML_TOK_XML_DECL:
4290  default:
4291  return XML_ERROR_SYNTAX;
4292  }
4293 #ifdef XML_DTD
4294  case XML_ROLE_IGNORE_SECT:
4295  {
4296  enum XML_Error result;
4297  if (defaultHandler)
4299  handleDefault = XML_FALSE;
4300  result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4301  if (result != XML_ERROR_NONE)
4302  return result;
4303  else if (!next) {
4304  processor = ignoreSectionProcessor;
4305  return result;
4306  }
4307  }
4308  break;
4309 #endif /* XML_DTD */
4310  case XML_ROLE_GROUP_OPEN:
4311  if (prologState.level >= groupSize) {
4312  if (groupSize) {
4313  char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4314  if (temp == NULL)
4315  return XML_ERROR_NO_MEMORY;
4316  groupConnector = temp;
4317  if (dtd->scaffIndex) {
4318  int *temp = (int *)REALLOC(dtd->scaffIndex,
4319  groupSize * sizeof(int));
4320  if (temp == NULL)
4321  return XML_ERROR_NO_MEMORY;
4322  dtd->scaffIndex = temp;
4323  }
4324  }
4325  else {
4326  groupConnector = (char *)MALLOC(groupSize = 32);
4327  if (!groupConnector)
4328  return XML_ERROR_NO_MEMORY;
4329  }
4330  }
4331  groupConnector[prologState.level] = 0;
4332  if (dtd->in_eldecl) {
4333  int myindex = nextScaffoldPart(parser);
4334  if (myindex < 0)
4335  return XML_ERROR_NO_MEMORY;
4336  dtd->scaffIndex[dtd->scaffLevel] = myindex;
4337  dtd->scaffLevel++;
4338  dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4339  if (elementDeclHandler)
4340  handleDefault = XML_FALSE;
4341  }
4342  break;
4344  if (groupConnector[prologState.level] == ASCII_PIPE)
4345  return XML_ERROR_SYNTAX;
4347  if (dtd->in_eldecl && elementDeclHandler)
4348  handleDefault = XML_FALSE;
4349  break;
4350  case XML_ROLE_GROUP_CHOICE:
4351  if (groupConnector[prologState.level] == ASCII_COMMA)
4352  return XML_ERROR_SYNTAX;
4353  if (dtd->in_eldecl
4354  && !groupConnector[prologState.level]
4355  && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4356  != XML_CTYPE_MIXED)
4357  ) {
4358  dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4359  = XML_CTYPE_CHOICE;
4360  if (elementDeclHandler)
4361  handleDefault = XML_FALSE;
4362  }
4364  break;
4366 #ifdef XML_DTD
4367  case XML_ROLE_INNER_PARAM_ENTITY_REF:
4369  if (!paramEntityParsing)
4370  dtd->keepProcessing = dtd->