ucommon  7.0.0
About: GNU uCommon C++ is a portable and optimized class framework for writing C++ applications that need to use threads and support concurrent synchronization, and that use sockets, XML parsing, object serialization, thread-optimized string and data structure classes, etc.
  Fossies Dox: ucommon-7.0.0.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Loading...
Searching...
No Matches
xml.cpp
Go to the documentation of this file.
1// Copyright (C) 2006-2014 David Sugar, Tycho Softworks.
2// Copyright (C) 2015 Cherokees of Idaho.
3//
4// This file is part of GNU uCommon C++.
5//
6// GNU uCommon C++ is free software: you can redistribute it and/or modify
7// it under the terms of the GNU Lesser General Public License as published
8// by the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10//
11// GNU uCommon C++ is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU Lesser General Public License for more details.
15//
16// You should have received a copy of the GNU Lesser General Public License
17// along with GNU uCommon C++. If not, see <http://www.gnu.org/licenses/>.
18
19
20#include <ucommon-config.h>
21#include <commoncpp/config.h>
22
23#include <commoncpp/export.h>
24// local includes
25#include <commoncpp/xml.h>
26
// Returns true when c is accepted by this parser as part of an element,
// attribute or entity name: an alphanumeric character (as classified by
// isalnum) or one of ':', '-', '.', '_'.
static bool isElement(char c)
{
    // isalnum() is only defined for values representable as unsigned char
    // (or EOF). A plain char holding a byte >= 0x80 is negative where char is
    // signed, so convert before classifying to avoid undefined behaviour.
    const unsigned char uc = static_cast<unsigned char>(c);
    return isalnum(uc) || c == ':' || c == '-' || c == '.' || c == '_';
}
31
32namespace ost {
33using namespace ucommon;
34
36{
37 state = NONE;
38 bufpos = 0;
39 bufsize = size;
40 buffer = new char[size];
41 ecount = dcount = 0;
42}
43
45{
46 if(buffer) {
47 delete[] buffer;
48 buffer = NULL;
49 }
50}
51
53{
54 buffer[bufpos++] = c;
55 if(bufpos >= bufsize) {
56 if(ecount)
58 bufpos = 0;
59 }
60}
61
63{
64 if(bufpos && ecount)
66 bufpos = 0;
67}
68
// Parse a whole document from a stdio stream, one character at a time.
//
// Resets the parser (state, bufpos, ecount, dcount) and then reads fp with
// fgetc() until EOF, driving a state machine over NONE / TAG / AMP / COMMENT /
// CDATA / DTD.
//
// Returns true as soon as the state becomes END. END is not assigned in this
// function; presumably parseTag() sets it when the outermost element closes.
// Returns false when an entity reference is malformed or unknown, when
// parseTag() rejects a tag, or when EOF is reached before END.
69bool XMLParser::parse(FILE *fp)
70{
71 state = NONE;
72 bufpos = 0;
73 ecount = dcount = 0;
74
75 int ch;
76 unsigned char cp;
77
78 while((ch = fgetc(fp)) != EOF) {
79 switch(state) {
 // Inside an entity reference ("&...;"): collect the name in buffer.
80 case AMP:
 // '#' is accepted only as the first character (numeric reference).
 // NOTE(review): this write is not checked against bufsize.
81 if((!bufpos && ch == '#') || isElement(ch)) {
82 buffer[bufpos++] = ch;
83 break;
84 }
 // Any terminator other than ';' is a syntax error.
85 if(ch != ';')
86 return false;
87 buffer[bufpos] = 0;
 // Numeric reference: atoi() reads decimal digits only and the result is
 // narrowed to a single unsigned char.
88 if(buffer[0] == '#')
89 cp = atoi(buffer + 1);
90 else if(eq(buffer, "amp"))
91 cp = '&';
92 else if(eq(buffer, "lt"))
93 cp = '<';
94 else if(eq(buffer, "gt"))
95 cp = '>';
 // NOTE(review): "apos" is decoded to a backtick here; XML defines &apos;
 // as the apostrophe (U+0027) - confirm whether this is intentional.
96 else if(eq(buffer, "apos"))
97 cp = '`';
98 else if(eq(buffer, "quot"))
99 cp = '\"';
100 else
101 return false;
 // Deliver the single decoded byte as character data.
102 characters((caddr_t)&cp, 1);
103 bufpos = 0;
104 state = NONE;
105 break;
 // Inside "<...": collect the tag text until '>' or a special prefix.
106 case TAG:
107 if(ch == '>') {
 // Leave TAG first; parseTag() works on the collected buffer.
108 state = NONE;
109 if(!parseTag())
110 return false;
111 }
 // "<![CDATA[" - note that bufpos is not reset on this transition.
112 else if(ch == '[' && bufpos == 7 && !strncmp(buffer, "![CDATA", 7)) {
113 state = CDATA;
114 }
 // "<!--"
115 else if(ch == '-' && bufpos == 2 && !strncmp(buffer, "!-", 2)) {
116 state = COMMENT;
117 bufpos = 0;
118 }
 // "<!DOCTYPE ... [" opens an internal DTD subset.
 // NOTE(review): bufpos is not checked to be >= 9 before comparing 9 bytes.
119 else if(ch == '[' && !strncmp(buffer, "!DOCTYPE ", 9)) {
120 state = DTD;
121 bufpos = 0;
122 }
123 else
124 putBuffer(ch);
125 break;
 // Inside "<!-- ... -->".
126 case COMMENT:
 // '>' preceded by "--" ends the comment; drop the two dashes.
127 if(ch == '>' && bufpos >= 2 && !strncmp(&buffer[bufpos - 2], "--", 2)) {
128 bufpos -= 2;
129 if(bufpos)
 // NOTE(review): the numbering embedded in this listing skips 130, so the
 // statement controlled by the if above is not visible in this view
 // (presumably the comment() callback) - confirm against upstream.
131 bufpos = 0;
132 state = NONE;
133 }
134 else {
135 buffer[bufpos++] = ch;
136 if(bufpos == bufsize) {
 // NOTE(review): listing line 137 is likewise missing from this view;
 // what is visible only restarts the buffer when it fills up.
138 bufpos = 0;
139 }
140 }
141 break;
 // Inside "<![CDATA[ ... ]]>".
142 case CDATA:
143 putBuffer(ch);
 // Look for the "]]>" terminator in the last three buffered bytes.
 // NOTE(review): eq() is not defined in this view; if it is a C-string
 // equality test, buffer is not NUL-terminated at bufpos here - verify.
144 if(bufpos > 2)
145 if(eq(&buffer[bufpos - 3], "]]>")) {
146 bufpos -= 3;
147 state = NONE;
148 clearBuffer();
149 }
150 break;
 // Inside a DTD subset: skip content, tracking nested '<' ... '>' in dcount.
151 case DTD:
152 if(ch == '<')
153 ++dcount;
154 else if(ch == '>' && dcount)
155 --dcount;
 // '>' at nesting depth zero ends the DTD.
156 else if(ch == '>')
157 state = NONE;
158 break;
 // Between tags.
159 case NONE:
160 if(ch == '<') {
161 clearBuffer();
162 state = TAG;
163 }
 // Entity references and text are only handled while ecount is non-zero
 // (presumably the open-element depth); other input is ignored.
164 else if(ecount && ch == '&') {
165 clearBuffer();
166 state = AMP;
167 }
168 else if(ecount)
169 putBuffer(ch);
170 break;
171 case END:
172 return true;
173 }
 // A handler above may have moved the parser to END.
174 if(state == END)
175 return true;
176 }
177 // EOF before end of document...
178 return false;
179}
180
181
// Parse a whole document from a NUL-terminated string.
//
// Resets the parser (state, bufpos, ecount, dcount) and then consumes buf one
// byte at a time until the terminating NUL, driving a state machine over
// NONE / TAG / AMP / COMMENT / CDATA / DTD.
//
// Returns true as soon as the state becomes END. END is not assigned in this
// function; presumably parseTag() sets it when the outermost element closes.
// Returns false when an entity reference is malformed or unknown, when
// parseTag() rejects a tag, or when the string ends before END.
182bool XMLParser::parse(const char *buf)
183{
184 state = NONE;
185 bufpos = 0;
186 ecount = dcount = 0;
187
188 uint8_t ch;
189 unsigned char cp;
190
 // Each byte is read as uint8_t; a zero byte ends the input.
191 while((ch = (uint8_t)*(buf++)) != 0) {
192 switch(state) {
 // Inside an entity reference ("&...;"): collect the name in buffer.
193 case AMP:
 // '#' is accepted only as the first character (numeric reference).
 // NOTE(review): this write is not checked against bufsize.
194 if((!bufpos && ch == '#') || isElement(ch)) {
195 buffer[bufpos++] = ch;
196 break;
197 }
 // Any terminator other than ';' is a syntax error.
198 if(ch != ';')
199 return false;
200 buffer[bufpos] = 0;
 // Numeric reference: atoi() reads decimal digits only and the result is
 // narrowed to a single unsigned char.
201 if(buffer[0] == '#')
202 cp = atoi(buffer + 1);
203 else if(eq(buffer, "amp"))
204 cp = '&';
205 else if(eq(buffer, "lt"))
206 cp = '<';
207 else if(eq(buffer, "gt"))
208 cp = '>';
 // NOTE(review): "apos" is decoded to a backtick here; XML defines &apos;
 // as the apostrophe (U+0027) - confirm whether this is intentional.
209 else if(eq(buffer, "apos"))
210 cp = '`';
211 else if(eq(buffer, "quot"))
212 cp = '\"';
213 else
214 return false;
 // Deliver the single decoded byte as character data.
215 characters((caddr_t)&cp, 1);
216 bufpos = 0;
217 state = NONE;
218 break;
 // Inside "<...": collect the tag text until '>' or a special prefix.
219 case TAG:
220 if(ch == '>') {
 // Leave TAG first; parseTag() works on the collected buffer.
221 state = NONE;
222 if(!parseTag())
223 return false;
224 }
 // "<![CDATA[" - note that bufpos is not reset on this transition.
225 else if(ch == '[' && bufpos == 7 && !strncmp(buffer, "![CDATA", 7)) {
226 state = CDATA;
227 }
 // "<!--"
228 else if(ch == '-' && bufpos == 2 && !strncmp(buffer, "!-", 2)) {
229 state = COMMENT;
230 bufpos = 0;
231 }
 // "<!DOCTYPE ... [" opens an internal DTD subset.
 // NOTE(review): bufpos is not checked to be >= 9 before comparing 9 bytes.
232 else if(ch == '[' && !strncmp(buffer, "!DOCTYPE ", 9)) {
233 state = DTD;
234 bufpos = 0;
235 }
236 else
237 putBuffer(ch);
238 break;
 // Inside "<!-- ... -->".
239 case COMMENT:
 // '>' preceded by "--" ends the comment; drop the two dashes.
240 if(ch == '>' && bufpos >= 2 && !strncmp(&buffer[bufpos - 2], "--", 2)) {
241 bufpos -= 2;
242 if(bufpos)
 // NOTE(review): the numbering embedded in this listing skips 243, so the
 // statement controlled by the if above is not visible in this view
 // (presumably the comment() callback) - confirm against upstream.
244 bufpos = 0;
245 state = NONE;
246 }
247 else {
248 buffer[bufpos++] = ch;
249 if(bufpos == bufsize) {
 // NOTE(review): listing line 250 is likewise missing from this view;
 // what is visible only restarts the buffer when it fills up.
251 bufpos = 0;
252 }
253 }
254 break;
 // Inside "<![CDATA[ ... ]]>".
255 case CDATA:
256 putBuffer(ch);
 // Look for the "]]>" terminator in the last three buffered bytes.
 // NOTE(review): eq() is not defined in this view; if it is a C-string
 // equality test, buffer is not NUL-terminated at bufpos here - verify.
257 if(bufpos > 2)
258 if(eq(&buffer[bufpos - 3], "]]>")) {
259 bufpos -= 3;
260 state = NONE;
261 clearBuffer();
262 }
263 break;
 // Inside a DTD subset: skip content, tracking nested '<' ... '>' in dcount.
264 case DTD:
265 if(ch == '<')
266 ++dcount;
267 else if(ch == '>' && dcount)
268 --dcount;
 // '>' at nesting depth zero ends the DTD.
269 else if(ch == '>')
270 state = NONE;
271 break;
 // Between tags.
272 case NONE:
273 if(ch == '<') {
274 clearBuffer();
275 state = TAG;
276 }
 // Entity references and text are only handled while ecount is non-zero
 // (presumably the open-element depth); other input is ignored.
277 else if(ecount && ch == '&') {
278 clearBuffer();
279 state = AMP;
280 }
281 else if(ecount)
282 putBuffer(ch);
283 break;
284 case END:
285 return true;
286 }
 // A handler above may have moved the parser to END.
287 if(state == END)
288 return true;
289 }
290 // End of input before end of document...
291 return false;
292}
293
// Feed one chunk of a document to the parser.
//
// Unlike parse(), this does not reset bufpos, ecount or dcount, so state
// carries over between calls. Only a previous END state is turned back into
// NONE on entry. Exactly len bytes starting at data are consumed.
//
// Returns false when an entity reference is malformed or unknown, or when
// parseTag() rejects a tag. Otherwise returns true once the chunk has been
// consumed - as visible here, this does not depend on whether END was reached.
294bool XMLParser::partial(const char *data, size_t len)
295{
296 if(state == END)
297 state = NONE;
298
299 unsigned char cp;
300 while(len--) {
301 switch(state) {
 // Inside an entity reference ("&...;"): collect the name in buffer.
302 case AMP:
 // '#' is accepted only as the first character (numeric reference).
 // NOTE(review): this write is not checked against bufsize.
303 if((!bufpos && *data == '#') || isElement(*data)) {
304 buffer[bufpos++] = *data;
305 break;
306 }
 // Any terminator other than ';' is a syntax error.
307 if(*data != ';')
308 return false;
309 buffer[bufpos] = 0;
 // Numeric reference: atoi() reads decimal digits only and the result is
 // narrowed to a single unsigned char.
310 if(buffer[0] == '#')
311 cp = atoi(buffer + 1);
312 else if(eq(buffer, "amp"))
313 cp = '&';
314 else if(eq(buffer, "lt"))
315 cp = '<';
316 else if(eq(buffer, "gt"))
317 cp = '>';
 // NOTE(review): "apos" is decoded to a backtick here; XML defines &apos;
 // as the apostrophe (U+0027) - confirm whether this is intentional.
318 else if(eq(buffer, "apos"))
319 cp = '`';
320 else if(eq(buffer, "quot"))
321 cp = '\"';
322 else
323 return false;
 // Deliver the single decoded byte as character data.
324 characters((caddr_t)&cp, 1);
325 bufpos = 0;
326 state = NONE;
327 break;
 // Inside "<...": collect the tag text until '>' or a special prefix.
328 case TAG:
329 if(*data == '>') {
 // Leave TAG first; parseTag() works on the collected buffer.
330 state = NONE;
331 if(!parseTag())
332 return false;
333 }
 // "<![CDATA[" - note that bufpos is not reset on this transition.
334 else if(*data == '[' && bufpos == 7 && !strncmp(buffer, "![CDATA", 7)) {
335 state = CDATA;
336 }
 // "<!--"
337 else if(*data == '-' && bufpos == 2 && !strncmp(buffer, "!-", 2)) {
338 state = COMMENT;
339 bufpos = 0;
340 }
 // "<!DOCTYPE ... [" opens an internal DTD subset.
 // NOTE(review): bufpos is not checked to be >= 9 before comparing 9 bytes.
341 else if(*data == '[' && !strncmp(buffer, "!DOCTYPE ", 9)) {
342 state = DTD;
343 bufpos = 0;
344 }
345 else
346 putBuffer(*data);
347 break;
 // Inside "<!-- ... -->".
348 case COMMENT:
 // '>' preceded by "--" ends the comment; drop the two dashes.
349 if(*data == '>' && bufpos >= 2 && !strncmp(&buffer[bufpos - 2], "--", 2)) {
350 bufpos -= 2;
351 if(bufpos)
 // NOTE(review): the numbering embedded in this listing skips 352, so the
 // statement controlled by the if above is not visible in this view
 // (presumably the comment() callback) - confirm against upstream.
353 bufpos = 0;
354 state = NONE;
355 }
356 else {
357 buffer[bufpos++] = *data;
358 if(bufpos == bufsize) {
 // NOTE(review): listing line 359 is likewise missing from this view;
 // what is visible only restarts the buffer when it fills up.
360 bufpos = 0;
361 }
362 }
363 break;
 // Inside "<![CDATA[ ... ]]>".
364 case CDATA:
365 putBuffer(*data);
 // Look for the "]]>" terminator in the last three buffered bytes.
 // NOTE(review): eq() is not defined in this view; if it is a C-string
 // equality test, buffer is not NUL-terminated at bufpos here - verify.
366 if(bufpos > 2)
367 if(eq(&buffer[bufpos - 3], "]]>")) {
368 bufpos -= 3;
369 state = NONE;
370 clearBuffer();
371 }
372 break;
 // Inside a DTD subset: skip content, tracking nested '<' ... '>' in dcount.
373 case DTD:
374 if(*data == '<')
375 ++dcount;
376 else if(*data == '>' && dcount)
377 --dcount;
 // '>' at nesting depth zero ends the DTD.
378 else if(*data == '>')
379 state = NONE;
380 break;
 // NONE and END share one handler: input that follows a completed document
 // within the same chunk is treated like input between tags.
381 case NONE:
382 case END:
383 if(*data == '<') {
384 clearBuffer();
385 state = TAG;
386 }
 // Entity references and text are only handled while ecount is non-zero
 // (presumably the open-element depth); other input is ignored.
387 else if(ecount && *data == '&') {
388 clearBuffer();
389 state = AMP;
390 }
391 else if(ecount)
392 putBuffer(*data);
393 }
 // Advance to the next input byte. The AMP "break" inside the if-block
 // above leaves the switch, so it also reaches this increment.
394 ++data;
395 }
396 return true;
397}
398
400{
401 size_t len = bufpos;
402 const char *data = buffer;
403 bool end = false;
404 caddr_t attrib[128];
405 unsigned attr = 0;
406 char *ep;
407
408 if(*data == '/') {
409 while(--len) {
410 if(!isElement(*(++data)))
411 break;
412 }
413 if(len)
414 return false;
415
416 buffer[bufpos] = 0;
417 endElement((caddr_t)(buffer + 1));
418 bufpos = 0;
419 --ecount;
420 if(ecount < 0)
421 return false;
422 if(!ecount) {
423 state = END;
424 endDocument();
425 }
426 }
427 else if(*data == '!') {
428 bufpos = 0;
429 return true; // dtd
430 }
431 else if(*data == '?') {
432 if(!strnicmp(data, "?xml version=\"", 14)) {
433 // version info
434 }
435 bufpos = 0;
436 }
437 else if(!isElement(*data))
438 return false;
439 else {
440 end = false;
441 if(buffer[bufpos - 1] == '/') {
442 --bufpos;
443 end = true;
444 }
445 len = 0;
446 data = buffer;
447 while(len < bufpos) {
448 if(!isElement(*data))
449 break;
450 ++len;
451 ++data;
452 }
453 if(len == bufpos) {
454 if(!ecount)
456 ++ecount;
457 attrib[0] = attrib[1] = NULL;
458 buffer[bufpos] = 0;
459 startElement((caddr_t)buffer, attrib);
460 if(end) {
461ending:
462 --ecount;
464 if(!ecount) {
465 state = END;
466 endDocument();
467 }
468 }
469 bufpos = 0;
470 return true;
471 }
472 if(!ecount)
474 ++ecount;
475
476 // attributes, name is between data and len
477
478 for(;;) {
479 while(!isElement(buffer[len]) && len < bufpos) {
480 if(!isspace(buffer[len]))
481 return false;
482 buffer[len++] = 0;
483 }
484
485 if(len == bufpos)
486 break;
487
488 attrib[attr++] = (caddr_t)(buffer + len);
489 while(len < bufpos && isElement(buffer[len]))
490 ++len;
491
492 if(len == bufpos)
493 return false;
494
495 if(buffer[len] != '=')
496 return false;
497
498 buffer[len++] = 0;
499 if(len == bufpos) {
500 attrib[attr++] = (caddr_t)"";
501 break;
502 }
503
504 if(isspace(buffer[len])) {
505 attrib[attr++] = (caddr_t)"";
506 continue;
507 }
508 if(buffer[len] == '\'' || buffer[len] == '\"') {
509 ep = strchr(buffer + len + 1, buffer[len]);
510 if(!ep)
511 return false;
512 attrib[attr++] = (caddr_t)buffer + len + 1;
513 *(ep++) = 0;
514 len = ep - buffer;
515 continue;
516 }
517 if(!isElement(buffer[len]))
518 return false;
519 attrib[attr++] = (caddr_t)buffer;
520 while(isElement(buffer[len]) && len < bufpos)
521 ++len;
522 if(len == bufpos) {
523 buffer[len] = 0;
524 break;
525 }
526 }
527
528 attrib[attr++] = NULL;
529 attrib[attr++] = NULL;
530 startElement((caddr_t)buffer, attrib);
531 if(end)
532 goto ending;
533 bufpos = 0;
534 return true;
535 }
536 return true;
537}
538
539// all our lovely base virtuals stubbed out so if we are lazy and forget to
540// implement something we want to ignore anyway (say comments...) we don't
541// bring whatever it is crashing down one day when we choose to add a
542// comment into an xml stream...
543
545{
546}
547
549{
550}
551
// Default handler for comment text found in the document. The body is
// intentionally empty so that a derived parser which does not care about
// comments need not override it.
// text/len: the comment bytes and their count; both are unused here.
552void XMLParser::comment(const caddr_t text, size_t len)
553{
554}
555
// Default handler for character data extracted from the document (the
// parsers also call it with a single byte for each decoded entity
// reference). The body is intentionally empty so that a derived parser may
// ignore text.
// text/len: the character bytes and their count; both are unused here.
556void XMLParser::characters(const caddr_t text, size_t len)
557{
558}
559
560} // namespace ost
__LOCAL void putBuffer(char c)
Definition: xml.cpp:52
bool parse(const char *cp)
Parse a stream buffer and return parser document completion flag.
Definition: xml.cpp:182
__LOCAL void clearBuffer(void)
Definition: xml.cpp:62
virtual ~XMLParser()
Destroy xml parser.
Definition: xml.cpp:44
bool partial(const char *address, size_t size)
Parse a chunk of data and return parser completion flag.
Definition: xml.cpp:294
__LOCAL bool parseTag(void)
Definition: xml.cpp:399
bool end(void) const
End of document check.
Definition: xml.h:144
virtual void startElement(const caddr_t name, caddr_t *attr)=0
Notify start of an element in the document.
char * buffer
Definition: xml.h:50
virtual void endDocument(void)
Notify end of document event.
Definition: xml.cpp:548
virtual void characters(const caddr_t text, size_t size)
Virtual to receive character text extracted from the document.
Definition: xml.cpp:556
unsigned bufpos
Definition: xml.h:51
virtual void comment(const caddr_t text, size_t size)
Virtual to receive embedded comments in XML document being parsed.
Definition: xml.cpp:552
virtual void endElement(const caddr_t name)=0
Notify end of an element in the document.
@ COMMENT
Definition: xml.h:49
XMLParser(unsigned size=8192)
Create xml parser.
Definition: xml.cpp:35
int ecount
Definition: xml.h:48
int dcount
Definition: xml.h:48
unsigned bufsize
Definition: xml.h:51
virtual void startDocument(void)
Notify start of document event.
Definition: xml.cpp:544
enum ost::XMLParser::@8 state
Export interfaces for library interfaces.
int strnicmp(const char *s1, const char *s2, size_t size)
Definition: cpr.cpp:104
#define caddr_t
Definition: file.h:86
Definition: address.cpp:63
Common namespace for all ucommon objects.
Definition: access.cpp:23
bool eq(const struct sockaddr *s1, const struct sockaddr *s2)
Compare two socket addresses to see if equal.
Definition: socket.h:2100
#define ch(x, y, z)
Definition: sha2.cpp:120
#define EOF
Definition: stream.cpp:51
static bool isElement(char c)
Definition: xml.cpp:27