"Fossies" - the Fresh Open Source Software Archive

Member "devtodo-0.1.20/util/XML.cc" (28 Jun 2007, 5904 Bytes) of package /linux/privat/old/devtodo-0.1.20.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "XML.cc", see the Fossies "Dox" file reference documentation.

    1 #include "XML.h"
    2 
    3 bool XML::initialised = false;
    4 Lexer XML::xmlScan, XML::tagScan, XML::commentScan, XML::dataScan, XML::processScan;
    5 
    6 #ifdef CRASH_SIGNAL
    7 Signal2<string const &, map<string, string> const &> XML::onElementBegin;
    8 Signal1<string const &> XML::onElementEnd;
    9 Signal2<XML const &, string const &> XML::onBody;
   10 Signal2<XML const &, string const &> XML::onData;
   11 #endif
   12 
   13 XML::XML(Type type, XML *parent, Lexer::iterator &token) : _parent(parent), _type(type) {
   14     switch (type) {
   15         case Element : parseElement(token); break;
   16         case Body : parseBody(token); break;
   17         case Data : parseData(token); break;
   18     }
   19 }
   20 
   21 XML::XML() : _parent(0), _type(Element) {
   22     init();
   23 }
   24 
   25 XML::XML(char const *str) : _parent(0), _type(Element) {
   26     init();
   27     parse(str);
   28 }
   29 
   30 XML::~XML() {
   31     for (vector<XML*>::iterator i = _child.begin(); i != _child.end(); ++i)
   32         delete *i;
   33 }
   34 
   35 void XML::parse(char const *str) {
   36     try {
   37     Lexer::iterator i = xmlScan.begin(str);
   38 
   39         if (i.type() == XmlDecl) {
   40             ++i;
   41         }
   42         parseElement(i);
   43     } catch (Lexer::exception &e) {
   44         throw exception(e.what(), e.line());
   45     }
   46 }
   47 
   48 void XML::init() {
   49     // Only initialise scanners once
   50     if (!initialised) {
   51         // <?xml version="1.0" encoding="UTF-8" standalone="no"?>
   52         xmlScan.addPattern(XmlDecl, "<\\?xml.*?>[[:space:]]*");
   53         xmlScan.addPattern(XmlCommentBegin, "<!--");
   54         xmlScan.addPattern(XmlBegin, "<[a-zA-Z0-9_-]+"
   55             "([[:space:]]+[a-zA-Z_0-9-]+=(([/a-zA-Z_0-9,.]+)|(\"[^\"]*\")|('[^']*')))"
   56             "*[[:space:]]*(/?)>");
   57         xmlScan.addPattern(XmlEnd, "</[a-zA-Z0-9_-]+>");
   58         xmlScan.addPattern(XmlDataBegin, "<!DATA[[:space:]]*\\[\\[");
   59         xmlScan.addPattern(XmlContent, "([\n\r]|[^<])+");
   60 
   61 
   62         commentScan.addPattern(CommentEnd, "-->[[:space:]]*");
   63         commentScan.addPattern(CommentBody, "[\n\r]|.");
   64 
   65         tagScan.addPattern(ElementWS, "[[:space:]]+", true);
   66         tagScan.addPattern(ElementValue, "('(\\.|[^'])*')|(\"(\\.|[^\"])*\")");
   67         tagScan.addPattern(ElementKey, "([a-zA-Z_][a-zA-Z0-9-]*)");
   68         tagScan.addPattern(ElementAssignment, "=");                                          
   69         tagScan.addPattern(ElementTerminator, "/");
   70 
   71         dataScan.addPattern(DataEnd, "]]>");
   72         dataScan.addPattern(DataBody, "[\n\r]|.");
   73 
   74         processScan.addPattern(ProcessBegin, "<\\?xml");
   75         processScan.addPattern(ProcessBody, "\\?>|[^?][^>]");
   76         processScan.addPattern(ProcessEnd, "\\?>");
   77 
   78         initialised = true;
   79     }
   80 }
   81 
   82 // Skip comments
   83 void XML::skip(Lexer::iterator &token) {
   84     while (token.type() == XmlCommentBegin)
   85     {
   86     int skip = 0;
   87 
   88         try {
   89             for (Lexer::iterator i = commentScan.begin(token.source()); i != commentScan.end(); ++i) {
   90                 skip += i.size();
   91                 if (i.type() == CommentEnd) 
   92                     break;
   93             }
   94         } catch (Lexer::exception &e) {
   95             throw exception(e.what(),  token.line() + e.line() - 1);
   96         }
   97         token.skip(skip);
   98         ++token;
   99     }
  100 }
  101 
  102 // Get next token, skipping any comments
  103 void XML::next(Lexer::iterator &token) {
  104     ++token;
  105     skip(token);
  106 }
  107 
  108 void XML::parseElement(Lexer::iterator &token) {
  109     skip(token);
  110 
  111     if (token.type() != XmlBegin)
  112         throw exception("expected element, got '" + token.value() + "'", token.line());
  113 
  114 char str[token.size()];
  115     strncpy(str, token.value().c_str() + 1, token.size() - 2);
  116     str[token.size() - 2] = 0;
  117     
  118     try {
  119     Lexer::iterator i = tagScan.begin(str);
  120 
  121         if (i.type() != ElementKey)
  122             throw exception("invalid key", token.line());
  123         _data = i.value();
  124 
  125         // Extract attributes
  126         for (++i; i != tagScan.end(); ++i) {
  127 
  128             if (i.type() == ElementTerminator) {
  129                 next(token);
  130                 return;
  131             }
  132             if (i.type() != ElementKey)
  133                 throw exception("expected key for attribute, got '" + i.value() + "'", token.line());
  134         string k = i.value();
  135             ++i;
  136             if (i.type() != ElementAssignment)
  137                 throw exception("expected assignment operator after attribute key, got '" + i.value() + "'", token.line());
  138             ++i;
  139             if (i.type() != ElementValue)
  140                 throw exception("expected value for key '" + k + "', got '" + i.value() + "'", token.line());
  141             _attrib[k] = str::stripcslashes(i.value().substr(1, i.size() - 2));
  142         }
  143     } catch (Lexer::exception &e) {
  144         throw exception(e.what(), token.line() + e.line() - 1);
  145     }
  146 
  147 #ifdef CRASH_SIGNAL
  148     XML::onElementBegin(_data, _attrib);
  149 #endif
  150 
  151     next(token);
  152 
  153     // Scan children
  154     while (token != xmlScan.end())
  155         switch (token.type())
  156         {
  157             case XmlBegin :
  158                 _child.push_back(new XML(Element, this, token));
  159             break;
  160             case XmlDataBegin :
  161                 _child.push_back(new XML(Data, this, token));
  162             break;
  163             case XmlEnd :
  164                 if (token.value().substr(2, token.size() - 3) != _data)
  165                     throw exception("expected tag closure for '" + _data + "', got '" + token.value() + "'", token.line());
  166 #ifdef CRASH_SIGNAL
  167                 XML::onElementEnd(_data);
  168 #endif
  169                 next(token);
  170                 return;
  171             break;
  172             case XmlContent :
  173                 _child.push_back(new XML(Body, this, token));
  174             break;
  175             default:
  176                 throw exception("unexpected token '" + token.value() + "'", token.line());
  177             break;
  178         }
  179 }
  180 
  181 void XML::parseBody(Lexer::iterator &token)
  182 {
  183     skip(token);
  184 
  185     if (token.type() != XmlContent)
  186         throw exception("expected body, got '" + token.value() + "'", token.line());
  187 
  188     // text is buffered into the buffer, then appended to _data as it fills
  189 char const *s = token.value().c_str();
  190 unsigned size = token.value().size();
  191 
  192     for (unsigned i = 0; i < size; ++i) {
  193         if (s[i] == '&') {
  194             if (!strncmp(s + i, "&lt;", 4)) {
  195                 _data += '<';
  196                 i += 3;
  197             } else
  198             if (!strncmp(s + i, "&gt;", 4)) {
  199                 _data += '>';
  200                 i += 3;
  201             } else
  202             if (!strncmp(s + i, "&amp;", 5)) {
  203                 _data += '&';
  204                 i += 4;
  205             }
  206         } else
  207             _data += s[i];
  208     }
  209 #ifdef CRASH_SIGNAL
  210     XML::onBody(*_parent, _data);
  211 #endif
  212 
  213     next(token);
  214 }
  215 
  216 void XML::parseData(Lexer::iterator &token)
  217 {
  218     skip(token);
  219 
  220 int skip = 0;
  221 
  222     for (Lexer::iterator i = dataScan.begin(token.source()); i != dataScan.end(); ++i)
  223     {
  224         skip += i.size();
  225         if (i.type() == DataEnd) break;
  226         _data += i.value();
  227     }
  228 
  229     token.skip(skip);
  230 
  231 #ifdef CRASH_SIGNAL
  232     XML::onData(*_parent, _data);
  233 #endif
  234 }