"Fossies" - the Fresh Open Source Software Archive

Member "z-push/include/z_RTF.php" (2 Aug 2013, 25973 Bytes) of package /linux/www/old/group-e_z-push_v3.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "z_RTF.php" see the Fossies "Dox" file reference documentation.

A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.


    1 <?php
    2 /*
    3     This class contains code from rtfclass.php that was written by Markus Fischer and placed by him under
    4     GPLv2 License.
    5 
    6     =======================================NOTES FROM ORIGINAL AUTHOR====================================
    7         Rich Text Format - Parsing Class
    8         ================================
    9 
   10         (c) 2000 Markus Fischer
   11         <mfischer@josefine.ben.tuwien.ac.at>
   12            http://josefine.ben.tuwien.ac.at/~mfischer/
   13 
   14         Latest versions of this class can always be found at
   15             http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/rtfclass.phps
   16         Testing suite is available at
   17             http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/
   18 
   19         License: GPLv2
   20 
   21         Specification:
   22             http://msdn.microsoft.com/library/default.asp?URL=/library/specs/rtfspec.htm
   23 
   24         General Notes:
   25         ==============
   26         Unknown or unsupported control symbols are silently ignored
   27 
   28         Group stacking is still not supported :(
   29             group stack logic implemented; however not really used yet
   30     =====================================================================================================
   31 
   32     It was modified by me (Andreas Brodowski) to allow compressed RTF being uncompressed by code I ported from
   33     Java to PHP and adapted according the needs of Z-Push.
   34     
   35     Currently it is being used to detect empty RTF Streams from Nokia Phones in MfE Clients
   36     
   37     It needs to be used by other backend writers that need to have notes in calendar, appointment or tasks
   38     objects to be written to their databases since devices send them usually in RTF Format... With Zarafa
   39     you can write them directly to DB and Zarafa is doing the conversion job. Other Groupware systems usually
   40     don't have this possibility...
   41      
   42 */
   43 
   44 require_once "utils.php";
   45 
   46 class rtf {
   // Preset dictionary that uncompress() places in front of the output buffer
   // before decoding a compressed stream; back-references in the stream may
   // point into it. The line break inside the dictionary is CR LF ("\r\n") as
   // specified by MS-OXRTFCP - the previous "\n\r" decoded references to these
   // two bytes in the wrong order.
   var $LZRTF_HDR_DATA = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
   // Length of the dictionary above in bytes.
   var $LZRTF_HDR_LEN = 207;
   49     var $CRC32_TABLE = array(     0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
   50                                   0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
   51                                   0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
   52                                   0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
   53                                   0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
   54                                   0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
   55                                   0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
   56                                   0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
   57                                   0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
   58                                   0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
   59                                   0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
   60                                   0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
   61                                   0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
   62                                   0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
   63                                   0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
   64                                   0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
   65                                   0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
   66                                   0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
   67                                   0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
   68                                   0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
   69                                   0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
   70                                   0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
   71                                   0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
   72                                   0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
   73                                   0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
   74                                   0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
   75                                   0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
   76                                   0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
   77                                   0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
   78                                   0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
   79                                   0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
   80                                   0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D,
   81                                );
   82     
   83     var $rtf;           // rtf core stream
   84     var $rtf_len;       // length in characters of the stream (get performace due avoiding calling strlen everytime)
   85     var $err = array();     // array of error message, no entities on no error
   86 
   87     var $wantXML;       // convert to XML
   88     var $wantHTML;      // convert to HTML
   89     var $wantASCII;     // convert to HTML
   90 
   91                 // the only variable which should be accessed from the outside
   92     var $out;           // output data stream (depends on which $wantXXXXX is set to true
   93     var $outstyles;     // htmlified styles (generated after parsing if wantHTML
   94     var $styles;        // if wantHTML, stylesheet definitions are put in here
   95 
   96                 // internal parser variables --------------------------------
   97                 // control word variables
   98     var $cword;         // holds the current (or last) control word, depending on $cw
   99     var $cw;            // are we currently parsing a control word ?
  100     var $cfirst;        // could this be the first character ? so watch out for control symbols
  101 
  102     var $flags = array();   // parser flags
  103 
  104     var $queue;         // every character which is no sepcial char, not belongs to a control word/symbol; is generally considered being 'plain'
  105 
  106     var $stack = array();   // group stack
  107 
  108     /* keywords which don't follw the specification (used by Word '97 - 2000) */
  109     // not yet used
  110     var $control_exception = array(
  111         "clFitText",
  112         "clftsWidth(-?[0-9]+)?",
  113         "clNoWrap(-?[0-9]+)?",
  114         "clwWidth(-?[0-9]+)?",
  115         "tdfrmtxtBottom(-?[0-9]+)?",
  116         "tdfrmtxtLeft(-?[0-9]+)?",
  117         "tdfrmtxtRight(-?[0-9]+)?",
  118         "tdfrmtxtTop(-?[0-9]+)?",
  119         "trftsWidthA(-?[0-9]+)?",
  120         "trftsWidthB(-?[0-9]+)?",
  121         "trftsWidth(-?[0-9]+)?",
  122         "trwWithA(-?[0-9]+)?",
  123         "trwWithB(-?[0-9]+)?",
  124         "trwWith(-?[0-9]+)?",
  125         "spectspecifygen(-?[0-9]+)?",
  126     );
  127 
  // Character set id => character set name (presumably the parameter of the
  // RTF \fcharset control word - confirm against the code that reads it).
  // "PC 437" is id 254; it used to be listed under "255" as well, where it
  // was silently overwritten by the "OEM" entry.
  var $charset_table = array(
      "0"     =>  "ANSI",
      "1"     =>  "Default",
      "2"     =>  "Symbol",
      "77"    =>  "Mac",
      "128"   =>  "Shift Jis",
      "129"   =>  "Hangul",
      "130"   =>  "Johab",
      "134"   =>  "GB2312",
      "136"   =>  "Big5",
      "161"   =>  "Greek",
      "162"   =>  "Turkish",
      "163"   =>  "Vietnamese",
      "177"   =>  "Hebrew",
      "178"   =>  "Arabic",
      "179"   =>  "Arabic Traditional",
      "180"   =>  "Arabic user",
      "181"   =>  "Hebrew user",
      "186"   =>  "Baltic",
      "204"   =>  "Russian",
      "222"   =>  "Thai",
      "238"   =>  "Eastern European",
      "254"   =>  "PC 437",
      "255"   =>  "OEM",
  );
  153 
  154     /* note: the only conversion table used */
  155     var $fontmodifier_table = array(
  156         "bold"          => "b",
  157         "italic"        => "i",
  158         "underlined"    => "u",
  159         "strikethru"    => "strike",
  160     );
  161 
  162     
  // Legacy PHP 4 style constructor name. PHP 8 no longer calls a method named
  // after its class on "new", so the real work lives in __construct(); this
  // wrapper is kept so that explicit calls to rtf() keep working.
  function rtf() {
      $this->__construct();
  }

  // Constructor: start with an empty RTF stream and an empty output buffer.
  function __construct() {
      $this->rtf_len = 0;
      $this->rtf = '';

      $this->out = '';
  }
  169 
  // loadrtf - load the raw rtf data to be converted by this class
  // data = the raw rtf as accepted by uncompress()
  // returns true when a non-empty stream was loaded, false otherwise
  function loadrtf($data) {
      // Drop whatever an earlier call loaded, so that a failed load can never
      // report success because of a stale length.
      $this->rtf = '';
      $this->rtf_len = 0;

      $rtf = $this->uncompress($data);
      // uncompress() returns false on error; checking the type instead of
      // truthiness also accepts the one-character stream "0".
      if (is_string($rtf)) {
          $this->rtf = $rtf;
          $this->rtf_len = byte_strlen($rtf);
      }

      if ($this->rtf_len == 0) {
          debugLog("No data in stream found");
          return false;
      }
      return true;
  }
  182 
  // output - switch on one of the output formats
  // typ = "ascii", "xml" or "html"; any other value is ignored
  // A flag that has been switched on is never switched off again here.
  function output($typ) {
      if ($typ == "ascii") {
          $this->wantASCII = true;
      } elseif ($typ == "xml") {
          $this->wantXML = true;
      } elseif ($typ == "html") {
          $this->wantHTML = true;
      }
  }
  191 
  // uncompress - uncompress compressed rtf data
  // src = the raw rtf in LZRTF format: a 16 byte header (four 32 bit fields:
  //       compressed size, uncompressed size, magic, crc32) followed by the data
  // returns the rtf text, or false when the stream is malformed
  function uncompress($src) {
      $hdrLen = 16;
      $srcLen = byte_strlen($src);
      if ($srcLen < $hdrLen) {            // not even a complete header
          debugLog("Stream too short");
          return false;
      }

      // "V" reads the fields as little-endian regardless of the host CPU
      // (the former "L" used the byte order of the machine running PHP).
      $header = unpack("VcSize/VuSize/Vmagic/Vcrc32", byte_substr($src, 0, $hdrLen));

      // cSize counts everything after the cSize field itself
      if ($header['cSize'] != $srcLen - 4) {
          debugLog("Stream too short");
          return false;
      }

      if ($header['magic'] == 0x414c454d) {           // uncompressed RTF - return as is.
          // No CRC check here: MS-OXRTFCP defines the CRC field of an
          // uncompressed stream as 0, so it does not cover the data.
          return byte_substr($src, $hdrLen, $header['uSize']);
      }

      if ($header['magic'] != 0x75465a4c) {           // unknown magic (please report if this ever happens)
          debugLog("Unknown Magic");
          return false;
      }

      // compressed RTF - verify the checksum of everything after the header
      if ($header['crc32'] != $this->LZRTFCalcCRC32($src, $hdrLen, $srcLen - $hdrLen)) {
          debugLog("CRC MISMATCH");
          return false;
      }

      // The output starts with the preset dictionary. $out is the number of
      // bytes written so far, which equals the length of $dst as long as
      // LZRTF_HDR_LEN matches the dictionary string.
      $dst = $this->LZRTF_HDR_DATA;
      $out = $this->LZRTF_HDR_LEN;
      $oblen = $out + $header['uSize'];
      $in = $hdrLen;
      $flagCount = 0;
      $flags = 0;
      while ($out < $oblen) {
          // one control byte precedes every run of 8 tokens, lowest bit first
          if ($flagCount++ % 8 == 0) {
              if ($in >= $srcLen) {
                  debugLog("Compressed stream ends unexpectedly");
                  return false;
              }
              $flags = ord($src[$in++]);
          } else {
              $flags = $flags >> 1;
          }

          if (($flags & 1) == 1) {
              // back-reference: 12 bit offset and 4 bit length in two bytes
              if ($in + 1 >= $srcLen) {
                  debugLog("Compressed stream ends unexpectedly");
                  return false;
              }
              $hi = ord($src[$in++]);
              $lo = ord($src[$in++]);
              $offset = ($hi << 4) | ($lo >> 4);
              $length = ($lo & 0xF) + 2;
              // the offset is relative to the 4096 byte window holding the
              // write position; not-yet-written positions mean the previous window
              $offset = (int)($out / 4096) * 4096 + $offset;
              if ($offset >= $out) $offset -= 4096;
              if ($offset < 0) {
                  // points in front of the buffer - nothing was ever written there
                  debugLog("Invalid reference in compressed stream");
                  return false;
              }
              // copy byte by byte: source and destination may overlap
              for ($end = $offset + $length; $offset < $end; $offset++) {
                  $dst .= $dst[$offset];
                  $out++;
              }
          } else {
              // literal byte
              if ($in >= $srcLen) {
                  debugLog("Compressed stream ends unexpectedly");
                  return false;
              }
              $dst .= $src[$in++];
              $out++;
          }
      }

      // strip the preset dictionary again
      return byte_substr($dst, $this->LZRTF_HDR_LEN, $header['uSize']);
  }
  243 
  // LZRTFCalcCRC32 - calculates the CRC32 of the LZRTF data part
  // buf = the whole rtf data part
  // off = start point of crc calculation
  // len = length of data to calculate CRC for
  // returns the table driven CRC of buf[off .. off+len-1]
  // A separate function is needed because this variant starts at 0 and does
  // no final XOR with 0xffffffff.
  function LZRTFCalcCRC32($buf, $off, $len) {
      $c = 0;
      $end = $off + $len;
      for ($i = $off; $i < $end; $i++) {
          // $buf[$i] instead of $buf{$i}: the curly brace offset syntax is a
          // parse error since PHP 8
          $c = $this->CRC32_TABLE[($c ^ ord($buf[$i])) & 0xFF] ^ (($c >> 8) & 0x00ffffff);
      }
      return $c;
  }
  257 
  // parserInit - replace all parser flags with the defaults from the specs:
  // font size 24 and "a new paragraph begins here"
  function parserInit() {
      $defaults = array();
      $defaults["fontsize"] = 24;
      $defaults["beginparagraph"] = true;

      $this->flags = $defaults;
  }
  264 
  // parseControl - update the parser flags for one control word
  // control   = the control word without backslash and parameter
  // parameter = the numeric parameter as a string, "" if there was none
  // Control words not handled below leave the flags untouched.
  function parseControl($control, $parameter) {
      // character formatting switches: control word => flag;
      // a parameter of "0" switches the flag off, anything else switches it on
      $toggles = array(
          "b"      => "bold",
          "ul"     => "underlined",
          "i"      => "italic",
          "strike" => "strikethru",
          "sub"    => "subscription",
          "super"  => "superscription",
      );
      // "...none" variants always switch the flag off
      $switchOff = array(
          "bnone"      => "bold",
          "ulnone"     => "underlined",
          "inone"      => "italic",
          "strikenone" => "strikethru",
          "subnone"    => "subscription",
          "supernone"  => "superscription",
      );

      if (isset($switchOff[$control])) {
          $this->flags[$switchOff[$control]] = false;
          return;
      }
      if (isset($toggles[$control])) {
          $this->flags[$toggles[$control]] = !((string)$parameter == "0");
          return;
      }

      switch ($control) {
          case "fonttbl":         // font table definition start
              $this->flags["fonttbl"] = true; // signal fonttable control words they are allowed to behave as expected
              break;
          case "f":           // define or set font
              // the flag does not exist before the first \fonttbl, hence !empty()
              if (!empty($this->flags["fonttbl"])) {   // inside the font table the definition is written to; else its read from
                  $this->flags["fonttbl_current_write"] = $parameter;
              } else {
                  $this->flags["fonttbl_current_read"] = $parameter;
              }
              break;
          case "fcharset":        // consumed by flushQueue(), which stores the next queued text in $this->fonttable
              $this->flags["fonttbl_want_fcharset"] = $parameter;
              break;
          case "fs":          // sets the current fontsize; flushQueue() builds the style names from it
              $this->flags["fontsize"] = $parameter;
              break;
          case "ql":          // handle left alignment
              $this->flags["alignment"] = "left";
              break;
          case "qc":          // handle center alignment
              $this->flags["alignment"] = "center";
              break;
          case "qr":          // handle right alignment
              $this->flags["alignment"] = "right";
              break;
          case "pard":        // reset paragraph settings (only alignment)
              $this->flags["alignment"] = "";
              break;
          case "par":         // define new paragraph (for now, thats a simple break in html) begin new line
              $this->flags["beginparagraph"] = true;
              if ($this->wantHTML) {
                  $this->out .= "</div>";
              }
              if ($this->wantASCII) {
                  $this->out .= "\n";
              }
              break;
          case "plain":       // reset all font modifiers and the fontsize
              $this->flags["bold"] = false;
              $this->flags["italic"] = false;
              $this->flags["underlined"] = false;
              $this->flags["strikethru"] = false;
              // "fontsize" holds the raw \fs parameter, which counts
              // half-points: the 12pt default is 24, as in parserInit()
              $this->flags["fontsize"] = 24;

              $this->flags["subscription"] = false;
              $this->flags["superscription"] = false;
              break;
      }
  }
  362 
  /*
  Dispatch the control word to the output stream

  Splits $this->cword into its name and optional numeric parameter, hands
  both to parseControl() and, for XML output, writes a <control> element.
  A control word that does not match "letters, optional number, optional
  space" is ignored.
  */

  function flushControl() {
      // preg_match() replaces ereg(), which no longer exists since PHP 7;
      // the D modifier keeps "$" from matching in front of a trailing newline
      if (!preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/D', (string)$this->cword, $match)) {
          return;
      }

      $this->parseControl($match[1], $match[2]);
      if ($this->wantXML) {
          $this->out .= "<control word=\"".$match[1]."\"";
          if ($match[2] !== "")
              $this->out .= " param=\"".$match[2]."\"";
          $this->out .= "/>";
      }
  }
  378 
  /*
  Write $comment to the output as "<!-- ... -->"; only the XML and HTML
  output types know comments, for any other type nothing is written
  */

  function flushComment($comment) {
      if (!$this->wantXML && !$this->wantHTML) {
          return;
      }
      $this->out .= "<!-- " . $comment . " -->";
  }
  388 
  /*
  Dispatch start/end of logical rtf groups (not every output type needs it; merely debugging purpose)

  state = "open":  save the current flags on the group stack
  state = "close": restore the flags saved by the matching "open"
  For XML output a <group> / </group> element is written as well.
  */

  function flushGroup($state) {
      if ($state == "open") { /* push onto the stack */
          array_push($this->stack, $this->flags);
          if ($this->wantXML)
              $this->out .= "<group>";
      }
      if ($state == "close") { /* pop from the stack */
          // NOTE(review): last_flags is not among the properties declared at
          // the top of the class - confirm whether anything reads it
          $this->last_flags = $this->flags;
          // A '}' without matching '{' leaves the stack empty; array_pop()
          // would then return null and wipe all flags, so keep the current ones.
          if (count($this->stack) > 0) {
              $this->flags = array_pop($this->stack);
          }

          $this->flags["fonttbl_current_write"] = ""; // on group close, no more fontdefinition will be written to this id
                          // this is not really the right way to do it !
                          // of course a '}' does not necessarily denote a fonttable end; a fonttable
                          // group at least *can* contain sub-groups
                          // therefore a stacked approach is heavily needed
          $this->flags["fonttbl"] = false;        // no matter what you do, if a group closes, its fonttbl definition is closed too

          if ($this->wantXML)
              $this->out .= "</group>";
      }
  }
  414 
  // flushHead - write the opening <rtf> element; XML output only
  function flushHead() {
      if (!$this->wantXML) {
          return;
      }
      $this->out .= "<rtf>";
  }
  419 
  // flushBottom - write the closing </rtf> element; XML output only
  function flushBottom() {
      if (!$this->wantXML) {
          return;
      }
      $this->out .= "</rtf>";
  }
  424 
  // checkHtmlSpanContent - write the html tags of all font modifiers
  // (see $fontmodifier_table) whose flag is currently set
  // command = "start" writes opening tags, anything else writes closing tags
  function checkHtmlSpanContent($command) {
      $opening = ($command == "start");

      // Closing tags go out in reverse order so that the elements nest
      // properly: <b><i>text</i></b> instead of <b><i>text</b></i>.
      $table = $opening
          ? $this->fontmodifier_table
          : array_reverse($this->fontmodifier_table, true);

      // foreach replaces the each() loop (each() no longer exists in PHP 8);
      // !empty() also copes with modifiers that were never set
      foreach ($table as $flag => $tag) {
          if (!empty($this->flags[$flag])) {
              $this->out .= ($opening ? "<" : "</") . $tag . ">";
          }
      }
  }
  436 
  /*
      flush text in queue

      Queued text that follows a \fcharset control word is stored in
      $this->fonttable; any other text is written to the output for the XML
      and HTML output types. Nothing is written here for ASCII output.
      The queue is empty afterwards in all cases.
  */
  function flushQueue() {
      if (!byte_strlen($this->queue)) {
          return;
      }

      // processing logic
      // preg_match() replaces ereg(), which no longer exists since PHP 7
      $fcharset = isset($this->flags["fonttbl_want_fcharset"]) ? (string)$this->flags["fonttbl_want_fcharset"] : "";
      if (preg_match('/^[0-9]+$/D', $fcharset)) {
          // NOTE(review): the entry is keyed by the \fcharset parameter but
          // looked up below by the \f font number - confirm this is intended
          $this->fonttable[$fcharset]["charset"] = $this->queue;
          $this->flags["fonttbl_want_fcharset"] = "";
          $this->queue = "";
          return;
      }

      // output logic
      /*
          Everything which passes this is (or, at least, *should*) be only outputted plaintext
          Thats why we can safely add the css-stylesheet when using wantHTML
      */
      // The text comes from the (untrusted) document, so markup characters
      // are escaped. ISO-8859-1 makes htmlspecialchars() work byte-wise, so
      // no byte sequence is rejected as invalid.
      $text = htmlspecialchars($this->queue, ENT_QUOTES, 'ISO-8859-1');

      if ($this->wantXML) {
          $this->out .= "<plain>".$text."</plain>";
      } else if ($this->wantHTML) {
          // only output html if a valid (for now, just numeric;) fonttable is given
          $font = isset($this->flags["fonttbl_current_read"]) ? (string)$this->flags["fonttbl_current_read"] : "";
          if (preg_match('/^[0-9]+$/D', $font)) {
              if (!empty($this->flags["beginparagraph"])) {
                  $this->flags["beginparagraph"] = false;
                  $alignment = isset($this->flags["alignment"]) ? $this->flags["alignment"] : "";
                  if ($alignment != "right" && $alignment != "center") {
                      $alignment = "left";    // also the default
                  }
                  $this->out .= "<div align=\"".$alignment."\">";
              }

              $fontsize = isset($this->flags["fontsize"]) ? $this->flags["fontsize"] : "";
              $class = "f".$font."s".$fontsize;
              $family = isset($this->fonttable[$font]["charset"]) ? $this->fonttable[$font]["charset"] : "";

              /* define new style for that span */
              // the two declarations need a ';' between them
              // NOTE(review): font-size is the raw \fs parameter without a unit - confirm
              $this->styles[$class] = "font-family:".$family."; font-size:".$fontsize.";";
              /* write span start */
              $this->out .= "<span class=\"".$class."\">";

              /* check if the span content has a modifier */
              $this->checkHtmlSpanContent("start");
              /* write span content */
              $this->out .= $text;
              /* close modifiers */
              $this->checkHtmlSpanContent("stop");
              /* close span - this used to be a bare string statement without effect */
              $this->out .= "</span>";
          }
      }
      $this->queue = "";
  }
  497 
  /*
  handle special characters like \'ef

  special = the two hex digits following \' ; anything else is ignored
  ASCII output gets the byte itself, XML output a <special> element, HTML
  output a <special> element followed by a character entity.
  */

  function flushSpecial($special) {
      // exactly two hex digits, upper or lower case
      if (!preg_match('/^[0-9A-Fa-f]{2}$/D', (string)$special)) {
          return;
      }
      // no "0x" prefix: hexdec() complains about the 'x' since PHP 7.4
      $code = hexdec($special);

      if ($this->wantASCII) {
          $this->out .= chr($code);
      } else if ($this->wantXML) {
          $this->out .= "<special value=\"".$special."\"/>";
      } else if ($this->wantHTML) {
          // NOTE(review): <special> is not an html element - confirm it is wanted here
          $this->out .= "<special value=\"".$special."\"/>";

          $entities = array(
              "c1" => "&Aacute;", "e1" => "&aacute;",
              "c0" => "&Agrave;", "e0" => "&agrave;",
              "c9" => "&Eacute;", "e9" => "&eacute;",
              "c8" => "&Egrave;", "e8" => "&egrave;",
              "cd" => "&Iacute;", "ed" => "&iacute;",
              "cc" => "&Igrave;", "ec" => "&igrave;",
              "d3" => "&Oacute;", "f3" => "&oacute;",
              "d2" => "&Ograve;", "f2" => "&ograve;",
              "da" => "&Uacute;", "fa" => "&uacute;",
              "d9" => "&Ugrave;", "f9" => "&ugrave;",
              "80" => "&#8364;",
              "d1" => "&Ntilde;", "f1" => "&ntilde;",
              "c7" => "&Ccedil;", "e7" => "&ccedil;",
              "dc" => "&Uuml;",   "fc" => "&uuml;",
              "bf" => "&#191;",   "a1" => "&#161;",
              "b7" => "&middot;", "a9" => "&copy;",
              "ae" => "&reg;",    "ba" => "&ordm;",
              "aa" => "&ordf;",   "b2" => "&sup2;",
              "b3" => "&sup3;",
          );

          $hex = strtolower($special);    // the table is keyed in lower case
          if (isset($entities[$hex])) {
              $this->out .= $entities[$hex];
          } else if ($code >= 0xa0) {
              // The table above treats the bytes as Windows-1252; from 0xa0
              // upwards that code page equals the Unicode code point, so a
              // numeric entity keeps characters that have no named entry.
              $this->out .= "&#".$code.";";
          } else if ($code >= 0x20 && $code < 0x7f) {
              // printable ASCII written as \'hh
              $this->out .= htmlspecialchars(chr($code), ENT_QUOTES, 'ISO-8859-1');
          }
          // other codes (control characters, unmapped 0x81-0x9f) are dropped as before
      }
  }
  551 
  /*
  Output errors at end

  For XML output every entry of $this->err is written as a <message>
  inside one <errors> element; nothing is written when there are no errors
  or for other output types.
  */
  function flushErrors() {
      if (!$this->wantXML || !is_array($this->err) || count($this->err) == 0) {
          return;
      }

      $this->out .= "<errors>";
      // foreach always starts at the first entry and replaces each(),
      // which no longer exists in PHP 8
      foreach ($this->err as $value) {
          $this->out .= "<message>".$value."</message>";
      }
      $this->out .= "</errors>";
  }
  566 
  // makeStyles - build the <style> block for the html output in $this->outstyles,
  // with one css rule per entry of $this->styles (class name => declarations)
  function makeStyles() {
      $this->outstyles = "<style type=\"text/css\"><!--\n";
      // $this->styles stays unset when no styled text was written; foreach
      // replaces the reset()/each() pair (each() no longer exists in PHP 8)
      if (is_array($this->styles)) {
          foreach ($this->styles as $stylename => $styleattrib) {
              $this->outstyles .= ".".$stylename." { ".$styleattrib." }\n";
          }
      }
      $this->outstyles .= "--></style>\n";
  }
  575 
  576     function parse() {
  577 
  578         $this->parserInit();
  579 
  580         $i = 0;
  581         $this->cw= false;   // flag if control word is currently parsed
  582         $this->cfirst = false;  // first control character ?
  583         $this->cword = "";  // last or current control word (depends on $this->cw
  584 
  585         $this->queue = "";  // plain text data found during parsing
  586 
  587         $this->flushHead();
  588 
  589         while($i < $this->rtf_len) {
  590             switch($this->rtf[$i]) {
  591             case "{":
  592                 if($this->cw) {
  593                     $this->flushControl();
  594                     $this->cw = false; 
  595                     $this->cfirst = false;
  596                 } else
  597                     $this->flushQueue();
  598 
  599                 $this->flushGroup("open");
  600                 break;
  601             case "}":
  602                 if($this->cw) {
  603                     $this->flushControl();
  604                     $this->cw = false; 
  605                     $this->cfirst = false;
  606                 } else
  607                     $this->flushQueue();
  608 
  609                 $this->flushGroup("close");
  610                 break;
  611 
  612             case "\\":
  613                 if($this->cfirst) { // catches '\\' 
  614                     $this->queue .= "\\"; // replaced single quotes
  615                     $this->cfirst = false;
  616                     $this->cw = false;
  617                     break;
  618                 }
  619                 if($this->cw) {
  620                     $this->flushControl();
  621                 } else
  622                     $this->flushQueue();
  623                 $this->cw = true;
  624                 $this->cfirst = true;
  625                 $this->cword = "";
  626                 break;
  627             default:
  628                 if((ord($this->rtf[$i]) == 10) || (ord($this->rtf[$i]) == 13)) break; // eat line breaks
  629                 if($this->cw) { // active control word ?
  630                     /*
  631                             Watch the RE: there's an optional space at the end which IS part of
  632                         the control word (but actually its ignored by flushControl)
  633                     */
  634                     if(ereg("^[a-zA-Z0-9-]?$", $this->rtf[$i])) { // continue parsing
  635                         $this->cword .= $this->rtf[$i];
  636                         $this->cfirst = false;
  637                     } else {
  638                     /*
  639                         Control word could be a 'control symbol', like \~ or \* etc.
  640                     */
  641                         $specialmatch = false;
  642                         if($this->cfirst) {
  643                             if($this->rtf[$i] == '\'') { // expect to get some special chars
  644                                 $this->flushQueue();
  645                                 $this->flushSpecial($this->rtf[$i+1].$this->rtf[$i+2]);
  646                                 $i+=2;
  647                                 $specialmatch = true;
  648                                 $this->cw = false; 
  649                                 $this->cfirst = false; 
  650                                 $this->cword = "";
  651                             } else 
  652                                 if(ereg("^[{}\*]$", $this->rtf[$i])) {
  653                                     $this->flushComment("control symbols not yet handled");
  654                                     $specialmatch = true;
  655                                 }
  656                             $this->cfirst = false;
  657                         } else {
  658                             if($this->rtf[$i] == ' ') { // space delimtes control words, so just discard it and flush the controlword
  659                                 $this->cw = false;
  660                                 $this->flushControl();
  661                                 break;
  662                             }
  663                         }
  664                         if(!$specialmatch) {
  665                             $this->flushControl();
  666                             $this->cw = false;
  667                             $this->cfirst = false;
  668                             /*
  669                                 The current character is a delimeter, but is NOT
  670                                 part of the control word so we hop one step back
  671                                 in the stream and process it again
  672                             */
  673                             $i--;
  674                         }
  675                     }
  676                 } else {
  677                     // < and > need translation before putting into queue when XML or HTML is wanted
  678                     if(($this->wantHTML) || ($this->wantXML)) {
  679                         switch($this->rtf[$i]) {
  680                             case "<":
  681                                 $this->queue .= "&lt;";
  682                                 break;
  683                             case ">":
  684                                 $this->queue .= "&gt;";
  685                                 break;
  686                             default:
  687                                 $this->queue .= $this->rtf[$i];
  688                                 break;
  689                         }
  690                     } else 
  691                         $this->queue .= $this->rtf[$i];
  692                     }
  693                 }
  694                 $i++;
  695         }
  696         $this->flushQueue();
  697         $this->flushErrors();
  698         $this->flushBottom();
  699 
  700         if($this->wantHTML) {
  701             $this->makeStyles();
  702         }
  703     }
  704 }
  705 
  706 
  707 ?>