A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.
<?php
/*
    This class contains code from rtfclass.php that was written by Markus Fischer and placed by him under
    GPLv2 License.

    =======================================NOTES FROM ORIGINAL AUTHOR====================================
    Rich Text Format - Parsing Class
    ================================

    (c) 2000 Markus Fischer
    <mfischer@josefine.ben.tuwien.ac.at>
    http://josefine.ben.tuwien.ac.at/~mfischer/

    Latest versions of this class can always be found at
    http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/rtfclass.phps
    Testing suite is available at
    http://josefine.ben.tuwien.ac.at/~mfischer/developing/php/rtf/

    License: GPLv2

    Specification:
    http://msdn.microsoft.com/library/default.asp?URL=/library/specs/rtfspec.htm

    General Notes:
    ==============
    Unknown or unsupported control symbols are silently ignored

    Group stacking is still not supported :(
    group stack logic implemented; however not really used yet
    =====================================================================================================

    It was modified by me (Andreas Brodowski) to allow compressed RTF being uncompressed by code I ported from
    Java to PHP and adapted according the needs of Z-Push.

    Currently it is being used to detect empty RTF Streams from Nokia Phones in MfE Clients

    It needs to be used by other backend writers that needs to have notes in calendar, appointment or tasks
    objects to be written to their databases since devices send them usually in RTF Format... With Zarafa
    you can write them directly to DB and Zarafa is doing the conversion job. Other Groupware systems usually
    don't have this possibility...

*/

require_once "utils.php";

/**
 * Decompresses "compressed RTF" (LZFu) streams and converts the resulting RTF
 * into a simple XML, HTML or ASCII representation.
 *
 * Typical use: loadrtf($data), output("ascii"|"xml"|"html"), parse(), then
 * read $out (and $outstyles for HTML).
 *
 * Relies on byte_strlen(), byte_substr() and debugLog() from utils.php.
 */
class rtf {
    // Dictionary pre-load defined by the compressed RTF format; back-references may point into it.
    // The line break inside it is CR LF.
    public $LZRTF_HDR_DATA = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
    public $LZRTF_HDR_LEN = 207;    // byte length of $LZRTF_HDR_DATA
    // Lookup table used by LZRTFCalcCRC32()
    public $CRC32_TABLE = array(
        0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
        0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
        0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
        0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
        0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
        0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
        0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
        0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
        0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
        0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
        0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
        0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
        0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
        0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
        0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
        0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
        0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
        0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
        0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
        0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
        0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
        0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
        0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
        0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
        0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
        0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
        0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
        0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
        0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
        0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
        0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
        0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D,
    );

    public $rtf;                // rtf core stream
    public $rtf_len;            // length in bytes of the stream (avoids calling strlen every time)
    public $err = array();      // array of error messages, no entries on no error

    public $wantXML = false;    // convert to XML
    public $wantHTML = false;   // convert to HTML
    public $wantASCII = false;  // convert to ASCII

    // the only variable which should be accessed from the outside
    public $out;                // output data stream (depends on which $wantXXXXX is set to true)
    public $outstyles;          // htmlified styles (generated after parsing if wantHTML)
    public $styles = array();   // if wantHTML, stylesheet definitions are put in here

    // internal parser variables --------------------------------
    // control word variables
    public $cword;              // holds the current (or last) control word, depending on $cw
    public $cw;                 // are we currently parsing a control word ?
    public $cfirst;             // could this be the first character ? so watch out for control symbols

    public $flags = array();    // parser flags

    public $queue;              // every character which is no special char and does not belong to a
                                // control word/symbol; generally considered being 'plain'

    public $stack = array();    // group stack

    public $fonttable = array();    // texts captured after \fcharset, filled by flushQueue()
    public $last_flags = array();   // flags of the most recently closed group, set by flushGroup()

    /* keywords which don't follow the specification (used by Word '97 - 2000) */
    // not yet used
    public $control_exception = array(
        "clFitText",
        "clftsWidth(-?[0-9]+)?",
        "clNoWrap(-?[0-9]+)?",
        "clwWidth(-?[0-9]+)?",
        "tdfrmtxtBottom(-?[0-9]+)?",
        "tdfrmtxtLeft(-?[0-9]+)?",
        "tdfrmtxtRight(-?[0-9]+)?",
        "tdfrmtxtTop(-?[0-9]+)?",
        "trftsWidthA(-?[0-9]+)?",
        "trftsWidthB(-?[0-9]+)?",
        "trftsWidth(-?[0-9]+)?",
        "trwWithA(-?[0-9]+)?",
        "trwWithB(-?[0-9]+)?",
        "trwWith(-?[0-9]+)?",
        "spectspecifygen(-?[0-9]+)?",
    );

    // \fcharset parameter => charset name (not referenced by the code in this class)
    public $charset_table = array(
        "0" => "ANSI",
        "1" => "Default",
        "2" => "Symbol",
        "77" => "Mac",
        "128" => "Shift Jis",
        "129" => "Hangul",
        "130" => "Johab",
        "134" => "GB2312",
        "136" => "Big5",
        "161" => "Greek",
        "162" => "Turkish",
        "163" => "Vietnamese",
        "177" => "Hebrew",
        "178" => "Arabic",
        "179" => "Arabic Traditional",
        "180" => "Arabic user",
        "181" => "Hebrew user",
        "186" => "Baltic",
        "204" => "Russian",
        "222" => "Thai",
        "238" => "Eastern European",
        "254" => "PC 437",  // was a second "255" key, which the "OEM" entry silently overwrote
        "255" => "OEM",
    );

    /* note: the only conversion table used (parser flag name => html tag) */
    public $fontmodifier_table = array(
        "bold" => "b",
        "italic" => "i",
        "underlined" => "u",
        "strikethru" => "strike",
    );

    /**
     * Legacy PHP 4 style constructor name, kept as a plain method so existing
     * explicit calls keep working. Declared before __construct() on purpose.
     */
    public function rtf() {
        $this->__construct();
    }

    /**
     * Starts with an empty input stream and empty output.
     */
    public function __construct() {
        $this->rtf_len = 0;
        $this->rtf = '';

        $this->out = '';
    }

    /**
     * Loads the raw (LZRTF-wrapped) data to be converted by this class.
     *
     * @param string $data the raw rtf as accepted by uncompress()
     * @return bool true if a non-empty RTF stream was obtained, false otherwise
     */
    public function loadrtf($data) {
        // forget the length of a previously loaded stream so a failed reload reports "no data"
        $this->rtf_len = 0;
        if (($this->rtf = $this->uncompress($data))) {
            $this->rtf_len = byte_strlen($this->rtf);
        }
        if ($this->rtf_len == 0) {
            debugLog("No data in stream found");
            return false;
        }
        return true;
    }

    /**
     * Selects an output format for parse(). Unknown values are ignored;
     * formats selected by earlier calls stay selected.
     *
     * @param string $typ "ascii", "xml" or "html"
     */
    public function output($typ) {
        switch ($typ) {
            case "ascii": $this->wantASCII = true; break;
            case "xml": $this->wantXML = true; break;
            case "html": $this->wantHTML = true; break;
            default: break;
        }
    }

    /**
     * Unwraps an LZRTF stream: 16 byte header (cSize, uSize, magic, crc32)
     * followed by either plain or LZ compressed RTF.
     *
     * @param string $src the raw rtf in LZRTF format
     * @return string|false the RTF text, or false if the stream is malformed
     */
    public function uncompress($src) {
        $srcLen = byte_strlen($src);
        if ($srcLen < 16) {
            // not even a complete header; unpack() below would fail
            debugLog("Stream too short");
            return false;
        }

        // header fields are little-endian, hence "V" rather than machine-order "L"
        $header = unpack("VcSize/VuSize/Vmagic/Vcrc32", byte_substr($src, 0, 16));
        $in = 16;
        if ($header['cSize'] != $srcLen - 4) {
            debugLog("Stream too short");
            return false;
        }

        if ($header['magic'] == 0x414c454d) {           // uncompressed RTF - return as is.
            // the CRC field of uncompressed streams is not a checksum of the payload, so it is not verified
            return byte_substr($src, $in, $header['uSize']);
        }

        if ($header['magic'] != 0x75465a4c) {           // unknown magic - return false (please report if this ever happens)
            debugLog("Unknown Magic");
            return false;
        }

        // compressed RTF - verify, then uncompress.
        if ($header['crc32'] != $this->LZRTFCalcCRC32($src, 16, (($header['cSize'] + 4)) - 16)) {
            debugLog("CRC MISMATCH");
            return false;
        }

        // $dst starts out as the pre-loaded dictionary; $out is always the byte length of $dst
        $dst = $this->LZRTF_HDR_DATA;
        $out = $this->LZRTF_HDR_LEN;
        $oblen = $this->LZRTF_HDR_LEN + $header['uSize'];
        $flagCount = 0;
        $flags = 0;
        while ($out < $oblen) {
            // one control byte precedes every run of 8 tokens; bit 0 describes the next token
            if ($flagCount++ % 8 == 0) {
                if ($in >= $srcLen) {
                    debugLog("Compressed stream truncated");
                    return false;
                }
                $flags = ord($src[$in++]);
            } else {
                $flags = $flags >> 1;
            }

            if (($flags & 1) == 1) {
                // back-reference: 12 bit offset into the 4096 byte window, 4 bit length (+2)
                if ($in + 1 >= $srcLen) {
                    debugLog("Compressed stream truncated");
                    return false;
                }
                $offset = ord($src[$in++]);
                $length = ord($src[$in++]);
                $offset = ($offset << 4) | ($length >> 4);
                $length = ($length & 0xF) + 2;
                // translate the window offset into an absolute position in $dst
                $offset = (int)($out / 4096) * 4096 + $offset;
                if ($offset >= $out) $offset -= 4096;
                if ($offset < 0) {
                    // points before the start of the dictionary
                    debugLog("Invalid back-reference in compressed stream");
                    return false;
                }
                $end = $offset + $length;
                // byte-wise copy: source and destination ranges may overlap
                while ($offset < $end) {
                    $dst .= $dst[$offset++];
                    $out++;
                }
            } else {
                // literal byte
                if ($in >= $srcLen) {
                    debugLog("Compressed stream truncated");
                    return false;
                }
                $dst .= $src[$in++];
                $out++;
            }
        }

        // strip the pre-loaded dictionary again
        return byte_substr($dst, $this->LZRTF_HDR_LEN, $header['uSize']);
    }

    /**
     * Calculates the CRC32 of the LZRTF data part.
     * This function is necessary since in RTF there is no XOR 0xffffffff being done
     * (the register starts at 0 and is returned as is).
     *
     * @param string $buf the whole rtf data part
     * @param int    $off start point of crc calculation
     * @param int    $len length of data to calculate CRC for
     * @return int the checksum
     */
    public function LZRTFCalcCRC32($buf, $off, $len) {
        $c = 0;
        $end = $off + $len;
        for ($i = $off; $i < $end; $i++) {
            $c = $this->CRC32_TABLE[($c ^ ord($buf[$i])) & 0xFF] ^ (($c >> 8) & 0x00ffffff);
        }
        return $c;
    }

    /**
     * Resets the parser flags to their initial values.
     */
    public function parserInit() {
        /*
            Default values according to the specs
        */
        $this->flags = array(
            "fontsize" => 24,
            "beginparagraph" => true,
        );
    }

    /**
     * Applies one control word to the parser flags (and, for \par, to the output).
     * Unknown control words are ignored.
     *
     * @param string $control   control word without backslash and parameter
     * @param string $parameter numeric parameter as found in the stream, "" if none
     */
    public function parseControl($control, $parameter) {
        // "<x>none" words switch a modifier off unconditionally
        static $switchOff = array(
            "bnone" => "bold",
            "ulnone" => "underlined",
            "inone" => "italic",
            "strikenone" => "strikethru",
            "subnone" => "subscription",
            "supernone" => "superscription",
        );
        // these words switch a modifier on, unless their parameter is 0
        static $toggle = array(
            "b" => "bold",
            "ul" => "underlined",
            "i" => "italic",
            "strike" => "strikethru",
            "sub" => "subscription",
            "super" => "superscription",
        );

        if (isset($switchOff[$control])) {
            $this->flags[$switchOff[$control]] = false;
            return;
        }
        if (isset($toggle[$control])) {
            // string cast: a missing parameter must not compare equal to "0"
            $this->flags[$toggle[$control]] = !((string)$parameter == "0");
            return;
        }

        switch ($control) {
            case "fonttbl":     // font table definition start
                $this->flags["fonttbl"] = true; // signal fonttable control words they are allowed to behave as expected
                break;
            case "f":           // define or set font
                if (!empty($this->flags["fonttbl"])) {  // if its set, the fonttable definition is written to; else its read from
                    $this->flags["fonttbl_current_write"] = $parameter;
                } else {
                    $this->flags["fonttbl_current_read"] = $parameter;
                }
                break;
            case "fcharset":    // this is for preparing flushQueue; it then moves the Queue to $this->fonttable .. instead to formatted output
                $this->flags["fonttbl_want_fcharset"] = $parameter;
                break;
            case "fs":          // sets the current fontsize; is used by stylesheets (which are therefore generated on the fly)
                $this->flags["fontsize"] = $parameter;
                break;
            case "qc":          // handle center alignment
                $this->flags["alignment"] = "center";
                break;
            case "qr":          // handle right alignment
                $this->flags["alignment"] = "right";
                break;
            case "pard":        // reset paragraph settings (only alignment)
                $this->flags["alignment"] = "";
                break;
            case "par":         // define new paragraph (for now, thats a simple break in html) begin new line
                $this->flags["beginparagraph"] = true;
                if ($this->wantHTML) {
                    $this->out .= "</div>";
                }
                if ($this->wantASCII) {
                    $this->out .= "\n";
                }
                break;
            case "plain":       // reset all font modifiers and fontsize to 12
                $this->flags["bold"] = false;
                $this->flags["italic"] = false;
                $this->flags["underlined"] = false;
                $this->flags["strikethru"] = false;
                $this->flags["fontsize"] = 12;

                $this->flags["subscription"] = false;
                $this->flags["superscription"] = false;
                break;
        }
    }

    /**
     * Dispatches the control word collected in $cword: updates the parser
     * state and, for XML, writes a <control/> element. Anything that is not
     * "letters + optional number" is silently dropped.
     */
    public function flushControl() {
        // "D": "$" must only match at the very end, as it did with ereg()
        if (preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/D', (string)$this->cword, $match)) {
            $this->parseControl($match[1], $match[2]);
            if ($this->wantXML) {
                $this->out .= "<control word=\"".$match[1]."\"";
                if (byte_strlen($match[2]) > 0)
                    $this->out .= " param=\"".$match[2]."\"";
                $this->out .= "/>";
            }
        }
    }

    /**
     * If the output stream supports comments (XML, HTML), dispatch it.
     *
     * @param string $comment comment text, written as is
     */
    public function flushComment($comment) {
        if ($this->wantXML || $this->wantHTML) {
            $this->out .= "<!-- ".$comment." -->";
        }
    }

    /**
     * Dispatches start/end of logical rtf groups (not every output type needs
     * it; merely debugging purpose) and saves/restores the parser flags.
     *
     * @param string $state "open" or "close"
     */
    public function flushGroup($state) {
        if ($state == "open") { /* push onto the stack */
            array_push($this->stack, $this->flags);
            if ($this->wantXML)
                $this->out .= "<group>";
        }
        if ($state == "close") { /* pop from the stack */
            $this->last_flags = $this->flags;
            $restored = array_pop($this->stack);
            // unbalanced "}": nothing to restore, continue with an empty flag set
            $this->flags = is_array($restored) ? $restored : array();

            $this->flags["fonttbl_current_write"] = ""; // on group close, no more fontdefinition will be written to this id
                                                        // this is not really the right way to do it !
                                                        // of course a '}' not necessarily denotes a fonttable end; a fonttable
                                                        // group at least *can* contain sub-groups
                                                        // therefore a stacked approach is heavily needed
            $this->flags["fonttbl"] = false;            // no matter what you do, if a group closes, its fonttbl definition is closed too

            if ($this->wantXML)
                $this->out .= "</group>";
        }
    }

    /**
     * Writes the document start (XML only).
     */
    public function flushHead() {
        if ($this->wantXML)
            $this->out .= "<rtf>";
    }

    /**
     * Writes the document end (XML only).
     */
    public function flushBottom() {
        if ($this->wantXML)
            $this->out .= "</rtf>";
    }

    /**
     * Writes the html tags of all currently active font modifiers.
     *
     * @param string $command "start" writes opening tags; anything else writes
     *                        the closing tags, in reverse order so they nest properly
     */
    public function checkHtmlSpanContent($command) {
        $opening = ($command == "start");
        $table = $opening ? $this->fontmodifier_table : array_reverse($this->fontmodifier_table, true);
        foreach ($table as $flag => $html) {
            if (!empty($this->flags[$flag])) {
                $this->out .= ($opening ? "<" : "</").$html.">";
            }
        }
    }

    /**
     * Flushes the text in the queue: either stores it in the font table (when
     * a \fcharset was just seen) or writes it to the output.
     *
     * NOTE(review): there is no wantASCII branch here, so queued plain text is
     * not written in ASCII mode - confirm that this is intended.
     */
    public function flushQueue() {
        if (!byte_strlen($this->queue)) {
            return;
        }

        // processing logic
        if (isset($this->flags["fonttbl_want_fcharset"]) &&
            preg_match('/^[0-9]+$/D', (string)$this->flags["fonttbl_want_fcharset"])) {
            $this->fonttable[$this->flags["fonttbl_want_fcharset"]]["charset"] = $this->queue;
            $this->flags["fonttbl_want_fcharset"] = "";
            $this->queue = "";
            return;
        }

        // output logic
        /*
            Everything which passes this is (or, at least, *should*) be only outputted plaintext
            Thats why we can safely add the css-stylesheet when using wantHTML
        */
        if ($this->wantXML) {
            $this->out .= "<plain>".$this->queue."</plain>";
        } else if ($this->wantHTML) {
            $font = isset($this->flags["fonttbl_current_read"]) ? (string)$this->flags["fonttbl_current_read"] : "";
            // only output html if a valid (for now, just numeric;) fonttable is given
            if (preg_match('/^[0-9]+$/D', $font)) {
                if (!empty($this->flags["beginparagraph"])) {
                    $this->flags["beginparagraph"] = false;
                    $alignment = isset($this->flags["alignment"]) ? $this->flags["alignment"] : "";
                    if ($alignment != "right" && $alignment != "center") {
                        $alignment = "left";
                    }
                    $this->out .= "<div align=\"".$alignment."\">";
                }

                $fontsize = isset($this->flags["fontsize"]) ? $this->flags["fontsize"] : "";
                $family = isset($this->fonttable[$font]["charset"]) ? $this->fonttable[$font]["charset"] : "";
                $class = "f".$font."s".$fontsize;

                /* define new style for that span */
                $this->styles[$class] = "font-family:".$family." font-size:".$fontsize.";";
                /* write span start */
                $this->out .= "<span class=\"".$class."\">";

                /* check if the span content has a modifier */
                $this->checkHtmlSpanContent("start");
                /* write span content */
                $this->out .= $this->queue;
                /* close modifiers */
                $this->checkHtmlSpanContent("stop");
                /* close span (this used to be a bare string expression that was never written) */
                $this->out .= "</span>";
            }
        }
        $this->queue = "";
    }

    /**
     * Handles special characters like \'ef
     *
     * @param string $special the two hex digits following \'; anything else is ignored
     */
    public function flushSpecial($special) {
        // html entities for the supported codes; entities keep the output independent of this file's encoding
        static $htmlEntities = array(
            "c1" => "&Aacute;", "e1" => "&aacute;", "c0" => "&Agrave;", "e0" => "&agrave;",
            "c9" => "&Eacute;", "e9" => "&eacute;", "c8" => "&Egrave;", "e8" => "&egrave;",
            "cd" => "&Iacute;", "ed" => "&iacute;", "cc" => "&Igrave;", "ec" => "&igrave;",
            "d3" => "&Oacute;", "f3" => "&oacute;", "d2" => "&Ograve;", "f2" => "&ograve;",
            "da" => "&Uacute;", "fa" => "&uacute;", "d9" => "&Ugrave;", "f9" => "&ugrave;",
            "80" => "&euro;",
            "d1" => "&Ntilde;", "f1" => "&ntilde;", "c7" => "&Ccedil;", "e7" => "&ccedil;",
            "dc" => "&Uuml;",   "fc" => "&uuml;",   "bf" => "&iquest;", "a1" => "&iexcl;",
            "b7" => "&middot;", "a9" => "&copy;",   "ae" => "&reg;",    "ba" => "&ordm;",
            "aa" => "&ordf;",   "b2" => "&sup2;",   "b3" => "&sup3;",
        );

        $special = (string)$special;
        // exactly two hex digits, otherwise the value would end up unchecked in hexdec() and in markup
        if (!preg_match('/^[0-9A-Fa-f]{2}$/D', $special)) {
            return;
        }

        if ($this->wantASCII) {
            $this->out .= chr(hexdec($special));
        } else if ($this->wantXML) {
            $this->out .= "<special value=\"".$special."\"/>";
        } else if ($this->wantHTML) {
            $this->out .= "<special value=\"".$special."\"/>";
            $code = strtolower($special);
            if (isset($htmlEntities[$code])) {
                $this->out .= $htmlEntities[$code];
            }
        }
    }

    /**
     * Outputs the collected errors at the end (XML only).
     */
    public function flushErrors() {
        if (count($this->err) > 0 && $this->wantXML) {
            $this->out .= "<errors>";
            foreach ($this->err as $value) {
                $this->out .= "<message>".$value."</message>";
            }
            $this->out .= "</errors>";
        }
    }

    /**
     * Builds $outstyles, a <style> element with one rule per entry of $styles.
     */
    public function makeStyles() {
        $this->outstyles = "<style type=\"text/css\"><!--\n";
        if (is_array($this->styles)) {
            foreach ($this->styles as $stylename => $styleattrib) {
                $this->outstyles .= ".".$stylename." { ".$styleattrib." }\n";
            }
        }
        $this->outstyles .= "--></style>\n";
    }

    /**
     * Parses the loaded RTF stream byte by byte and appends the result, in the
     * format(s) selected via output(), to $out.
     */
    public function parse() {
        // characters that must not reach XML/HTML output verbatim
        static $markupEscapes = array("<" => "&lt;", ">" => "&gt;", "&" => "&amp;");

        $this->parserInit();

        $i = 0;
        $this->cw = false;      // flag if control word is currently parsed
        $this->cfirst = false;  // first control character ?
        $this->cword = "";      // last or current control word (depends on $this->cw)

        $this->queue = "";      // plain text data found during parsing

        $this->flushHead();

        while ($i < $this->rtf_len) {
            $ch = $this->rtf[$i];
            switch ($ch) {
                case "{":
                case "}":
                case "\\":
                    if ($this->cfirst) {
                        // directly after a backslash: \\ \{ \} are literal characters
                        $this->queue .= $ch;
                        $this->cfirst = false;
                        $this->cw = false;
                        break;
                    }
                    // whatever was collected so far ends here
                    if ($this->cw) {
                        $this->flushControl();
                    } else {
                        $this->flushQueue();
                    }
                    if ($ch == "\\") {
                        // a new control word (or symbol) starts
                        $this->cw = true;
                        $this->cfirst = true;
                        $this->cword = "";
                    } else {
                        $this->cw = false;
                        $this->cfirst = false;
                        $this->flushGroup($ch == "{" ? "open" : "close");
                    }
                    break;

                default:
                    if ($ch === "\n" || $ch === "\r") break; // eat line breaks

                    if (!$this->cw) {
                        // plain text; markup characters need translation when XML or HTML is wanted
                        if (($this->wantHTML || $this->wantXML) && isset($markupEscapes[$ch])) {
                            $this->queue .= $markupEscapes[$ch];
                        } else {
                            $this->queue .= $ch;
                        }
                        break;
                    }

                    // active control word
                    if (preg_match('/^[a-zA-Z0-9-]$/D', $ch)) { // continue parsing
                        $this->cword .= $ch;
                        $this->cfirst = false;
                        break;
                    }

                    /*
                        Control word could be a 'control symbol', like \~ or \* etc.
                    */
                    $specialmatch = false;
                    if ($this->cfirst) {
                        if ($ch == '\'') { // expect to get some special chars
                            $this->flushQueue();
                            // byte_substr: the stream may end before both hex digits
                            $this->flushSpecial((string)byte_substr($this->rtf, $i + 1, 2));
                            $i += 2;
                            $specialmatch = true;
                            $this->cw = false;
                            $this->cword = "";
                        } else if ($ch == '*') {
                            $this->flushComment("control symbols not yet handled");
                            $specialmatch = true;
                        }
                        $this->cfirst = false;
                    } else if ($ch == ' ') { // space delimits control words, so just discard it and flush the controlword
                        $this->cw = false;
                        $this->flushControl();
                        break;
                    }

                    if (!$specialmatch) {
                        $this->flushControl();
                        $this->cw = false;
                        $this->cfirst = false;
                        /*
                            The current character is a delimiter, but is NOT
                            part of the control word so we hop one step back
                            in the stream and process it again
                        */
                        $i--;
                    }
                    break;
            }
            $i++;
        }
        $this->flushQueue();
        $this->flushErrors();
        $this->flushBottom();

        if ($this->wantHTML) {
            $this->makeStyles();
        }
    }
}