A hint: This file contains one or more very long lines, so it may be easier to read in the pure-text view mode, which shows the contents as wrapped lines within the browser window.
1 static char cvsid[] = "$Header: /Users/phelps/cvs/prj/RosettaMan/rman.c,v 1.154 2003/07/26 19:00:48 phelps Exp $"; 2 3 /* 4 PolyglotMan by Thomas A. Phelps (phelps@ACM.org) 5 6 accept man pages as formatted by (10) 7 Hewlett-Packard HP-UX, AT&T System V, SunOS, Sun Solaris, OSF/1, 8 DEC Ultrix, SGI IRIX, Linux, FreeBSD, SCO 9 10 output as (9) 11 printable ASCII, section headers only, TkMan, [tn]roff, HTML, 12 LaTeX, LaTeX2e, RTF, Perl pod, MIME, DocBook XML 13 14 written March 24, 1993 15 bs2tk generalized into RosettaMan November 4-5, 1993 16 source interpretation added September 24, 1996 17 renamed PolyglotMan due to lawsuit by Rosetta, Inc. August 8, 1997 18 */ 19 20 #include <unistd.h> 21 #include <stdio.h> 22 #include <string.h> 23 #include <ctype.h> 24 #include <stdlib.h> 25 #include <assert.h> 26 #include <sys/types.h> 27 #include <sys/stat.h> 28 #include <fcntl.h> 29 30 31 /*** make #define's into consts? => can't because compilers not smart enough ***/ 32 /* maximum number of tags per line */ 33 #define MAXTAGS 50*100 34 #define MAXBUF 2*5000 35 #define MAXLINES 20000 36 #define MAXTOC 500 37 #define xputchar(c) (fcharout? putchar(c): (c)) 38 #define sputchar(c) (fcharout? 
plain[sI++]=(char)c: (char)(c)) 39 #define stagadd(tag) tagadd(tag,sI,0) 40 enum { c_dagger='\xa7', c_bullet='\xb7', c_plusminus='\xb1' }; 41 42 43 /*** tag management ***/ 44 45 enum tagtype { NOTAG, TITLE, ITALICS, BOLD, SYMBOL, SMALLCAPS, BOLDITALICS, MONO, MANREF }; /* MANREF last */ 46 struct { enum tagtype type; int first; int last; } tags[MAXTAGS], tagtmp; 47 int tagc=0; 48 struct { char *text; int type; int line; } toc[MAXTOC]; 49 int tocc=0; 50 51 52 /* characters in this list automatically prefixed by a backslash (set in output format function */ 53 char *escchars=""; 54 char *vollist = VOLLIST; 55 const char *manvalid = "._-+:"; /* in addition to alphanumerics, valid characters to find in a man page name */ 56 char *manrefname; 57 char *manrefsect; 58 59 enum command { 60 61 /*BEGINCHARTAGS,*/ 62 CHARTAB='\t', 63 CHARPERIOD='.', CHARLSQUOTE='`', CHARRSQUOTE='\'', CHARGT='>', CHARLT='<', 64 CHARAMP='&', CHARBACKSLASH='\\', CHARDASH='-', CHARHAT='^', CHARVBAR='|', 65 CHARNBSP=0xa0, CHARCENT=0xa2, CHARSECT=0xa7, CHARCOPYR=0xa9, CHARNOT=0xac, 66 CHARDAGGER=0xad, CHARREGTM=0xae, CHARDEG=0xb0, CHARPLUSMINUS=0xb1, 67 CHARACUTE=0xb4, CHARBULLET=0xb7, CHAR14=0xbc, CHAR12=0xbd, CHAR34=0xbe, 68 CHARMUL=0xd7, CHARDIV=0xf7, 69 CHANGEBAR=0x100, CHARLQUOTE, CHARRQUOTE, 70 /*ENDCHARTAGS,*/ 71 72 /*BEGINFONTTAGS,*/ 73 BEGINBOLD, ENDBOLD, BEGINITALICS, ENDITALICS, BEGINBOLDITALICS, ENDBOLDITALICS, 74 BEGINSC, ENDSC, BEGINY, ENDY, BEGINCODE, ENDCODE, BEGINMANREF, ENDMANREF, 75 FONTSIZE, 76 /*ENDFONTTAGS*/ 77 78 /*BEGINLAYOUTTAGS,*/ 79 ITAB, BEGINCENTER, ENDCENTER, HR, 80 /*ENDLAYOUTTAGS,*/ 81 82 /*BEGINSTRUCTTAGS,*/ 83 BEGINDOC, ENDDOC, BEGINCOMMENT, ENDCOMMENT, COMMENTLINE, BEGINBODY, ENDBODY, 84 BEGINHEADER, ENDHEADER, BEGINFOOTER, ENDFOOTER, BEGINLINE, ENDLINE, SHORTLINE, 85 BEGINSECTION, ENDSECTION, BEGINSUBSECTION, ENDSUBSECTION, 86 BEGINSECTHEAD, ENDSECTHEAD, BEGINSUBSECTHEAD, ENDSUBSECTHEAD, 87 BEGINBULPAIR, ENDBULPAIR, BEGINBULLET, ENDBULLET, BEGINBULTXT, 
ENDBULTXT, 88 BEGINTABLE, ENDTABLE, BEGINTABLELINE, ENDTABLELINE, BEGINTABLEENTRY, ENDTABLEENTRY, 89 BEGININDENT, ENDINDENT, BEGINCODEBLOCK, ENDCODEBLOCK, 90 91 BEGINDIFFA, ENDDIFFA, BEGINDIFFD, ENDDIFFD 92 /*,*//*ENDSTRUCTTAGS,*/ 93 }; 94 95 const char *tcltkOP[] = { "Command-Line Name", "Database Name", "Database Class" }; 96 97 98 /* characters that need special handling in any output format, *more than just a backslash* */ 99 /* characters in this list need a corresponding case statement in each output format */ 100 /*char *trouble="\t.`'><&\\^|-\xa7\xb7\xb1";*/ 101 const unsigned char trouble[]= { CHARTAB, CHARPERIOD, CHARLSQUOTE, CHARRSQUOTE, 102 CHARGT, CHARLT, CHARAMP, CHARBACKSLASH, CHARDASH, CHARHAT, CHARVBAR, CHARCENT, 103 CHARSECT, CHARCOPYR, CHARNOT, CHARDAGGER, CHARREGTM, CHARDEG, CHARPLUSMINUS, 104 CHARACUTE, CHARBULLET, CHAR14, CHAR12, CHAR34, CHARMUL, CHARDIV, 105 0 }; 106 107 108 enum command tagbeginend[][2] = { /* parallel to enum tagtype */ 109 { -1,-1 }, 110 { -1,-1 }, 111 { BEGINITALICS, ENDITALICS }, 112 { BEGINBOLD, ENDBOLD }, 113 { BEGINY, ENDY }, 114 { BEGINSC, ENDSC }, 115 { BEGINBOLDITALICS, ENDBOLDITALICS }, 116 { -1,-1 }, 117 { BEGINMANREF, ENDMANREF } 118 }; 119 120 void (*fn)(enum command) = NULL; 121 enum command prevcmd = BEGINDOC; 122 123 124 /*** globals ***/ 125 126 int fSource=-1; /* -1 => not determined yet */ 127 int finlist=0; 128 int fDiff=0; 129 FILE *difffd; 130 char diffline[MAXBUF]; 131 char diffline2[MAXBUF]; 132 char *message = NULL; 133 int fontdelta=0; 134 int intArg; 135 136 int fPara=0; /* line or paragraph groupings of text */ 137 int fSubsections=0; /* extract subsection titles too? */ 138 int fChangeleft=0; /* move change bars to left? (-1 => delete them) */ 139 int fReflow=0; 140 int fURL=0; /* scan for URLs too? */ 141 /*int fMan=1; /* invoke agressive man page filtering? */ 142 int fQS=0; /* squeeze out spaces (scnt and interword)? 
*/ 143 int fIQS=0; /* squeeze out initial spaces (controlled separately from fQS) */ 144 int fILQS=0; /* squeeze out spaces for usual indent */ 145 int fHeadfoot=0; /* show canonical header and footer at bottom? */ 146 int falluc=0; 147 int itabcnt=0; 148 int fQuiet=0; 149 int fTclTk=0; 150 151 /* patterns observed in section heads that don't conform to first-letter-uppercase-rest-lowercase pattern (stay all uc, or go all lc, or have subsequent uc) */ 152 int lcexceptionslen = -1; /* computed by system */ 153 char *lcexceptions[] = { 154 /* new rule: double/all consonants == UC? */ 155 /* articles, verbs, conjunctions, prepositions, pronouns */ 156 "a", "an", "the", 157 "am", "are", "is", "were", 158 "and", "or", 159 "by", "for", "from", "in", "into", "it", "of", "on", "to", "with", 160 "that", "this", 161 162 /* terms */ 163 "API", "CD", "GUI", "UI", /*I/O=>I/O already*/ "ID", "IDs", "OO", 164 "IOCTLs", "IPC", "RPC", 165 166 /* system names */ 167 "AWK", "cvs", "rcs", "GL", "vi", "PGP", "QuickTime", "DDD", "XPG/3", 168 "NFS", "NIS", "NIS+", "AFS", 169 "UNIX", "SysV", 170 "XFree86", "ICCCM", 171 "MH", "MIME", 172 "TeX", "LaTeX", "PicTeX", 173 "PostScript", "EPS", "EPSF", "EPSI", 174 "HTML", "URL", "WWW", 175 176 /* institution names */ 177 "ANSI", "CERN", "GNU", "ISO", "NCSA", 178 179 /* Sun-specific */ 180 "MT-Level", "SPARC", 181 182 NULL 183 }; 184 185 186 int TabStops=8; 187 int hanging=0; /* location of hanging indent (if ==0, none) */ 188 enum { NAME, SYNOPSIS, DESCRIPTION, SEEALSO, FILES, AUTHOR, RANDOM/*last!*/ }; 189 char *sectheadname[] = { 190 "NAME:NOMBRE", "SYNOPSIS", "DESCRIPTION:INTRODUCTION", "SEE ALSO:RELATED INFORMATION", "FILES", "AUTHOR:AUTHORS", "RANDOM" 191 }; 192 int sectheadid = RANDOM; 193 int oldsectheadid = RANDOM; 194 195 int fCodeline=0; 196 int fNOHY=0; /* re-linebreak so no words are hyphenated; not used by TkMan, but gotta keep for people converting formatted text */ 197 int fNORM=0; /* normalize? 
initial space => tabs, no changebars, exactly one blank line between sections */ 198 const char TABLEOFCONTENTS[] = "Table of Contents"; 199 const char HEADERANDFOOTER[] = "Header and Footer"; 200 char manName[80] = "man page"; 201 char manSect[10] = "1"; 202 const char PROVENANCE[] = 203 "manual page source format generated by PolyglotMan v" POLYGLOTMANVERSION; 204 const char HOME[] = "available at http://polyglotman.sourceforge.net/"; 205 const char horizontalrule[] = "------------------------------------------------------------"; 206 207 const int LINEBREAK = 70; 208 int linelen = 0; /* length of result in plain[] */ 209 int spcsqz; /* number of spaces squeezed out */ 210 int ccnt = 0; /* # of changebars */ 211 int scnt, scnt2; /* counts of initial spaces in line */ 212 int s_sum, s_cnt; 213 int bs_sum, bs_cnt; 214 int ncnt=0, oncnt=0; /* count of interline newlines */ 215 int CurLine=1; 216 int AbsLine=1-1; /* absolute line number */ 217 int indent=0; /* global indentation */ 218 int lindent=0; /* usual local indent */ 219 int auxindent=0; /* aux indent */ 220 int I; /* index into line/paragraph */ 221 int fcharout=1; /* show text or not */ 222 char lookahead; 223 /*int tabgram[MAXBUF];*/ /* histogram of first character positions */ 224 char buf[MAXBUF]; 225 char plain[MAXBUF]; /* current text line with control characters stripped out */ 226 char hitxt[MAXBUF]; /* highlighted text (available at time of BEGIN<highlight> signal */ 227 228 char header[MAXBUF]; /* complete line */ 229 char header2[MAXBUF]; /* SGIs have two lines of headers and footers */ 230 char header3[MAXBUF]; /* GNU and some others have a third! 
*/ 231 char footer[MAXBUF]; 232 char footer2[MAXBUF]; 233 #define CRUFTS 5 234 char *cruft[CRUFTS] = { header, header2, header3, footer, footer2 }; 235 236 char *File, *in; /* File = pointer to full file contents, in = current file pointer */ 237 char *argv0; 238 int finTable=0; 239 char tableSep='\0'; /*\t';*/ 240 /*int fTable=0; 241 int fotable=0;*/ 242 char *tblcellformat; 243 int tblcellspan; 244 /*int tblspanmax;*/ 245 int listtype=-1; /* current list type bogus to begin with */ 246 enum listtypes { DL, OL, UL }; 247 248 int fIP=0; 249 250 251 252 /*** utility functions ***/ 253 254 255 /* case insensitive versions of strcmp and strncmp */ 256 257 int 258 stricmp(const char *s1, const char *s2) { 259 assert(s1!=NULL && s2!=NULL); 260 /*strincmp(s1, s2, strlen(s1)+1);*/ 261 262 while (tolower(*s1)==tolower(*s2)) { 263 if (*s1=='\0' /*&& *s2=='\0'*/) return 0; 264 s1++; s2++; 265 } 266 267 if (tolower(*s1)<tolower(*s2)) return -1; 268 else return 1; 269 } 270 271 int lcexceptionscmp(const char **a, const char **b) { return stricmp(*a, *b); } 272 273 int 274 strincmp(const char *s1, const char *s2, size_t n) { 275 assert(s1!=NULL && s2!=NULL && n>0); 276 277 while (n>0 && tolower(*s1)==tolower(*s2)) { 278 n--; s1++; s2++; 279 } 280 if (n==0) return 0; 281 else if (tolower(*s1)<tolower(*s2)) return -1; 282 else return 1; 283 } 284 285 /* compare string and a colon-separated list of strings */ 286 int 287 strcoloncmp2(char *candidate, int end, const char *list, int sen) { 288 const char *l = list; 289 char *c,c2; 290 291 assert(candidate!=NULL && list!=NULL); 292 assert(end>=-1 && end<=255); 293 assert(sen==0 || sen==1); 294 295 if (*l==':') l++; /* tolerate a leading colon */ 296 297 /* invariant: c and v point to start of strings to compare */ 298 while (*l) { 299 assert(l==list || l[-1]==':'); 300 for (c=candidate; *c && *l; c++,l++) 301 if ((sen && *c!=*l) || (!sen && tolower(*c)!=tolower(*l))) 302 break; 303 304 /* if candidate matches a valid one as far as 
valid goes, it's a keeper */ 305 if ((*l=='\0' || *l==':') && (*c==end || end==-1)) { 306 if (*c=='\b') { 307 c2 = c[-1]; 308 while (*c=='\b' && c[1]==c2) c+=2; 309 } 310 /* no volume qualifiers with digits */ 311 if (!isdigit(*c)) return 1; 312 } 313 314 /* bump to start of next valid */ 315 while (*l && *l++!=':') /* nada */; 316 } 317 318 return 0; 319 } 320 321 int 322 strcoloncmp(char *candidate, int end, const char *list) { 323 int sen=1; 324 const char *l = list; 325 326 assert(candidate!=NULL && list!=NULL); 327 assert(end>=-1 && end<=255); 328 329 if (*l=='=') l++; else end=-1; 330 if (*l=='i') { sen=0; l++; } 331 332 return strcoloncmp2(candidate, end, l, sen); 333 } 334 335 /* strdup not universally available */ 336 char * 337 mystrdup(char *p) { 338 char *q; 339 340 if (p==NULL) return NULL; 341 342 q = malloc(strlen(p)+1); /* +1 gives space for \0 that is not reported by strlen */ 343 if (q!=NULL) strcpy(q,p); 344 return q; 345 } 346 347 348 /* given line of text, return "casified" version in place: 349 if word in exceptions list, return exception conversion 350 else uc first letter, lc rest 351 */ 352 void casify(char *p) { 353 char tmpch, *q, **exp; 354 int fuc; 355 356 for (fuc=1; *p; p++) { 357 if (isspace(*p) || strchr("&/",*p)!=NULL) fuc=1; 358 else if (fuc) { 359 /* usually */ 360 if (p[1] && isupper(p[1]) /*&& p[2] && isupper(p[2])*/) fuc=0; 361 /* check for exceptions */ 362 for (q=p; *q && !isspace(*q); q++) /*nada*/; 363 tmpch = *q; *q='\0'; 364 exp = (char **)bsearch(&p, lcexceptions, lcexceptionslen, sizeof(char *), lcexceptionscmp); 365 *q = tmpch; 366 if (exp!=NULL) { 367 for (q=*exp; *q; q++) *p++=*q; 368 fuc = 1; 369 } 370 } else *p=tolower(*p); 371 } 372 } 373 374 375 /* add an attribute tag to a range of characters */ 376 377 void 378 tagadd(int /*enum tagtype--abused in source parsing*/ type, int first, int last) { 379 assert(type!=NOTAG); 380 381 if (tagc<MAXTAGS) { 382 tags[tagc].type = type; 383 tags[tagc].first = first; 384 
tags[tagc].last = last; 385 tagc++; 386 } 387 } 388 389 390 /* 391 collect all saves to string table one one place, so that 392 if decide to go with string table instead of multiple malloc, it's easy 393 (probably few enough malloc's that more sophistication is unnecessary) 394 */ 395 396 void 397 tocadd(char *text, enum command type, int line) { 398 char *r; 399 400 assert(text!=NULL && strlen(text)>0); 401 assert(type==BEGINSECTION || type==BEGINSUBSECTION); 402 403 if (tocc<MAXTOC) { 404 r = malloc(strlen(text)+1); if (r==NULL) return; 405 strcpy(r,text); 406 toc[tocc].text = r; 407 toc[tocc].type = type; 408 toc[tocc].line = line; 409 tocc++; 410 } 411 } 412 413 414 415 char *manTitle = MANTITLEPRINTF; 416 char *manRef = MANREFPRINTF; 417 char *href; 418 int fmanRef=1; /* make 'em links or just show 'em? */ 419 420 void 421 manrefextract(char *p) { 422 char *p0; 423 static char *nonhref = "\">'"; 424 425 while (*p==' ') p++; 426 if (strincmp(p,"http",4)==0) { 427 href="%s"; manrefname = p; 428 p+=4; 429 while (*p && !isspace(*p) && !strchr(nonhref,*p)) p++; 430 } else { 431 href = manRef; 432 433 manrefname = p; 434 while (*p && *p!=' ' && *p!='(') p++; *p++='\0'; 435 while (*p==' ' || *p=='(') p++; p0=p; 436 while (*p && *p!=')') p++; 437 manrefsect = p0; 438 } 439 *p='\0'; 440 } 441 442 443 444 445 /* 446 * OUTPUT FORMATS 447 */ 448 449 void 450 formattedonly(void) { 451 fprintf(stderr, "The output formats for Tk and TkMan require nroff-formatted input\n"); 452 exit(1); 453 } 454 455 456 /* 457 * DefaultFormat -- in weak OO inheritance, top of hierarchy for everybody 458 */ 459 void 460 DefaultFormat(enum command cmd) { 461 int i; 462 463 switch (cmd) { 464 case ITAB: 465 for (i=0; i<itabcnt; i++) putchar('\t'); 466 break; 467 default: 468 /* nada */ 469 break; 470 } 471 } 472 473 474 /* 475 * DefaultLine -- in weak OO inheritance, top of hierarchy for line-based formats 476 * for output format to "inherit", have "default: DefaultLine(cmd)" and override case 
statement "methods" 477 */ 478 479 void 480 DefaultLine(enum command cmd) { 481 switch (cmd) { 482 default: 483 DefaultFormat(cmd); 484 } 485 } 486 487 488 /* 489 * DefaultPara -- top of hierarchy for output formats that are formatted by their viewers 490 */ 491 492 void 493 DefaultPara(enum command cmd) { 494 switch (cmd) { 495 default: 496 DefaultFormat(cmd); 497 } 498 } 499 500 501 502 /* 503 * Tk -- just emit list of text-tags pairs 504 */ 505 506 void 507 Tk(enum command cmd) { 508 static int skip=0; /* skip==1 when line has no text */ 509 int i; 510 511 if (fSource) formattedonly(); 512 513 /* invariant: always ready to insert text */ 514 515 switch (cmd) { 516 case BEGINDOC: 517 I=0; CurLine=1; 518 escchars = "\"[]$"; 519 printf(/*$t insert end */ "\""); 520 break; 521 case ENDDOC: 522 if (fHeadfoot) { 523 /* grr, should have +mark syntax for Tk text widget! -- maybe just just +sect#, +subsect# 524 printf("\\n\\n\" {} \"%s\\n\" {+headfoot h2}\n", HEADERANDFOOTER); 525 */ 526 printf("\\n\\n\" {} \"%s\\n\" h2\n",HEADERANDFOOTER); 527 /*printf("$t mark set headfoot %d.0\n", CurLine);*/ 528 CurLine++; 529 530 for (i=0; i<CRUFTS; i++) { 531 if (*cruft[i]) { 532 printf(/*$t insert end */"{%s} sc \\n\n", cruft[i]); 533 CurLine++; 534 } 535 } 536 } else printf("\"\n"); 537 break; 538 539 case COMMENTLINE: printf("# "); break; 540 541 case BEGINLINE: 542 /*I=0; -- need to do this at end of line so set for filterline() */ 543 /* nothing to do at start of line except catch up on newlines */ 544 for (i=0; i<ncnt; i++) printf("\\n"); 545 CurLine+=ncnt; 546 /*if (fSource) for (i=0; i<indent; i++) putchar('\t');*/ 547 break; 548 case ENDLINE: 549 /*if (!fSource) {*/ 550 if (!skip) /*if (ncnt)*/ printf("\\n"); /*else xputchar(' ');*/ 551 skip=0; 552 CurLine++; I=0; 553 /* 554 } else { 555 putchar(' '); I++; 556 } 557 */ 558 break; 559 560 case ENDSECTHEAD: 561 printf("\\n\" h2 \""); 562 tagc=0; 563 skip=1; 564 break; 565 case ENDSUBSECTHEAD: 566 printf("\\n\" h3 \""); /* 
add h3? */ 567 tagc=0; 568 skip=1; 569 break; 570 case HR: /*printf("\\n%s\\n", horizontalrule); CurLine+=2; I=0;*/ break; 571 case BEGINTABLEENTRY: 572 /*if (fSource) putchar('\t');*/ 573 break; 574 case BEGINTABLELINE: 575 case ENDTABLEENTRY: 576 break; 577 case ENDTABLELINE: 578 printf("\" tt \""); 579 /*tagadd(MONO, 0, I);*/ 580 break; 581 582 case CHANGEBAR: putchar('|'); I++; break; 583 case CHARLQUOTE: 584 case CHARRQUOTE: 585 putchar('\\'); putchar('"'); I++; 586 break; 587 case CHARLSQUOTE: 588 case CHARRSQUOTE: 589 case CHARPERIOD: 590 case CHARTAB: 591 case CHARDASH: 592 case CHARLT: 593 case CHARGT: 594 case CHARHAT: 595 case CHARVBAR: 596 case CHARAMP: 597 case CHARPLUSMINUS: 598 case CHARNBSP: 599 case CHARCENT: 600 case CHARSECT: 601 case CHARCOPYR: 602 case CHARNOT: 603 case CHARREGTM: 604 case CHARDEG: 605 case CHARACUTE: 606 case CHAR14: 607 case CHAR12: 608 case CHAR34: 609 case CHARMUL: 610 case CHARDIV: 611 putchar(cmd); I++; break; 612 case CHARDAGGER: 613 putchar('+'); I++; break; 614 case CHARBACKSLASH: printf("\\\\"); I++; break; 615 case CHARBULLET: printf("\" {} %c symbol \"",c_bullet); I++; break; 616 617 618 case BEGINSECTHEAD: 619 case BEGINSUBSECTHEAD: 620 /*if (fSource && sectheadid!=NAME) { printf("\\n\\n"); CurLine+=2; I=0; }*/ 621 tagc=0; /* section and subsection formatting controlled descriptively */ 622 /* no break;*/ 623 624 case BEGINBOLD: 625 case BEGINITALICS: 626 case BEGINBOLDITALICS: 627 case BEGINCODE: 628 case BEGINY: 629 case BEGINSC: 630 case BEGINMANREF: 631 /* end text, begin attributed text */ 632 printf("\" {} \""); 633 break; 634 635 /* rely on the fact that no more than one tag per range of text */ 636 case ENDBOLD: printf("\" b \""); break; 637 case ENDITALICS: printf("\" i \""); break; 638 case ENDBOLDITALICS: printf("\" bi \""); break; 639 case ENDCODE: printf("\" tt \""); break; 640 case ENDY: printf("\" symbol \""); break; 641 case ENDSC: printf("\" sc \""); break; 642 case ENDMANREF: printf("\" manref 
\""); break; 643 /* presentation attributes dealt with at end of line */ 644 645 case BEGINBODY: 646 /*if (fSource) { printf("\\n\\n"); CurLine+=2; I=0; }*/ 647 break; 648 case SHORTLINE: 649 /*if (fSource) { printf("\\n"); CurLine++; I=0; }*/ 650 break; 651 case ENDBODY: 652 case BEGINBULPAIR: case ENDBULPAIR: 653 /*if (fSource) { printf("\\n"); CurLine++; I=0; }*/ 654 break; 655 case BEGINBULTXT: 656 /*if (fSource) putchar('\t');*/ 657 break; 658 case BEGINBULLET: case ENDBULLET: 659 case ENDBULTXT: 660 case BEGINSECTION: case ENDSECTION: 661 case BEGINSUBSECTION: case ENDSUBSECTION: 662 case BEGINHEADER: case ENDHEADER: 663 case BEGINFOOTER: case ENDFOOTER: 664 case BEGINTABLE: case ENDTABLE: 665 case FONTSIZE: 666 case BEGININDENT: case ENDINDENT: 667 /* no action */ 668 break; 669 default: 670 DefaultLine(cmd); 671 } 672 } 673 674 675 676 677 /* 678 * TkMan -- Tk format wrapped with commands 679 */ 680 681 int linetabcnt[MAXLINES]; /* don't want to bother with realloc */ 682 int clocnt=0, clo[MAXLINES]; 683 int paracnt=0, para[MAXLINES]; 684 int rebuscnt=0, rebus[MAXLINES]; 685 int rebuspatcnt=0, rebuspatlen[25]; 686 char *rebuspat[25]; 687 688 void 689 TkMan(enum command cmd) { 690 static int lastscnt=-1; 691 static int lastlinelen=-1; 692 static int lastsect=0; 693 /*static int coalese=0;*/ 694 static int finflow=0; 695 int i; 696 char c,*p; 697 698 /* invariant: always ready to insert text */ 699 700 switch (cmd) { 701 case BEGINDOC: 702 printf("$t insert end "); /* opening quote supplied in Tk() below */ 703 Tk(cmd); 704 break; 705 case ENDDOC: 706 Tk(ENDLINE); 707 708 if (fHeadfoot) { 709 /* grr, should have +mark syntax for Tk text widget! 
710 printf("\\n\\n\" {} \"%s\\n\" {+headfoot h2}\n", HEADERANDFOOTER); 711 */ 712 printf("\\n\\n\" {} \"%s\\n\" h2\n", HEADERANDFOOTER); 713 /* printf("$t mark set headfoot end-2l\n");*/ 714 CurLine++; 715 716 for (i=0; i<CRUFTS; i++) { 717 if (*cruft[i]) { 718 printf("$t insert end {%s} sc \\n\n", cruft[i]); 719 CurLine++; 720 } 721 } 722 } else printf("\"\n"); 723 724 /* 725 printf("$t insert 1.0 {"); 726 for (i=0; i<MAXBUF; i++) if (tabgram[i]) printf("%d=%d, ", i, tabgram[i]); 727 printf("\\n\\n}\n"); 728 */ 729 730 printf("set manx(tabcnts) {"); for (i=1; i<CurLine; i++) printf("%d ", linetabcnt[i]); printf("}\n"); 731 printf("set manx(clo) {"); for (i=0; i<clocnt; i++) printf("%d ", clo[i]); printf("}\n"); 732 printf("set manx(para) {"); for (i=0; i<paracnt; i++) printf("%d ", para[i]); printf("}\n"); 733 printf("set manx(reb) {"); for (i=0; i<rebuscnt; i++) printf("%d ", rebus[i]); printf("}\n"); 734 735 break; 736 737 case BEGINCOMMENT: fcharout=0; break; 738 case ENDCOMMENT: fcharout=1; break; 739 case COMMENTLINE: break; 740 741 case ENDSECTHEAD: 742 case ENDSUBSECTHEAD: 743 lastsect=1; 744 Tk(cmd); 745 break; 746 747 case BEGINLINE: 748 Tk(cmd); 749 linetabcnt[CurLine] = itabcnt; 750 /* old pattern for command line options "^\\|*\[ \t\]+-\[^-\].*\[^ \t\]" */ 751 c = plain[0]; 752 if (linelen>=2 && ((c=='-' || c=='%' || c=='\\' || c=='$' /**/ /* not much talk of money in man pages so reasonable */) && (isalnum(plain[1]) /*<= plain[1]!='-'*//*no dash*/ || ncnt/*GNU long option*/) && plain[1]!=' ') ) clo[clocnt++] = CurLine; 753 /* 754 would like to require second letter to be a capital letter to cut down on number of matches, 755 but command names usually start with lowercase letter 756 maybe use a uppercase requirement as secondary strategy, but probably not 757 */ 758 if ((ncnt || lastsect) && linelen>0 && scnt>0 && scnt<=7/*used to be <=5 until groff spontaneously started putting in 7*/) para[paracnt++] = CurLine; 759 lastsect=0; 760 761 762 /* rebus 
too, instead of search through whole Tk widget */ 763 if (rebuspatcnt && scnt>=5 /* not sect or subsect heads */) { 764 for (p=plain; *p && *p!=' '; p++) /*empty*/; /* never first word */ 765 while (*p) { 766 for (i=0; i<rebuspatcnt; i++) { 767 if (tolower(*p) == tolower(*rebuspat[i]) && strincmp(p, rebuspat[i], rebuspatlen[i])==0) { 768 /* don't interfere with man page refs */ 769 for (; *p && !isspace(*p); p++) if (*p=='(') continue; 770 rebus[rebuscnt++] = CurLine; 771 p=""; /* break for outer */ 772 break; /* just locating any line with any rebus, not exact positions */ 773 } 774 } 775 /* just check start of words, though doesn't have to be full word (if did, could use strlen rather than strnlen) */ 776 while (*p && *p!=' ') p++; 777 while (*p && *p==' ') p++; 778 } 779 } 780 781 782 if (fReflow && !ncnt && (finflow || lastlinelen>50) && (abs(scnt-lastscnt)<=1 || abs(scnt-hanging)<=1)) { 783 finflow=1; 784 putchar(' '); 785 } else { 786 Tk(ENDLINE); 787 /*if ((CurLine&0x3f)==0x3f) printf("\"\nupdate idletasks\n$t insert end \""); blows up some Tk text buffer, apparently, on long lines*/ 788 if ((CurLine&0x1f)==0x1f) printf("\"\nupdate idletasks\n$t insert end \""); 789 finflow=0; 790 791 /*if (fCodeline) printf("CODE");*/ 792 } 793 lastlinelen=linelen; lastscnt=scnt; 794 break; 795 796 case ENDLINE: 797 /* don't call Tk(ENDLINE) */ 798 break; 799 800 default: /* if not caught above, it's the same as Tk */ 801 Tk(cmd); 802 } 803 } 804 805 806 807 808 /* 809 * ASCII 810 */ 811 812 void 813 ASCII(enum command cmd) { 814 int i; 815 816 switch (cmd) { 817 case ENDDOC: 818 if (fHeadfoot) { 819 printf("\n%s\n", HEADERANDFOOTER); 820 for (i=0; i<CRUFTS; i++) if (*cruft[i]) printf("%s\n", cruft[i]); 821 } 822 break; 823 case CHARRQUOTE: 824 case CHARLQUOTE: 825 putchar('"'); 826 break; 827 case CHARLSQUOTE: 828 putchar('`'); 829 break; 830 case CHARRSQUOTE: 831 case CHARACUTE: 832 putchar('\''); 833 break; 834 case CHARPERIOD: 835 case CHARTAB: 836 case CHARDASH: 837 
case CHARLT: 838 case CHARAMP: 839 case CHARBACKSLASH: 840 case CHARGT: 841 case CHARHAT: 842 case CHARVBAR: 843 case CHARNBSP: 844 putchar(cmd); break; 845 case CHARDAGGER: putchar('+'); break; 846 case CHARBULLET: putchar('*'); break; 847 case CHARPLUSMINUS: printf("+-"); break; 848 case CHANGEBAR: putchar('|'); break; 849 case CHARCENT: putchar('c'); break; 850 case CHARSECT: putchar('S'); break; 851 case CHARCOPYR: printf("(C)"); break; 852 case CHARNOT: putchar('~'); break; 853 case CHARREGTM: printf("(R)"); break; 854 case CHARDEG: putchar('o'); break; 855 case CHAR14: printf("1/4"); break; 856 case CHAR12: printf("1/2"); break; 857 case CHAR34: printf("3/4"); break; 858 case CHARMUL: putchar('X'); break; 859 case CHARDIV: putchar('/'); break; 860 case HR: printf("\n%s\n", horizontalrule); break; 861 862 case BEGINLINE: 863 for (i=0; i<ncnt; i++) putchar('\n'); 864 break; 865 case BEGINBODY: 866 case SHORTLINE: 867 if (!fSource) break; 868 case ENDLINE: 869 putchar('\n'); 870 CurLine++; 871 break; 872 873 case BEGINDOC: 874 case ENDBODY: 875 case BEGINHEADER: case ENDHEADER: 876 case BEGINFOOTER: case ENDFOOTER: 877 case BEGINSECTION: case ENDSECTION: 878 case BEGINSECTHEAD: case ENDSECTHEAD: 879 case BEGINSUBSECTHEAD: case ENDSUBSECTHEAD: 880 case BEGINBULPAIR: case ENDBULPAIR: 881 case BEGINBULLET: case ENDBULLET: 882 case BEGINBULTXT: case ENDBULTXT: 883 case BEGINSUBSECTION: case ENDSUBSECTION: 884 885 case BEGINTABLE: case ENDTABLE: 886 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 887 case BEGININDENT: case ENDINDENT: 888 case FONTSIZE: 889 case BEGINBOLD: case ENDBOLD: 890 case BEGINCODE: case ENDCODE: 891 case BEGINITALICS: case ENDITALICS: 892 case BEGINMANREF: case ENDMANREF: 893 case BEGINBOLDITALICS: case ENDBOLDITALICS: 894 case BEGINY: case ENDY: 895 case BEGINSC: case ENDSC: 896 /* nothing */ 897 break; 898 default: 899 DefaultLine(cmd); 900 } 901 } 902 903 904 905 /* 906 * Perl 5 pod ("plain old 
documentation") 907 */ 908 909 void 910 pod(enum command cmd) { 911 static int curindent=0; 912 int i; 913 914 if (hanging==-1) { 915 if (curindent) hanging=curindent; else hanging=5; 916 } 917 918 919 if (cmd==BEGINBULPAIR) { 920 /* want to have multiply indented text */ 921 if (curindent && hanging!=curindent) printf("\n=back\n\n"); 922 if (hanging!=curindent) printf("\n=over %d\n\n",hanging); 923 curindent=hanging; 924 } else if (cmd==ENDBULPAIR) { 925 /* nothing--wait until next command */ 926 } else if (cmd==BEGINLINE && !scnt) { 927 if (curindent) printf("\n=back\n\n"); 928 curindent=0; 929 } else if (cmd==BEGINBODY) { 930 if (curindent) { 931 printf("\n=back\n\n"); 932 curindent=0; 933 auxindent=0; 934 } 935 } 936 /* 937 case BEGINBULPAIR: 938 printf("=over %d\n\n", hanging); 939 break; 940 case ENDBULPAIR: 941 printf("\n=back\n\n"); 942 break; 943 */ 944 switch (cmd) { 945 case BEGINDOC: I=0; break; 946 947 case BEGINCOMMENT: fcharout=0; break; 948 case ENDCOMMENT: fcharout=1; break; 949 case COMMENTLINE: break; 950 951 case CHARRQUOTE: 952 case CHARLQUOTE: 953 putchar('"'); 954 break; 955 case CHARLSQUOTE: 956 putchar('`'); 957 break; 958 case CHARRSQUOTE: 959 case CHARACUTE: 960 putchar('\''); 961 break; 962 case CHARPERIOD: 963 case CHARTAB: 964 case CHARDASH: 965 case CHARLT: 966 case CHARAMP: 967 case CHARBACKSLASH: 968 case CHARGT: 969 case CHARHAT: 970 case CHARVBAR: 971 case CHARNBSP: 972 putchar(cmd); break; 973 case CHARDAGGER: putchar('+'); break; 974 case CHARPLUSMINUS: printf("+-"); break; 975 case CHANGEBAR: putchar('|'); break; 976 case CHARCENT: putchar('c'); break; 977 case CHARSECT: putchar('S'); break; 978 case CHARCOPYR: printf("(C)"); break; 979 case CHARNOT: putchar('~'); break; 980 case CHARREGTM: printf("(R)"); break; 981 case CHARDEG: putchar('o'); break; 982 case CHAR14: printf("1/4"); break; 983 case CHAR12: printf("1/2"); break; 984 case CHAR34: printf("3/4"); break; 985 case CHARMUL: putchar('X'); break; 986 case CHARDIV: 
putchar('/'); break; 987 case HR: printf("\n%s\n", horizontalrule); break; 988 case CHARBULLET: putchar('*'); break; 989 990 case BEGINLINE: 991 for (i=0; i<ncnt; i++) putchar('\n'); 992 CurLine+=ncnt; 993 break; 994 case ENDLINE: 995 putchar('\n'); 996 CurLine++; 997 I=0; 998 break; 999 1000 case BEGINSECTHEAD: printf("=head1 "); break; 1001 case BEGINSUBSECTHEAD: printf("=head2 "); break; 1002 1003 case ENDSECTHEAD: 1004 case ENDSUBSECTHEAD: 1005 printf("\n"); 1006 break; 1007 1008 case BEGINCODE: 1009 case BEGINBOLD: printf("B<"); break; 1010 case BEGINITALICS: printf("I<"); break; 1011 case BEGINMANREF: printf("L<"); break; 1012 1013 case ENDBOLD: 1014 case ENDCODE: 1015 case ENDITALICS: 1016 case ENDMANREF: 1017 printf(">"); 1018 break; 1019 1020 case BEGINBULLET: 1021 printf("\n=item "); 1022 break; 1023 case ENDBULLET: 1024 printf("\n\n"); 1025 fcharout=0; 1026 break; 1027 case BEGINBULTXT: 1028 fcharout=1; 1029 auxindent=hanging; 1030 break; 1031 case ENDBULTXT: 1032 auxindent=0; 1033 break; 1034 1035 1036 case ENDDOC: 1037 case BEGINBODY: case ENDBODY: 1038 case BEGINHEADER: case ENDHEADER: 1039 case BEGINFOOTER: case ENDFOOTER: 1040 case BEGINSECTION: case ENDSECTION: 1041 case BEGINSUBSECTION: case ENDSUBSECTION: 1042 case BEGINBULPAIR: case ENDBULPAIR: 1043 1044 case SHORTLINE: 1045 case BEGINTABLE: case ENDTABLE: 1046 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 1047 case BEGININDENT: case ENDINDENT: 1048 case FONTSIZE: 1049 case BEGINBOLDITALICS: case ENDBOLDITALICS: 1050 case BEGINY: case ENDY: 1051 case BEGINSC: case ENDSC: 1052 /* nothing */ 1053 break; 1054 default: 1055 DefaultLine(cmd); 1056 } 1057 } 1058 1059 1060 1061 void 1062 Sections(enum command cmd) { 1063 1064 switch (cmd) { 1065 case ENDSECTHEAD: 1066 case ENDSUBSECTHEAD: 1067 putchar('\n'); 1068 case BEGINDOC: 1069 fcharout=0; 1070 break; 1071 1072 case BEGINCOMMENT: fcharout=0; break; 1073 case ENDCOMMENT: fcharout=1; break; 1074 case COMMENTLINE: 
break; 1075 1076 case BEGINSUBSECTHEAD: 1077 printf(" "); 1078 /* no break */ 1079 case BEGINSECTHEAD: 1080 fcharout=1; 1081 break; 1082 case CHARRQUOTE: 1083 case CHARLQUOTE: 1084 xputchar('"'); 1085 break; 1086 case CHARLSQUOTE: 1087 xputchar('`'); 1088 break; 1089 case CHARRSQUOTE: 1090 case CHARACUTE: 1091 xputchar('\''); 1092 break; 1093 case BEGINTABLE: case ENDTABLE: 1094 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 1095 case BEGININDENT: case ENDINDENT: 1096 case FONTSIZE: 1097 break; 1098 case CHARPERIOD: 1099 case CHARTAB: 1100 case CHARDASH: 1101 case CHARBACKSLASH: 1102 case CHARLT: 1103 case CHARGT: 1104 case CHARHAT: 1105 case CHARVBAR: 1106 case CHARAMP: 1107 case CHARNBSP: 1108 xputchar(cmd); break; 1109 case CHARDAGGER: xputchar('+'); break; 1110 case CHARBULLET: xputchar('*'); break; 1111 case CHARPLUSMINUS: xputchar('+'); xputchar('-'); break; 1112 case CHARCENT: xputchar('c'); break; 1113 case CHARSECT: xputchar('S'); break; 1114 case CHARCOPYR: xputchar('('); xputchar('C'); xputchar(')'); break; 1115 case CHARNOT: xputchar('~'); break; 1116 case CHARREGTM: xputchar('('); xputchar('R'); xputchar(')'); break; 1117 case CHARDEG: xputchar('o'); break; 1118 case CHAR14: xputchar('1'); xputchar('/'); xputchar('4'); break; 1119 case CHAR12: xputchar('1'); xputchar('/'); xputchar('2'); break; 1120 case CHAR34: xputchar('3'); xputchar('/'); xputchar('4'); break; 1121 case CHARMUL: xputchar('X'); break; 1122 case CHARDIV: xputchar('/'); break; 1123 case ITAB: DefaultLine(cmd); break; 1124 1125 1126 default: 1127 /* nothing */ 1128 break; 1129 } 1130 } 1131 1132 1133 1134 void 1135 Roff(enum command cmd) { 1136 switch (cmd) { 1137 case BEGINDOC: 1138 I=1; 1139 printf(".TH %s %s \"generated by PolyglotMan\" UCB\n", manName, manSect); 1140 printf(".\\\" %s,\n", PROVENANCE); 1141 printf(".\\\" %s\n", HOME); 1142 CurLine=1; 1143 break; 1144 case BEGINBODY: printf(".LP\n"); break; 1145 1146 case BEGINCOMMENT: 1147 case 
ENDCOMMENT: 1148 break; 1149 case COMMENTLINE: printf("'\\\" "); break; 1150 1151 case BEGINSECTHEAD: printf(".SH "); break; 1152 case BEGINSUBSECTHEAD:printf(".SS "); break; 1153 case BEGINBULPAIR: printf(".IP "); break; 1154 case SHORTLINE: printf("\n.br"); break; 1155 case BEGINBOLD: printf("\\fB"); break; /* \n.B -- grr! */ 1156 case ENDCODE: 1157 case ENDBOLD: printf("\\fR"); break; /* putchar('\n'); */ 1158 case BEGINITALICS: printf("\\fI"); break; 1159 case ENDITALICS: printf("\\fR"); break; 1160 case BEGINCODE: 1161 case BEGINBOLDITALICS:printf("\\f4"); break; 1162 case ENDBOLDITALICS: printf("\\fR"); break; 1163 1164 case CHARLQUOTE: printf("\\*(rq"); break; 1165 case CHARRQUOTE: printf("\\*(lq"); break; 1166 case CHARNBSP: printf("\\|"); break; 1167 case CHARLSQUOTE: putchar('`'); break; 1168 case CHARRSQUOTE: putchar('\''); break; 1169 case CHARPERIOD: if (I==1) printf("\\&"); putchar('.'); I++; break; 1170 case CHARDASH: printf("\\-"); break; 1171 case CHARTAB: 1172 case CHARLT: 1173 case CHARGT: 1174 case CHARHAT: 1175 case CHARVBAR: 1176 case CHARAMP: 1177 putchar(cmd); break; 1178 case CHARBULLET: printf("\\(bu"); break; 1179 case CHARDAGGER: printf("\\(dg"); break; 1180 case CHARPLUSMINUS: printf("\\(+-"); break; 1181 case CHANGEBAR: putchar('|'); break; 1182 case CHARCENT: printf("\\(ct"); break; 1183 case CHARSECT: printf("\\(sc"); break; 1184 case CHARCOPYR: printf("\\(co"); break; 1185 case CHARNOT: printf("\\(no"); break; 1186 case CHARREGTM: printf("\\(rg"); break; 1187 case CHARDEG: printf("\\(de"); break; 1188 case CHARACUTE: printf("\\(aa"); break; 1189 case CHAR14: printf("\\(14"); break; 1190 case CHAR12: printf("\\(12"); break; 1191 case CHAR34: printf("\\(34"); break; 1192 case CHARMUL: printf("\\(mu"); break; 1193 case CHARDIV: printf("\\(di"); break; 1194 case HR: /*printf("\n%s\n", horizontalrule);*/ break; 1195 case CHARBACKSLASH: printf("\\\\"); break; /* correct? 
*/ 1196 1197 case BEGINLINE: 1198 /*for (i=0; i<ncnt; i++) putchar('\n');*/ 1199 break; 1200 1201 case BEGINBULLET: putchar('"'); break; 1202 case ENDBULLET: printf("\"\n"); break; 1203 1204 case ENDLINE: 1205 CurLine++; 1206 I=1; 1207 /* no break */ 1208 case ENDSUBSECTHEAD: 1209 case ENDSECTHEAD: 1210 case ENDDOC: 1211 putchar('\n'); 1212 break; 1213 1214 case BEGINCODEBLOCK: printf(".nf\n"); 1215 case ENDCODEBLOCK: printf(".fi\n"); 1216 1217 case ENDBODY: 1218 case ENDBULPAIR: 1219 case BEGINBULTXT: case ENDBULTXT: 1220 case BEGINSECTION: case ENDSECTION: 1221 case BEGINSUBSECTION: case ENDSUBSECTION: 1222 case BEGINY: case ENDY: 1223 case BEGINSC: case ENDSC: 1224 case BEGINTABLE: case ENDTABLE: 1225 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 1226 case BEGININDENT: case ENDINDENT: 1227 case FONTSIZE: 1228 case BEGINHEADER: case ENDHEADER: 1229 case BEGINFOOTER: case ENDFOOTER: 1230 case BEGINMANREF: case ENDMANREF: 1231 /* nothing */ 1232 break; 1233 default: 1234 DefaultPara(cmd); 1235 } 1236 } 1237 1238 1239 1240 /* 1241 * HTML 1242 */ 1243 1244 void 1245 HTML(enum command cmd) { 1246 static int pre=0; 1247 int i; 1248 int lasttoc; 1249 1250 /* always respond to these signals */ 1251 switch (cmd) { 1252 case CHARNBSP: printf(" "); I++; break; 1253 case CHARTAB: printf("<tt> </tt> <tt> </tt> "); break; 1254 case CHARLQUOTE: printf("“"); break; 1255 case CHARRQUOTE: printf("”"); break; 1256 case CHARLSQUOTE: printf("‘"); break; 1257 case CHARRSQUOTE: printf("’"); break; 1258 case CHARPERIOD: 1259 case CHARDASH: 1260 case CHARBACKSLASH: 1261 case CHARVBAR: /*printf("¦"); -- broken bar no good */ 1262 case CHARHAT: 1263 putchar(cmd); 1264 break; 1265 case CHARDAGGER: printf("†"); break; 1266 case CHARBULLET: if (I>0 || !finlist) printf("·"/*"·"*//*§--middot hardly visible*/); 1267 break; 1268 case CHARPLUSMINUS: printf("±"); break; 1269 case CHARGT: printf(">"); break; 1270 case CHARLT: printf("<"); break; 1271 case CHARAMP: 
printf("&"); break; 1272 case CHARCENT: printf("¢"); break; 1273 case CHARSECT: printf("§"); break; 1274 case CHARCOPYR: printf("©"); break; 1275 case CHARNOT: printf("¬"); break; 1276 case CHARREGTM: printf("®"); break; 1277 case CHARDEG: printf("°"); break; 1278 case CHARACUTE: printf("´"); break; 1279 case CHAR14: printf("¼"); break; 1280 case CHAR12: printf("½"); break; 1281 case CHAR34: printf("¾"); break; 1282 case CHARMUL: printf("×"); break; 1283 case CHARDIV: printf("÷"); break; 1284 default: 1285 break; 1286 } 1287 1288 /* while in pre mode... */ 1289 if (pre) { 1290 switch (cmd) { 1291 case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("<br>"); printf("\n"); break; 1292 case ENDTABLE: 1293 if (fSource) { 1294 printf("</table>\n"); 1295 } else { 1296 printf("</pre><br>\n"); pre=0; fQS=fIQS=fPara=1; 1297 } 1298 break; 1299 case ENDCODEBLOCK: printf("</pre>"); pre=0; break; 1300 case SHORTLINE: 1301 case ENDBODY: 1302 printf("\n"); 1303 break; 1304 default: 1305 /* nothing */ 1306 break; 1307 } 1308 return; 1309 } 1310 1311 /* usual operation */ 1312 switch (cmd) { 1313 case BEGINDOC: 1314 /* escchars = ... => HTML doesn't backslash-quote metacharacters */ 1315 printf("<!-- %s, -->\n", PROVENANCE); 1316 printf("<!-- %s -->\n\n", HOME); 1317 printf("<html>\n<head>\n"); 1318 /* printf("<isindex>\n");*/ 1319 /* better title possible? */ 1320 printf("<title>"); printf(manTitle, manName, manSect); printf("</title>\n"); 1321 printf("</head>\n<body bgcolor='white'>\n"); 1322 printf("<a href='#toc'>%s</a><p>\n", TABLEOFCONTENTS); 1323 I=0; 1324 break; 1325 case ENDDOC: 1326 /* header and footer wanted? 
*/ 1327 printf("<p>\n"); 1328 if (fHeadfoot) { 1329 printf("<hr><h2>%s</h2>\n", HEADERANDFOOTER); 1330 for (i=0; i<CRUFTS; i++) if (*cruft[i]) printf("%s<br>\n", cruft[i]); 1331 } 1332 1333 if (!tocc) { 1334 /*printf("\n<h1>ERROR: Empty man page</h1>\n");*/ 1335 } else { 1336 printf("\n<hr><p>\n"); 1337 printf("<a name='toc'><b>%s</b></a><p>\n", TABLEOFCONTENTS); 1338 printf("<ul>\n"); 1339 for (i=0, lasttoc=BEGINSECTION; i<tocc; lasttoc=toc[i].type, i++) { 1340 if (lasttoc!=toc[i].type) { 1341 if (toc[i].type==BEGINSUBSECTION) printf("<ul>\n"); 1342 else printf("</ul>\n"); 1343 } 1344 printf("<li><a name='toc%d' href='#sect%d'>%s</a></li>\n", i, i, toc[i].text); 1345 } 1346 if (lasttoc==BEGINSUBSECTION) printf("</ul>"); 1347 printf("</ul>\n"); 1348 } 1349 printf("</body>\n</html>\n"); 1350 break; 1351 case BEGINBODY: 1352 printf("<p>\n"); 1353 break; 1354 case ENDBODY: break; 1355 1356 case BEGINCOMMENT: printf("\n<!--\n"); break; 1357 case ENDCOMMENT: printf("\n-->\n"); break; 1358 case COMMENTLINE: break; 1359 1360 case BEGINSECTHEAD: 1361 printf("\n<h2><a name='sect%d' href='#toc%d'>", tocc, tocc); 1362 break; 1363 case ENDSECTHEAD: 1364 printf("</a></h2>\n"); 1365 /* useful extraction from FILES, ENVIRONMENT? 
*/ 1366 break; 1367 case BEGINSUBSECTHEAD: 1368 printf("\n<h3><a name='sect%d' href='#toc%d'>", tocc, tocc); 1369 break; 1370 case ENDSUBSECTHEAD: 1371 printf("</a></h3>\n"); 1372 break; 1373 case BEGINSECTION: break; 1374 case ENDSECTION: 1375 if (sectheadid==NAME && message!=NULL) printf(message); 1376 break; 1377 case BEGINSUBSECTION: break; 1378 case ENDSUBSECTION: break; 1379 1380 case BEGINBULPAIR: 1381 if (listtype==OL) printf("\n<ol>\n"); 1382 else if (listtype==UL) printf("\n<ul>\n"); 1383 else printf("\n<dl>\n"); 1384 break; 1385 case ENDBULPAIR: 1386 if (listtype==OL) printf("\n</ol>\n"); 1387 else if (listtype==UL) printf("\n</ul>\n"); 1388 else printf("</dl>\n"); 1389 break; 1390 case BEGINBULLET: 1391 if (listtype==OL || listtype==UL) fcharout=0; 1392 else printf("\n<dt>"); 1393 break; 1394 case ENDBULLET: 1395 if (listtype==OL || listtype==UL) fcharout=1; 1396 else printf("</dt>"); 1397 break; 1398 case BEGINBULTXT: 1399 if (listtype==OL || listtype==UL) printf("<li>"); 1400 else printf("\n<dd>"); 1401 break; 1402 case ENDBULTXT: 1403 if (listtype==OL || listtype==UL) printf("</li>"); 1404 else printf("</dd>\n"); 1405 break; 1406 1407 case BEGINLINE: 1408 /* if (ncnt) printf("<p>\n"); -- if haven't already generated structural tag */ 1409 if (ncnt) printf("\n<p>"); 1410 1411 /* trailing spaces already trimmed off, so look for eol now */ 1412 if (fCodeline) { 1413 printf("<code>"); 1414 for (i=0; i<scnt-indent; i++) printf(" "/* */); /* ? 
*/ 1415 tagc=0; 1416 1417 /* already have .tag=BOLDITALICS, .first=0 */ 1418 /* would be more elegant, but can't print initial spaces before first tag 1419 tags[0].last = linelen; 1420 tagc=1; 1421 fIQS=0; 1422 */ 1423 } 1424 1425 break; 1426 1427 case ENDLINE: 1428 /*if (fCodeline) { fIQS=1; fCodeline=0; }*/ 1429 if (fCodeline) { printf("</code><br>"); fCodeline=0; } 1430 I=0; CurLine++; if (!fPara && scnt) printf("<br>"); printf("\n"); 1431 break; 1432 1433 case SHORTLINE: 1434 if (fCodeline) { printf("</code>"); fCodeline=0; } 1435 if (!fIP) printf("<br>\n"); 1436 break; 1437 1438 1439 case BEGINTABLE: 1440 if (fSource) { 1441 /*printf("<center><table border>\n");*/ 1442 printf("<table border='0'>\n"); 1443 } else { 1444 printf("<br><pre>\n"); pre=1; fQS=fIQS=fPara=0; 1445 } 1446 break; 1447 case ENDTABLE: 1448 if (fSource) { 1449 printf("</table>\n"); 1450 } else { 1451 printf("</pre><br>\n"); pre=0; fQS=fIQS=fPara=1; 1452 } 1453 break; 1454 case BEGINTABLELINE: printf("<tr>"); break; 1455 case ENDTABLELINE: printf("</tr>\n"); break; 1456 case BEGINTABLEENTRY: 1457 printf("<td align='"); 1458 switch (tblcellformat[0]) { 1459 case 'c': printf("center"); break; 1460 case 'n': /*printf("decimal"); break; -- fall through to right for now */ 1461 case 'r': printf("right"); break; 1462 case 'l': 1463 default: 1464 printf("left"); 1465 } 1466 if (tblcellspan>1) printf(" colspan=%d", tblcellspan); 1467 printf("'>"); 1468 break; 1469 case ENDTABLEENTRY: 1470 printf("</td>"); 1471 break; 1472 1473 /* something better with CSS */ 1474 case BEGININDENT: printf("<blockquote>"); break; 1475 case ENDINDENT: printf("</blockquote>\n"); break; 1476 1477 case FONTSIZE: 1478 /* HTML font step sizes are bigger than troff's */ 1479 if ((fontdelta+=intArg)!=0) printf("<font size='%c1'>", (intArg>0)?'+':'-'); else printf("</font>\n"); 1480 break; 1481 1482 case BEGINBOLD: printf("<b>"); break; 1483 case ENDBOLD: printf("</b>"); break; 1484 case BEGINITALICS: printf("<i>"); break; 1485 
case ENDITALICS: printf("</i>"); break; 1486 case BEGINBOLDITALICS: 1487 case BEGINCODE: printf("<code>"); break; 1488 case ENDBOLDITALICS: 1489 case ENDCODE: printf("</code>"); break; 1490 case BEGINCODEBLOCK: printf("<pre>"); pre=1; break; /* wrong for two-column lists in kermit.1, pine.1, perl4.1 */ 1491 case ENDCODEBLOCK: printf("</pre>"); pre=0; break; 1492 case BEGINCENTER: printf("<center>"); break; 1493 case ENDCENTER: printf("</center>"); break; 1494 case BEGINMANREF: 1495 manrefextract(hitxt); 1496 if (fmanRef) { printf("<a href='"); printf(href, manrefname, manrefsect); printf("'>"); } 1497 else printf("<i>"); 1498 break; 1499 case ENDMANREF: 1500 if (fmanRef) printf("</a>\n"); else printf("</i>"); 1501 break; 1502 case HR: printf("\n<hr>\n"); break; 1503 1504 /* U (was B, I), strike -- all temporary until HTML 4.0's INS and DEL widespread */ 1505 case BEGINDIFFA: printf("<ins><u>"); break; 1506 case ENDDIFFA: printf("</u></ins>"); break; 1507 case BEGINDIFFD: printf("<del><strike>"); break; 1508 case ENDDIFFD: printf("</strike></del>"); break; 1509 1510 case BEGINSC: case ENDSC: 1511 case BEGINY: case ENDY: 1512 case BEGINHEADER: case ENDHEADER: 1513 case BEGINFOOTER: case ENDFOOTER: 1514 case CHANGEBAR: 1515 /* nothing */ 1516 break; 1517 default: 1518 DefaultPara(cmd); 1519 } 1520 } 1521 1522 1523 1524 /* 1525 * DocBook XML 1526 * improvements by Aaron Hawley applied 2003 June 5 1527 * 1528 * N.B. The framework for XML is in place but not done. If you 1529 * are familiar with the DocBook DTD, however, it shouldn't be 1530 * too difficult to finish it. If you do so, please send your 1531 * code to me so that I may share the wealth in the next release. 
1532 */ 1533 1534 const char *DOCBOOKPATH = "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd"; 1535 1536 void 1537 XML(enum command cmd) { 1538 static int pre=0; 1539 int i; 1540 int lasttoc; 1541 char *p; 1542 static int fRefEntry=0; 1543 static int fRefPurpose=0; 1544 /*static char *bads => XML doesn't backslash-quote metacharacters */ 1545 1546 /* 1547 */ 1548 1549 /* always respond to these signals */ 1550 switch (cmd) { 1551 case CHARLQUOTE: case CHARRQUOTE: printf("""); break; 1552 case CHARBULLET: printf("•"); break; 1553 case CHARDAGGER: printf("†"); break; 1554 case CHARPLUSMINUS: printf("±"); break; 1555 case CHARCOPYR: printf("©"); break; 1556 case CHARNOT: printf("¬"); break; 1557 case CHARMUL: printf("×"); break; 1558 case CHARDIV: printf("÷"); break; 1559 case CHARAMP: printf("&"); break; 1560 case CHARDASH: 1561 if (sectheadid==NAME && !fRefPurpose) { 1562 printf("</refname><refpurpose>"); 1563 fRefPurpose=1; 1564 } else putchar('-'); 1565 break; 1566 case CHARBACKSLASH: putchar('\\'); break; 1567 case CHARGT: printf(">"); break; 1568 case CHARLT: printf("<"); break; 1569 case CHARLSQUOTE: 1570 case CHARRSQUOTE: 1571 case CHARPERIOD: 1572 case CHARTAB: 1573 case CHARHAT: 1574 case CHARVBAR: 1575 case CHARNBSP: 1576 case CHARCENT: 1577 case CHARSECT: 1578 case CHARREGTM: 1579 case CHARDEG: 1580 case CHARACUTE: 1581 case CHAR14: 1582 case CHAR12: 1583 case CHAR34: 1584 putchar(cmd); 1585 break; 1586 default: 1587 break; 1588 } 1589 1590 /* while in pre mode... 
*/ 1591 if (pre) { 1592 switch (cmd) { 1593 case ENDLINE: I=0; CurLine++; if (!fPara && scnt) putchar(' '); break; 1594 case ENDTABLE: 1595 if (fSource) printf("</table>\n"); 1596 else { printf("</literallayout>\n"); pre=0; fQS=fIQS=fPara=1; } 1597 break; 1598 default: 1599 /* nothing */ 1600 break; 1601 } 1602 return; 1603 } 1604 1605 /* usual operation */ 1606 switch (cmd) { 1607 case BEGINDOC: 1608 printf("\n<!DOCTYPE refentry PUBLIC \"-//OASIS//DTD DocBook XML V4.1.2//EN\"\n"); 1609 printf(" \"%s\">\n", DOCBOOKPATH); 1610 1611 printf("<!--\n\n\tI am looking for help to finish DocBook XML.\n\n-->\n"); 1612 1613 printf("<!-- %s\n", PROVENANCE); 1614 printf(" %s -->\n\n",HOME); 1615 /* better title possible? */ 1616 for (p=manName; *p; p++) *p = tolower(*p); 1617 printf("<refentry id='%s.%s'>\n", manName, manSect); 1618 printf("<refmeta>\n<refentrytitle>%s</refentrytitle>\n", manName); 1619 printf("<manvolnum>%s</manvolnum>\n</refmeta>\n\n", manSect); 1620 1621 I=0; 1622 break; 1623 1624 case ENDDOC: 1625 /* header and footer wanted? 
*/ 1626 if (fHeadfoot) { 1627 printf("\n<refsect1>\n<title>%s</title>\n", HEADERANDFOOTER); 1628 for (i=0; i<CRUFTS; i++) if (*cruft[i]) printf("<para>%s</para>\n", cruft[i]); 1629 printf("\n</refsect1>"); 1630 } 1631 1632 /* table of contents, such as found in HTML, can be generated automatically by XML software */ 1633 1634 printf("</refentry>\n"); 1635 break; 1636 case BEGINBODY: 1637 if (fPara) printf("\n</para>"); 1638 printf("<para>"); fPara = 1; 1639 break; 1640 case ENDBODY: 1641 if (fRefPurpose) { printf("</refpurpose>"); fRefPurpose=0; } 1642 else { printf("\n</para>"); fPara=0; } 1643 break; 1644 1645 case BEGINCOMMENT: printf("\n<!--\n"); break; 1646 case ENDCOMMENT: printf("\n-->\n"); break; 1647 case COMMENTLINE: break; 1648 1649 case BEGINSECTHEAD: 1650 case BEGINSUBSECTHEAD: 1651 if (sectheadid != NAME && sectheadid != SYNOPSIS) printf("<title>"); 1652 break; 1653 case ENDSECTHEAD: 1654 case ENDSUBSECTHEAD: 1655 if (sectheadid == NAME) printf("<refname>"); 1656 else if (sectheadid == SYNOPSIS) {} 1657 else { printf("</title>\n<para>"); fPara=1; } 1658 break; 1659 1660 case BEGINSECTION: 1661 if (sectheadid==NAME) printf("<refnamediv>\n"); 1662 /*printf("<RefEntry>"); -- do lotsa parsing here for RefName, RefPurpose*/ 1663 else if (sectheadid==SYNOPSIS) printf("<refsynopsisdiv>\n<cmdsynopsis>\n"); 1664 else printf("\n<refsect1>\n"); 1665 break; 1666 case ENDSECTION: 1667 if (sectheadid==NAME) { 1668 if (fRefPurpose) { printf("</refpurpose>"); fRefPurpose=0; } 1669 printf("\n</refnamediv>\n\n"); 1670 } else if (sectheadid==SYNOPSIS) printf("\n</cmdsynopsis>\n</refsynopsisdiv>\n"); 1671 else { 1672 if (fPara) { printf("\n</para>"); fPara=0; } 1673 printf("\n</refsect1>\n"); 1674 } 1675 break; 1676 1677 case BEGINSUBSECTION: printf("\n<refsect2>"); break; 1678 case ENDSUBSECTION: printf("\n</refsect2>"); break; 1679 1680 /* need to update this for enumerated and plain lists */ 1681 case BEGINBULPAIR: printf("<variablelist>\n"); break; 1682 case 
ENDBULPAIR: printf("</variablelist>\n"); break; 1683 case BEGINBULLET: printf("<term>"); break; 1684 case ENDBULLET: printf("</term>\n"); break; 1685 case BEGINBULTXT: printf("<listitem>\n<para>"); break; 1686 case ENDBULTXT: printf("\n</para></listitem>\n"); break; 1687 1688 case BEGINLINE: 1689 /* remember, get BEGINBODY call at start of paragraph */ 1690 if (fRefEntry) { 1691 if (fRefPurpose) { 1692 for (p=plain; *p!='-'; p++) { 1693 /* nothing?! */ 1694 } 1695 } 1696 } 1697 1698 break; 1699 1700 case ENDLINE: 1701 /*if (fCodeline) { fIQS=1; fCodeline=0; }*/ 1702 if (fCodeline) { printf("</code>"); fCodeline=0; } /* */ 1703 I=0; CurLine++; if (!fPara && scnt) printf("<sbr/>"); else putchar(' '); 1704 break; 1705 1706 case SHORTLINE: 1707 if (fCodeline) { printf("</code>"); fCodeline=0; } 1708 if (!fIP && !fPara) printf("<sbr/>\n"); 1709 break; 1710 1711 case BEGINTABLE: 1712 if (fSource) printf("<table>\n"); 1713 else { printf("<literallayout>\n"); pre=1; fQS=fIQS=fPara=0; } 1714 break; 1715 case ENDTABLE: 1716 if (fSource) printf("</table>\n"); 1717 else { printf("</literallayout>\n"); pre=0; fQS=fIQS=fPara=1; } 1718 break; 1719 case BEGINTABLELINE: printf("<row>"); break; 1720 case ENDTABLELINE: printf("</row>\n"); break; 1721 case BEGINTABLEENTRY: printf("<entry>"); break; 1722 case ENDTABLEENTRY: printf("</entry>"); break; 1723 1724 case BEGININDENT: case ENDINDENT: 1725 case FONTSIZE: 1726 break; 1727 1728 /* have to make some guess about bold and italics */ 1729 case BEGINBOLD: printf("<command>"); break; 1730 case ENDBOLD: printf("</command>"); break; 1731 case BEGINITALICS: printf("<emphasis>"); break; /* could be literal or arg */ 1732 case ENDITALICS: printf("</emphasis>"); break; 1733 case BEGINBOLDITALICS: case BEGINCODE: printf("<literal>"); break; 1734 case ENDBOLDITALICS: case ENDCODE: printf("</literal>"); break; 1735 case BEGINMANREF: 1736 manrefextract(hitxt); 1737 if (fmanRef) { printf("<link linkend='"); printf(href, manrefname, manrefsect); 
printf("'>"); } 1738 break; 1739 case ENDMANREF: 1740 if (fmanRef) printf("</link>"); 1741 break; 1742 1743 case HR: 1744 case BEGINSC: case ENDSC: 1745 case BEGINY: case ENDY: 1746 case BEGINHEADER: case ENDHEADER: 1747 case BEGINFOOTER: case ENDFOOTER: 1748 case CHANGEBAR: 1749 /* nothing */ 1750 break; 1751 default: 1752 DefaultPara(cmd); 1753 } 1754 } 1755 1756 1757 1758 /* generates MIME compliant to RFC 1563 */ 1759 1760 void 1761 MIME(enum command cmd) { 1762 static int pre=0; 1763 int i; 1764 1765 /* always respond to these signals */ 1766 switch (cmd) { 1767 case CHARDASH: 1768 case CHARAMP: 1769 case CHARPERIOD: 1770 case CHARTAB: 1771 putchar(cmd); break; 1772 case CHARLSQUOTE: putchar('`'); break; 1773 case CHARACUTE: 1774 case CHARRSQUOTE: putchar('\''); break; 1775 case CHARBULLET: putchar('*'); break; 1776 case CHARDAGGER: putchar('|'); break; 1777 case CHARPLUSMINUS: printf("+-"); break; 1778 case CHARNBSP: putchar(' '); break; 1779 case CHARCENT: putchar('c'); break; 1780 case CHARSECT: putchar('S'); break; 1781 case CHARCOPYR: printf("(C)"); break; 1782 case CHARNOT: putchar('~'); break; 1783 case CHARREGTM: printf("(R)"); break; 1784 case CHARDEG: putchar('o'); break; 1785 case CHAR14: printf("1/4"); break; 1786 case CHAR12: printf("1/2"); break; 1787 case CHAR34: printf("3/4"); break; 1788 case CHARMUL: putchar('X'); break; 1789 case CHARDIV: putchar('/'); break; 1790 case CHARLQUOTE: 1791 case CHARRQUOTE: 1792 putchar('"'); 1793 break; 1794 case CHARBACKSLASH: /* these should be caught as escaped chars */ 1795 case CHARGT: 1796 case CHARLT: 1797 assert(1); 1798 break; 1799 default: 1800 break; 1801 } 1802 1803 /* while in pre mode... 
*/ 1804 if (pre) { 1805 switch (cmd) { 1806 case ENDLINE: I=0; CurLine++; if (!fPara && scnt) printf("\n\n"); break; 1807 case ENDTABLE: printf("</fixed>\n\n"); pre=0; fQS=fIQS=fPara=1; break; 1808 default: 1809 /* nothing */ 1810 break; 1811 } 1812 return; 1813 } 1814 1815 /* usual operation */ 1816 switch (cmd) { 1817 case BEGINDOC: 1818 printf("Content-Type: text/enriched\n"); 1819 printf("Text-Width: 60\n"); 1820 escchars = "<>\\"; 1821 1822 I=0; 1823 break; 1824 case ENDDOC: 1825 /* header and footer wanted? */ 1826 printf("\n\n"); 1827 if (fHeadfoot) { 1828 printf("\n"); 1829 MIME(BEGINSECTHEAD); printf("%s",HEADERANDFOOTER); MIME(ENDSECTHEAD); 1830 for (i=0; i<CRUFTS; i++) if (*cruft[i]) printf("\n%s\n", cruft[i]); 1831 } 1832 1833 /* 1834 printf("\n<comment>\n"); 1835 printf("%s\n%s\n", PROVENANCE, HOME); 1836 printf("</comment>\n\n"); 1837 */ 1838 1839 /* 1840 printf("\n<HR><P>\n"); 1841 printf("<A NAME=\"toc\"><B>%s</B></A><P>\n", TABLEOFCONTENTS); 1842 printf("<UL>\n"); 1843 for (i=0, lasttoc=BEGINSECTION; i<tocc; lasttoc=toc[i].type, i++) { 1844 if (lasttoc!=toc[i].type) { 1845 if (toc[i].type==BEGINSUBSECTION) printf("<UL>\n"); 1846 else printf("</UL>\n"); 1847 } 1848 printf("<LI><A NAME=\"toc%d\" HREF=\"#sect%d\">%s</A></LI>\n", i, i, toc[i].text); 1849 } 1850 if (lasttoc==BEGINSUBSECTION) printf("</UL>"); 1851 printf("</UL>\n"); 1852 printf("</BODY></HTML>\n"); 1853 */ 1854 break; 1855 case BEGINBODY: 1856 printf("\n\n"); 1857 break; 1858 case ENDBODY: break; 1859 1860 case BEGINCOMMENT: fcharout=0; break; 1861 case ENDCOMMENT: fcharout=1; break; 1862 case COMMENTLINE: break; 1863 1864 case BEGINSECTHEAD: 1865 printf("\n<bigger><bigger><underline>"); 1866 /*A NAME=\"sect%d\" HREF=\"#toc%d\"><H2>", tocc, tocc);*/ 1867 break; 1868 case ENDSECTHEAD: 1869 printf("</underline></bigger></bigger>\n\n<indent>"); 1870 /* useful extraction from files, environment? 
*/ 1871 break; 1872 case BEGINSUBSECTHEAD: 1873 printf("<bigger>"); 1874 /*\n<A NAME=\"sect%d\" HREF=\"#toc%d\"><H3>", tocc, tocc);*/ 1875 break; 1876 case ENDSUBSECTHEAD: 1877 printf("</bigger>\n\n</indent>"); 1878 break; 1879 case BEGINSECTION: 1880 case BEGINSUBSECTION: 1881 break; 1882 case ENDSECTION: 1883 case ENDSUBSECTION: 1884 printf("</indent>\n"); 1885 break; 1886 1887 case BEGINBULPAIR: break; 1888 case ENDBULPAIR: break; 1889 case BEGINBULLET: printf("<bold>"); break; 1890 case ENDBULLET: printf("</bold>\t"); break; 1891 case BEGINBULTXT: 1892 case BEGININDENT: 1893 printf("<indent>"); 1894 break; 1895 case ENDBULTXT: 1896 case ENDINDENT: 1897 printf("</indent>\n"); 1898 break; 1899 1900 case FONTSIZE: 1901 if ((fontdelta+=intArg)==0) { 1902 if (intArg>0) printf("</smaller>"); else printf("</bigger>"); 1903 } else { 1904 if (intArg>0) printf("<bigger>"); else printf("<smaller>"); 1905 } 1906 break; 1907 1908 case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; 1909 case ENDLINE: I=0; CurLine++; printf("\n"); break; 1910 case SHORTLINE: if (!fIP) printf("\n\n"); break; 1911 case BEGINTABLE: printf("<nl><fixed>\n"); pre=1; fQS=fIQS=fPara=0; break; 1912 case ENDTABLE: printf("</fixed><nl>\n"); pre=0; fQS=fIQS=fPara=1; break; 1913 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 1914 break; 1915 /* could use a new list type */ 1916 1917 case BEGINBOLD: printf("<bold>"); break; 1918 case ENDBOLD: printf("</bold>"); break; 1919 case BEGINITALICS: printf("<italics>"); break; 1920 case ENDITALICS: printf("</italics>"); break; 1921 case BEGINCODE: 1922 case BEGINBOLDITALICS:printf("<bold><italics>"); break; 1923 case ENDCODE: 1924 case ENDBOLDITALICS: printf("</bold></italics>"); break; 1925 case BEGINMANREF: 1926 printf("<x-color><param>blue</param>"); 1927 /* how to make this hypertext? 
1928 manrefextract(hitxt); 1929 if (fmanRef) { printf("<A HREF=\""); printf(href, manrefname, manrefsect); printf("\">\n"); } 1930 else printf("<I>"); 1931 break; 1932 */ 1933 break; 1934 case ENDMANREF: 1935 printf("</x-color>"); 1936 break; 1937 1938 case HR: printf("\n\n%s\n\n", horizontalrule); break; 1939 1940 case BEGINSC: case ENDSC: 1941 case BEGINY: case ENDY: 1942 case BEGINHEADER: case ENDHEADER: 1943 case BEGINFOOTER: case ENDFOOTER: 1944 case CHANGEBAR: 1945 /* nothing */ 1946 break; 1947 default: 1948 DefaultPara(cmd); 1949 } 1950 } 1951 1952 1953 1954 /* 1955 * LaTeX 1956 */ 1957 1958 void 1959 LaTeX(enum command cmd) { 1960 1961 switch (cmd) { 1962 case BEGINDOC: 1963 escchars = "$&%#_{}"; /* and more to come? */ 1964 printf("%% %s,\n", PROVENANCE); 1965 printf("%% %s\n\n", HOME); 1966 /* definitions */ 1967 printf( 1968 "\\documentstyle{article}\n" 1969 "\\def\\thefootnote{\\fnsymbol{footnote}}\n" 1970 "\\setlength{\\parindent}{0pt}\n" 1971 "\\setlength{\\parskip}{0.5\\baselineskip plus 2pt minus 1pt}\n" 1972 "\\begin{document}\n" 1973 ); 1974 I=0; 1975 break; 1976 case ENDDOC: 1977 /* header and footer wanted? */ 1978 printf("\n\\end{document}\n"); 1979 1980 break; 1981 case BEGINBODY: 1982 printf("\n\n"); 1983 break; 1984 case ENDBODY: break; 1985 1986 case BEGINCOMMENT: 1987 case ENDCOMMENT: 1988 break; 1989 case COMMENTLINE: printf("%% "); break; 1990 1991 1992 case BEGINSECTION: break; 1993 case ENDSECTION: break; 1994 case BEGINSECTHEAD: printf("\n\\section{"); tagc=0; break; 1995 case ENDSECTHEAD: 1996 printf("}"); 1997 /* 1998 if (CurLine==1) printf("\\footnote{" 1999 "\\it conversion to \\LaTeX\ format by PolyglotMan " 2000 "available via anonymous ftp from {\\tt ftp.berkeley.edu:/ucb/people/phelps/tcltk}}" 2001 ); 2002 */ 2003 /* useful extraction from files, environment? 
*/ 2004 printf("\n"); 2005 break; 2006 case BEGINSUBSECTHEAD:printf("\n\\subsection{"); break; 2007 case ENDSUBSECTHEAD: 2008 printf("}"); 2009 break; 2010 case BEGINSUBSECTION: break; 2011 case ENDSUBSECTION: break; 2012 case BEGINBULPAIR: printf("\\begin{itemize}\n"); break; 2013 case ENDBULPAIR: printf("\\end{itemize}\n"); break; 2014 case BEGINBULLET: printf("\\item ["); break; 2015 case ENDBULLET: printf("] "); break; 2016 case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; 2017 case ENDLINE: I=0; putchar('\n'); CurLine++; break; 2018 case BEGINTABLE: printf("\\begin{verbatim}\n"); break; 2019 case ENDTABLE: printf("\\end{verbatim}\n"); break; 2020 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 2021 break; 2022 case BEGININDENT: case ENDINDENT: 2023 case FONTSIZE: 2024 break; 2025 case SHORTLINE: if (!fIP) printf("\n\n"); break; 2026 case BEGINBULTXT: break; 2027 case ENDBULTXT: putchar('\n'); break; 2028 2029 case CHARLQUOTE: printf("``"); break; 2030 case CHARRQUOTE: printf("''"); break; 2031 case CHARLSQUOTE: 2032 case CHARRSQUOTE: 2033 case CHARPERIOD: 2034 case CHARTAB: 2035 case CHARDASH: 2036 case CHARNBSP: 2037 putchar(cmd); break; 2038 case CHARBACKSLASH: printf("$\\backslash$"); break; 2039 case CHARGT: printf("$>$"); break; 2040 case CHARLT: printf("$<$"); break; 2041 case CHARHAT: printf("$\\char94{}$"); break; 2042 case CHARVBAR: printf("$|$"); break; 2043 case CHARAMP: printf("\\&"); break; 2044 case CHARBULLET: printf("$\\bullet$ "); break; 2045 case CHARDAGGER: printf("\\dag "); break; 2046 case CHARPLUSMINUS: printf("\\pm "); break; 2047 case CHARCENT: printf("\\hbox{\\rm\\rlap/c}"); break; 2048 case CHARSECT: printf("\\S "); break; 2049 case CHARCOPYR: printf("\\copyright "); break; 2050 case CHARNOT: printf("$\\neg$"); break; 2051 case CHARREGTM: printf("(R)"); break; 2052 case CHARDEG: printf("$^\\circ$"); break; 2053 case CHARACUTE: putchar('\''); break; 2054 case CHAR14: printf("$\\frac{1}{4}$"); break; 
2055 case CHAR12: printf("$\\frac{1}{2}$"); break; 2056 case CHAR34: printf("$\\frac{3}{4}$"); break; 2057 case CHARMUL: printf("\\times "); break; 2058 case CHARDIV: printf("\\div "); break; 2059 2060 case BEGINCODE: 2061 case BEGINBOLD: printf("{\\bf "); break; 2062 case BEGINSC: printf("{\\sc "); break; 2063 case BEGINITALICS: printf("{\\it "); break; 2064 case BEGINBOLDITALICS:printf("{\\bf\\it "); break; 2065 case BEGINMANREF: printf("{\\sf "); break; 2066 case ENDCODE: 2067 case ENDBOLD: 2068 case ENDSC: 2069 case ENDITALICS: 2070 case ENDBOLDITALICS: 2071 case ENDMANREF: 2072 putchar('}'); 2073 break; 2074 case HR: /*printf("\n%s\n", horizontalrule);*/ break; 2075 2076 case BEGINY: case ENDY: 2077 case BEGINHEADER: case ENDHEADER: 2078 case BEGINFOOTER: case ENDFOOTER: 2079 case CHANGEBAR: 2080 /* nothing */ 2081 break; 2082 default: 2083 DefaultPara(cmd); 2084 } 2085 } 2086 2087 2088 void 2089 LaTeX2e(enum command cmd) { 2090 2091 switch (cmd) { 2092 /* replace selected commands ... */ 2093 case BEGINDOC: 2094 escchars = "$&%#_{}"; 2095 printf("%% %s,\n", PROVENANCE); 2096 printf("%% %s\n\n", HOME); 2097 /* definitions */ 2098 printf( 2099 "\\documentclass{article}\n" 2100 "\\def\\thefootnote{\\fnsymbol{footnote}}\n" 2101 "\\setlength{\\parindent}{0pt}\n" 2102 "\\setlength{\\parskip}{0.5\\baselineskip plus 2pt minus 1pt}\n" 2103 "\\begin{document}\n" 2104 ); 2105 I=0; 2106 break; 2107 case BEGINCODE: 2108 case BEGINBOLD: printf("\\textbf{"); break; 2109 case BEGINSC: printf("\\textsc{"); break; 2110 case BEGINITALICS: printf("\\textit{"); break; 2111 case BEGINBOLDITALICS:printf("\\textbf{\\textit{"); break; 2112 case BEGINMANREF: printf("\\textsf{"); break; 2113 case ENDBOLDITALICS: printf("}}"); break; 2114 2115 /* ... 
rest same as old LaTeX */ 2116 default: 2117 LaTeX(cmd); 2118 } 2119 } 2120 2121 2122 2123 /* 2124 * Rich Text Format (RTF) 2125 */ 2126 2127 /* RTF could use more work */ 2128 2129 void 2130 RTF(enum command cmd) { 2131 2132 switch (cmd) { 2133 case BEGINDOC: 2134 escchars = "{}"; 2135 /* definitions */ 2136 printf( 2137 /* fonts */ 2138 "{\\rtf1\\deff2 {\\fonttbl" 2139 "{\\f20\\froman Times;}{\\f150\\fnil I Times Italic;}" 2140 "{\\f151\\fnil B Times Bold;}{\\f152\\fnil BI Times BoldItalic;}" 2141 "{\\f22\\fmodern Courier;}{\\f23\\ftech Symbol;}" 2142 "{\\f135\\fnil I Courier Oblique;}{\\f136\\fnil B Courier Bold;}{\\f137\\fnil BI Courier BoldOblique;}" 2143 "{\\f138\\fnil I Helvetica Oblique;}{\\f139\\fnil B Helvetica Bold;}}" 2144 "\n" 2145 2146 /* style sheets */ 2147 "{\\stylesheet{\\li720\\sa120 \\f20 \\sbasedon222\\snext0 Normal;}" 2148 "{\\s2\\sb200\\sa120 \\b\\f3\\fs20 \\sbasedon0\\snext2 section head;}" 2149 "{\\s3\\li180\\sa120 \\b\\f20 \\sbasedon0\\snext3 subsection head;}" 2150 "{\\s4\\fi-1440\\li2160\\sa240\\tx2160 \\f20 \\sbasedon0\\snext4 detailed list;}}" 2151 "\n" 2152 2153 /* more header to come--do undefined values default to nice values? */ 2154 ); 2155 I=0; 2156 break; 2157 case ENDDOC: 2158 /* header and footer wanted? */ 2159 printf("\\par{\\f150 %s,\n%s}", PROVENANCE, HOME); 2160 printf("}\n"); 2161 break; 2162 case BEGINBODY: 2163 printf("\n\n"); 2164 break; 2165 case ENDBODY: 2166 CurLine++; 2167 printf("\\par\n"); 2168 tagc=0; 2169 break; 2170 2171 case BEGINCOMMENT: fcharout=0; break; 2172 case ENDCOMMENT: fcharout=1; break; 2173 case COMMENTLINE: break; 2174 2175 case BEGINSECTION: break; 2176 case ENDSECTION: printf("\n\\par\n"); break; 2177 case BEGINSECTHEAD: printf("{\\s2 "); tagc=0; break; 2178 case ENDSECTHEAD: 2179 printf("}\\par"); 2180 /* useful extraction from files, environment? 
*/ 2181 printf("\n"); 2182 break; 2183 case BEGINSUBSECTHEAD:printf("{\\s3 "); break; 2184 case ENDSUBSECTHEAD: 2185 printf("}\\par\n"); 2186 break; 2187 case BEGINSUBSECTION: break; 2188 case ENDSUBSECTION: break; 2189 case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; 2190 case ENDLINE: I=0; putchar(' '); /*putchar('\n'); CurLine++;*/ break; 2191 case SHORTLINE: if (!fIP) printf("\\line\n"); break; 2192 case BEGINBULPAIR: printf("{\\s4 "); break; 2193 case ENDBULPAIR: printf("}\\par\n"); break; 2194 case BEGINBULLET: break; 2195 case ENDBULLET: printf("\\tab "); fcharout=0; break; 2196 case BEGINBULTXT: fcharout=1; break; 2197 case ENDBULTXT: break; 2198 2199 case CHARLQUOTE: printf("``"); break; 2200 case CHARRQUOTE: printf("''"); break; 2201 case CHARLSQUOTE: 2202 case CHARRSQUOTE: 2203 case CHARPERIOD: 2204 case CHARTAB: 2205 case CHARDASH: 2206 case CHARBACKSLASH: 2207 case CHARGT: 2208 case CHARLT: 2209 case CHARHAT: 2210 case CHARVBAR: 2211 case CHARAMP: 2212 case CHARNBSP: 2213 case CHARCENT: 2214 case CHARSECT: 2215 case CHARCOPYR: 2216 case CHARNOT: 2217 case CHARREGTM: 2218 case CHARDEG: 2219 case CHARACUTE: 2220 case CHAR14: 2221 case CHAR12: 2222 case CHAR34: 2223 case CHARMUL: 2224 case CHARDIV: 2225 putchar(cmd); break; 2226 case CHARBULLET: printf("\\bullet "); break; 2227 case CHARDAGGER: printf("\\dag "); break; 2228 case CHARPLUSMINUS: printf("\\pm "); break; 2229 2230 case BEGINCODE: 2231 case BEGINBOLD: printf("{\\b "); break; 2232 case BEGINSC: printf("{\\fs20 "); break; 2233 case BEGINITALICS: printf("{\\i "); break; 2234 case BEGINBOLDITALICS:printf("{\\b \\i "); break; 2235 case BEGINMANREF: printf("{\\f22 "); break; 2236 case ENDBOLD: 2237 case ENDCODE: 2238 case ENDSC: 2239 case ENDITALICS: 2240 case ENDBOLDITALICS: 2241 case ENDMANREF: 2242 putchar('}'); 2243 break; 2244 case HR: printf("\n%s\n", horizontalrule); break; 2245 2246 case BEGINY: case ENDY: 2247 case BEGINHEADER: case ENDHEADER: 2248 case BEGINFOOTER: case ENDFOOTER: 2249 
case BEGINTABLE: case ENDTABLE: 2250 case BEGINTABLELINE: case ENDTABLELINE: case BEGINTABLEENTRY: case ENDTABLEENTRY: 2251 case BEGININDENT: case ENDINDENT: 2252 case FONTSIZE: 2253 case CHANGEBAR: 2254 /* nothing */ 2255 break; 2256 default: 2257 DefaultPara(cmd); 2258 } 2259 } 2260 2261 2262 2263 /* 2264 * pointers to existing tools 2265 */ 2266 2267 void 2268 PostScript(enum command cmd) { 2269 fprintf(stderr, "Use groff or psroff to generate PostScript.\n"); 2270 exit(1); 2271 } 2272 2273 2274 void 2275 FrameMaker(enum command cmd) { 2276 fprintf(stderr, "FrameMaker comes with filters that convert from roff to MIF.\n"); 2277 exit(1); 2278 } 2279 2280 2281 2282 2283 /* 2284 * Utilities common to both parses 2285 */ 2286 2287 2288 /* 2289 level 0: DOC - need match 2290 level 1: SECTION - need match 2291 level 2: SUBSECTION | BODY | BULLETPAIR 2292 level 3: BODY (within SUB) | BULLETPAIR (within SUB) | BULTXT (within BULLETPAIR) 2293 level 4: BULTXT (within BULLETPAIR within SUBSECTION) 2294 2295 never see: SECTHEAD, SUBSECTHEAD, BULLET 2296 */ 2297 2298 int Psect=0, Psub=0, Pbp=0, Pbt=0, Pb=0, Pbul=0; 2299 2300 void 2301 pop(enum command cmd) { 2302 assert(cmd==ENDINDENT || cmd==BEGINBULLET || cmd==BEGINBULTXT || cmd==BEGINBULPAIR || cmd==BEGINBODY || cmd==BEGINSECTION || cmd==BEGINSUBSECTION || cmd==ENDDOC); 2303 /* 2304 int i; 2305 int p; 2306 int match; 2307 2308 p=cmdp-1; 2309 for (i=cmdp-1;i>=0; i--) 2310 if (cmd==cmdstack[i]) { match=i; break; } 2311 */ 2312 2313 /* if match, pop off all up to and including match */ 2314 /* otherwise, pop off one level*/ 2315 2316 if (Pbul) { 2317 (*fn)(ENDBULLET); Pbul=0; 2318 if (cmd==BEGINBULLET) return; 2319 } /* else close off ENDBULTXT */ 2320 2321 if (Pbt) { (*fn)(ENDBULTXT); Pbt=0; } 2322 if (cmd==BEGINBULTXT || cmd==BEGINBULLET) return; 2323 2324 if (Pb && cmd==BEGINBULPAIR) { (*fn)(ENDBODY); Pb=0; } /* special */ 2325 if (Pbp) { (*fn)(ENDBULPAIR); Pbp=0; } 2326 if (cmd==BEGINBULPAIR || cmd==ENDINDENT) return; 
2327 2328 if (Pb) { (*fn)(ENDBODY); Pb=0; } 2329 if (cmd==BEGINBODY) return; 2330 2331 if (Psub) { (*fn)(ENDSUBSECTION); Psub=0; } 2332 if (cmd==BEGINSUBSECTION) return; 2333 2334 if (Psect) { (*fn)(ENDSECTION); Psect=0; } 2335 if (cmd==BEGINSECTION) return; 2336 } 2337 2338 2339 void 2340 poppush(enum command cmd) { 2341 assert(cmd==ENDINDENT || cmd==BEGINBULLET || cmd==BEGINBULTXT || cmd==BEGINBULPAIR || cmd==BEGINBODY || cmd==BEGINSECTION || cmd==BEGINSUBSECTION); 2342 2343 pop(cmd); 2344 2345 switch (cmd) { 2346 case BEGINBULLET: Pbul=1; break; 2347 case BEGINBULTXT: Pbt=1; break; 2348 case BEGINBULPAIR: Pbp=1; break; 2349 case BEGINBODY: Pb=1; break; 2350 case BEGINSECTION: Psect=1; break; 2351 case BEGINSUBSECTION: Psub=1; break; 2352 default: 2353 if (!fQuiet) fprintf(stderr, "poppush: unrecognized code %d\n", cmd); 2354 } 2355 2356 (*fn)(cmd); 2357 prevcmd = cmd; 2358 } 2359 2360 2361 2362 /* 2363 * PREFORMATTED PAGES PARSING 2364 */ 2365 2366 /* wrapper for getchar() that expands tabs, and sends maximum of n=40 consecutive spaces */ 2367 2368 int 2369 getchartab(void) { 2370 static int tabexp = 0; 2371 static int charinline = 0; 2372 static int cspccnt = 0; 2373 char c; 2374 2375 c = lookahead; 2376 if (tabexp) tabexp--; 2377 else if (c=='\n') { 2378 charinline=0; 2379 cspccnt=0; 2380 } else if (c=='\t') { 2381 tabexp = TabStops-(charinline%TabStops); if (tabexp==TabStops) tabexp=0; 2382 lookahead = c = ' '; 2383 } else if (cspccnt>=40) { 2384 if (*in==' ') { 2385 while (*in==' '||*in=='\t') in++; 2386 in--; 2387 } 2388 cspccnt=0; 2389 } 2390 2391 if (!tabexp && lookahead) lookahead = *in++; 2392 if (c=='\b') charinline--; else charinline++; 2393 if (c==' ') cspccnt++; 2394 return c; 2395 } 2396 2397 2398 /* replace gets. 
handles hyphenation too */ 2399 char * 2400 la_gets(char *buf) { 2401 static char la_buf[MAXBUF]; /* can lookahead a full line, but nobody does now */ 2402 static int fla=0, hy=0; 2403 char *ret,*p; 2404 int c,i; 2405 2406 assert(buf!=NULL); 2407 2408 if (fla) { 2409 /* could avoid copying if callers used return value */ 2410 strcpy(buf,la_buf); fla=0; 2411 ret=buf; /* correct? */ 2412 } else { 2413 /*ret=gets(buf); -- gets is deprecated (since it can read too much?) */ 2414 /* could do this... 2415 ret=fgets(buf, MAXBUF, stdin); 2416 buf[strlen(buf)-1]='\0'; 2417 ... but don't want to have to rescan line with strlen, so... */ 2418 2419 i=0; p=buf; 2420 2421 /* recover spaces if re-linebreaking */ 2422 for (; hy; hy--) { *p++=' '; i++; } 2423 2424 while (lookahead && (c=getchartab())!='\n' && i<MAXBUF) { *p++=c; i++; } 2425 assert(i<MAXBUF); 2426 2427 /*lookahead=ungetc(getchar(), stdin); /* only looking ahead one character for now */ 2428 2429 /* very special case: if in SEE ALSO section, re-linebreak so references aren't linebroken 2430 (also do this if fNOHY flag is set) -- doesn't affect lookahead */ 2431 /* 0xad is an en dash on Linux? */ 2432 if ((fPara || sectheadid==SEEALSO || fNOHY) && p>buf && p[-1]=='-' && isspace(lookahead)) { 2433 p--; /* zap hyphen */ 2434 /* zap boldfaced hyphens, gr! */ 2435 while (p[-1]=='\b' && p[-2]=='-') p-=2; 2436 2437 /* start getting next line, spaces first ... */ 2438 while (lookahead && isspace(lookahead) && lookahead!='\n') { getchartab(); hy++; } 2439 2440 /* ... append next nonspace string to previous ... */ 2441 while (lookahead && !isspace(lookahead) && i++<MAXBUF) *p++=getchartab(); 2442 2443 /* gobble following spaces (until, perhaps including, end of line) */ 2444 while (lookahead && isspace(lookahead) && lookahead!='\n') getchartab(); 2445 if (lookahead=='\n') { getchartab(); hy=0; } 2446 } 2447 2448 *p='\0'; 2449 ret=(lookahead)?buf:NULL; 2450 } 2451 2452 AbsLine++; 2453 return ret; /* change this to line length? 
(-1==old NULL) */ 2454 } 2455 2456 2457 /*** Kong ***/ 2458 2459 char phrase[MAXBUF]; /* first "phrase" (space of >=3 spaces) */ 2460 int phraselen; 2461 2462 void 2463 filterline(char *buf, char *plain) { 2464 char *p,*q,*r; 2465 char *ph; 2466 int iq; 2467 int i,j; 2468 int hl=-1, hl2=-1; 2469 int iscnt=0; /* interword space count */ 2470 int tagci; 2471 int I0; 2472 int etype; 2473 int efirst; 2474 enum tagtype tag = NOTAG; 2475 int esccode; 2476 2477 assert(buf!=NULL && plain!=NULL); 2478 2479 etype=NOTAG; 2480 efirst=-1; 2481 tagci=tagc; 2482 ph=phrase; phraselen=0; 2483 scnt=scnt2=0; 2484 s_sum=s_cnt=0; 2485 bs_sum=bs_cnt=0; 2486 ccnt=0; 2487 spcsqz=0; 2488 2489 /* strip only certain \x1b's and only at very beginning of line */ 2490 for (p=buf; *p=='\x1b' && (p[1]=='8'||p[1]=='9'); p+=2) 2491 /* nop */; 2492 2493 strcpy(plain,p); 2494 q=&plain[strlen(p)]; 2495 2496 /*** spaces and change bars ***/ 2497 for (scnt=0,p=plain; *p==' '; p++) scnt++; /* initial space count */ 2498 if (scnt>200) scnt=130-(q-p); 2499 2500 assert(*q=='\0'); 2501 q--; 2502 if (fChangeleft) 2503 for (; q-40>plain && *q=='|'; q--) { /* change bars */ 2504 if (fChangeleft!=-1) ccnt++; 2505 while (q-2>=plain && q[-1]=='\b' && q[-2]=='|') q-=2; /* boldface changebars! */ 2506 } 2507 2508 /*if (q!=&plain[scnt-1])*/ /* zap trailing spaces */ 2509 for (; *q==' ' && q>plain; q--) /* nop */; 2510 2511 /* second changebar way out east! 
HACK HACK HACK */ 2512 if (q-plain>100 && *q=='|') { 2513 while (*q=='|' && q>plain) { q--; if (fChangeleft!=-1) ccnt++; } 2514 while ((*q==' ' || *q=='_' || *q=='-') && q>plain) q--; 2515 } 2516 2517 for (r=q; (*r&0xff)==CHARDAGGER; r--) *r='-'; /* convert daggers at end of line to hyphens */ 2518 2519 if (q-plain < scnt) scnt = q-plain+1; 2520 q[1]='\0'; 2521 2522 /* set I for tags below */ 2523 if (indent>=0 && scnt>=indent) scnt-=indent; 2524 if (!fPara && !fIQS) { 2525 if (fChangeleft) I+=(scnt>ccnt)?scnt:ccnt; 2526 else I+=scnt; 2527 } 2528 I0=I; 2529 2530 /*** tags and filler spaces ***/ 2531 2532 iq=0; falluc=1; 2533 for (q=plain; *p; p++) { 2534 2535 iscnt=0; 2536 if (*p==' ') { 2537 for (r=p; *r==' '; r++) { iscnt++; spcsqz++; } 2538 s_sum+=iscnt; s_cnt++; 2539 if (iscnt>1 && !scnt2 && *p==' ') scnt2=iscnt; 2540 if (iscnt>2) { bs_cnt++; bs_sum+=iscnt; } /* keep track of large gaps */ 2541 iscnt--; /* leave last space for tail portion of loop */ 2542 2543 /* write out spaces */ 2544 if (fQS && iscnt<3) { p=r-1; iscnt=0; } /* reduce strings of <3 spaces to 1 */ 2545 /* else if (fQS && iscnt>=3) { replace with tab? } */ 2546 else { 2547 for (i=0; i<iscnt; i++) { p++; *q++=' '; } 2548 } 2549 } /* need to go through if chain for closing off annotations */ 2550 2551 /** backspace-related filtering **/ 2552 2553 /* else */ if (*p=='\b' && p[1]=='_' && q>plain && q[-1]=='+') { 2554 /* bold plus/minus(!) 
*/ 2555 q[-1]=c_plusminus; 2556 while (*p=='\b' && p[1]=='_') p+=2; 2557 continue; 2558 } else if ((*p=='_' && p[1]=='\b' && p[2]!='_' && p[3]!='\b') 2559 || (*p=='\b' && p[1]=='_')) { 2560 /* italics */ 2561 if (tag!=ITALICS && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } 2562 if (hl==-1) hl=I+iq; 2563 tag=ITALICS; 2564 p+=2; 2565 } else if (*p=='_' && p[2]==p[4] && p[1]=='\b' && p[3]=='\b' && p[2]!='_') { 2566 /* bold italics (for Solaris) */ 2567 for (p+=2; *p==p[2] && p[1]=='\b';) p+=2; 2568 if (tag!=BOLDITALICS && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } 2569 if (hl==-1) hl=I+iq; 2570 tag=BOLDITALICS; 2571 } else if (*p==p[2] && p[1]=='\b') { 2572 /* boldface */ 2573 while (*p==p[2] && p[1]=='\b') p+=2; 2574 if (tag!=BOLD && hl>=0) { tagadd(tag, hl, I+iq); hl=-1; } 2575 if (hl==-1) hl=I+iq; 2576 tag=BOLD; 2577 } else if (p[1]=='\b' && 2578 ((*p=='o' && p[2]=='+') || 2579 (*p=='+' && p[2]=='o')) ) { 2580 /* bullets */ 2581 p+=2; 2582 while (p[1]=='\b' && (*p=='o' || p[2]=='+') ) p+=2; /* bold bullets(!) */ 2583 *q++=c_bullet; iq++; 2584 continue; 2585 } else if (*p=='\b' && p>plain && p[-1]=='o' && p[1]=='+') { 2586 /* OSF bullets */ 2587 while (*p=='\b' && p[1]=='+') p+=2; /* bold bullets(!) */ 2588 q[-1]=c_bullet; p--; 2589 continue; 2590 } else if (p[1]=='\b' && *p=='+' && p[2]=='_') { 2591 /* plus/minus */ 2592 p+=2; 2593 *q++=c_plusminus; iq++; 2594 continue; 2595 } else if (p[1]=='\b' && *p=='|' && p[2]=='-') { 2596 /* dagger */ 2597 *q++=c_dagger; iq++; 2598 p+=2; continue; 2599 } else if (*p=='\b') { 2600 /* supress unattended backspaces */ 2601 continue; 2602 } else if (*p=='\x1b') { 2603 p++; 2604 if (*p=='[' && isdigit(p[1])) { /* 0/1/22/24/.../8/9/... 
*/ 2605 esccode=0; for (p++; isdigit(*p); p++) esccode = esccode * 10 + *p - '0'; 2606 2607 if (efirst>=0 /*&& (esccode==0 || esccode==1 || esccode==4 || esccode==22 || esccode==24) /*&& hl>=0 && hl2==-1 && tags[MAXTAGS].first<I+iq*/) { 2608 /* doesn't catch tag if spans line -- just make tag and hl static? */ 2609 /*tagadd(tags[MAXTAGS].type, tags[MAXTAGS].first, I+iq);*/ 2610 if (hl==-1 && hl2==-1 && efirst!=-1/*<I+iq*/) 2611 tagadd(etype, efirst, I+iq); 2612 efirst=-1; 2613 } 2614 2615 if (esccode==1 /*&& hl==-1*/) { 2616 /* stash attributes in "invalid" array element */ 2617 efirst=I+iq; etype=BOLD; 2618 /*hl=I+iq; tag=BOLD; -- faces immediate end of range */ 2619 } else if (esccode==4 /*&& hl==-1*/) { 2620 efirst=I+iq; etype=ITALICS; 2621 2622 } /* else skip unrecognized escape codes like 8/9 */ 2623 } 2624 2625 /*assert(*p=='m'); OR if (*p == 'm') ? */ 2626 /*p++; /* ending 'm' -- inc done in overarching for */ 2627 continue; 2628 2629 } else if ((isupper(*p) /*|| *p=='_' || *p=='&'*/) && 2630 (hl>=0 || isupper(p[1]) || (p[1]=='_' && p[2]!='\b') || p[1]=='&')) { 2631 if (hl==-1 && efirst==-1) { hl=I+iq; tag=SMALLCAPS; } 2632 } else { 2633 /* end of tag, one way or another */ 2634 /* collect tags in this pass, interspersed later if need be */ 2635 /* can't handle overlapping tags */ 2636 if (hl>=0) { 2637 if (hl2==-1) tagadd(tag, hl, I+iq); 2638 hl=-1; 2639 } 2640 } 2641 2642 /** non-backspace related filtering **/ 2643 /* case statement here in place of if chain? 
*/ 2644 /* Tk 3.x's text widget tabs too crazy 2645 if (*p==' ' && strncmp(" ",p,5)==0) { 2646 xputchar('\t'); i+=5-1; ci++; continue; 2647 } else 2648 */ 2649 /* copyright symbol: too much work for so little 2650 if (p[i]=='o' && (strncmp("opyright (C) 19",&p[i],15)==0 2651 || strncmp("opyright (c) 19",&p[i],15)==0)) { 2652 printf("opyright \xd3 19"); 2653 tagadd(SYMBOL, ci+9, ci+10); 2654 i+=15-1; ci+=13; continue; 2655 } else 2656 */ 2657 if (*p=='(' && q>plain && (isalnum(q[-1])||strchr(manvalid/*"._-+"*/,q[-1])!=NULL) 2658 && strcoloncmp(&p[1],')',vollist) 2659 /* && p[1]!='s' && p[-1]!='`' && p[-1]!='\'' && p[-1]!='"'*/ ) { 2660 hl2=I+iq; 2661 for (r=q-1; r>=plain && (isalnum(*r)||strchr(manvalid/*"._-+:"*/,*r)!=NULL); r--) 2662 hl2--; 2663 /* else ref to a function? */ 2664 /* maybe save position of opening paren so don't highlight it later */ 2665 } else if (*p==')' && hl2!=-1) { 2666 /* don't overlap tags on man page references */ 2667 while (tagc>0 && tags[tagc-1].last>hl2) tagc--; 2668 tagadd(MANREF, hl2, I+iq+1); 2669 hl2=hl=-1; 2670 } else if (hl2!=-1) { 2671 /* section names are alphanumic or '+' for C++ */ 2672 if (!isalnum(*p) && *p!='+') hl2=-1; 2673 } 2674 2675 2676 /*assert(*p!='\0');*/ 2677 if (!*p) break; /* not just safety check -- check out sgmls.1 */ 2678 2679 *q++=*p; 2680 /* falluc = falluc && (isupper(*p) || isspace(*p) || isdigit(*p) || strchr("-+&_'/()?!.,;",*p)!=NULL);*/ 2681 falluc = falluc && !islower(*p); 2682 if (!scnt2) { *ph++=*p; phraselen++; } 2683 iq+=iscnt+1; 2684 } 2685 if (hl>=0) tagadd(tag, hl, I+iq); 2686 else if (efirst>=0) tagadd(etype, efirst, I+iq); 2687 *q=*ph='\0'; 2688 linelen=iq+ccnt; 2689 2690 2691 /* special case for Solaris: 2692 if line has ONLY <CODE> tags AND they SPAN line, convert to one tag */ 2693 fCodeline=0; 2694 if (tagc && tags[0].first==0 && tags[tagc-1].last==linelen) { 2695 fCodeline=1; 2696 j=0; 2697 /* invariant: at start of a tag */ 2698 for (i=0; fCodeline && i<tagc; i++) { 2699 if 
(tags[i].type!=BOLDITALICS /*&& tags[i].type!=BOLD*/) fCodeline=0; 2700 else if ((j=tags[i].last)<linelen) { 2701 for (; j < tags[i+1].first ; j++) 2702 if (!isspace(plain[j])) { fCodeline=0; break; } 2703 } 2704 } 2705 } 2706 2707 2708 /* verify tag lists -- in production, compiler should kill with dead code elimination */ 2709 for (i=tagci; i<tagc; i++) { 2710 /* verify valid ranges */ 2711 assert(tags[i].type>NOTAG && tags[i].type<=MANREF); 2712 assert(tags[i].first>=I0 && tags[i].last<=linelen+I0); 2713 assert(tags[i].first<=tags[i].last); 2714 2715 /* verify for no overlap with other tags */ 2716 for (j=i+1; j<tagc; j++) { 2717 assert(tags[i].last<=tags[j].first /*|| tags[i].first>=tags[j].last*/); 2718 } 2719 } 2720 } 2721 2722 2723 /* 2724 buf[] == input text (read only) 2725 plain[] == output (initial, trailing spaces stripped; tabs=>spaces; 2726 underlines, overstrikes => tag array; spaces squeezed, if requested) 2727 ccnt = count of changebars 2728 scnt = count of initial spaces 2729 linelen = length result in plain[] 2730 */ 2731 2732 int fHead=0; 2733 int fFoot=0; 2734 2735 void 2736 preformatted_filter(void) { 2737 const int MINRM=50; /* minimum column for right margin */ 2738 const int MINMID=20; 2739 const int HEADFOOTSKIP=20; 2740 const int HEADFOOTMAX=25; 2741 int curtag; 2742 char *p,*r; 2743 char head[MAXBUF]=""; /* first "word" */ 2744 char foot[MAXBUF]=""; 2745 int header_m=0, footer_m=0; 2746 int headlen=0, footlen=0; 2747 /* int line=1-1; */ 2748 int i,j,k,l,off; 2749 int sect=0,subsect=0,bulpair=0,osubsect=0; 2750 int title=1; 2751 int oscnt=-1; 2752 int empty=0,oempty; 2753 int fcont=0; 2754 int Pnew=0,I0; 2755 float s_avg=0.0; 2756 int spaceout; 2757 int skiplines=0; 2758 int c; 2759 2760 /* try to keep tabeginend[][] in parallel with enum tagtype */ 2761 assert(tagbeginend[ITALICS][0]==BEGINITALICS); 2762 assert(tagbeginend[MANREF][1]==ENDMANREF); 2763 in++; /* lookahead = current character, in points to following */ 2764 2765 /* for 
(i=0; i<MAXBUF; i++) tabgram[i]=0;*/ 2766 2767 /*if (fMan) */indent=-1; 2768 I=1; 2769 CurLine=1; 2770 (*fn)(BEGINDOC); I0=I; 2771 2772 /* run through each line */ 2773 while (la_gets(buf)!=NULL) { 2774 if (title) I=I0; 2775 /* strip out Ousterhout box: it's confusing the section line counts in TkMan outlining */ 2776 if (fNORM && *buf=='_' 2777 && strncmp(buf,"_________________________________________________________________",65)==0) { 2778 fTclTk = 1; 2779 if (fChangeleft==0) fChangeleft=1; 2780 skiplines = 2; 2781 } 2782 if (skiplines) { skiplines--; AbsLine++; continue; } 2783 filterline(buf,plain); /* ALL LINES ARE FILTERED */ 2784 2785 #if 0 2786 /* dealing with tables in formatted pages is hopeless */ 2787 finTable = fTable && 2788 ((!ncnt && fotable) || 2789 (ncnt && bs_cnt>=2 && bs_cnt<=5 && ((float) bs_sum / (float) bs_cnt)>3.0)); 2790 if (finTable) { 2791 if (!fotable) (*fn)(BEGINTABLE); 2792 } else if (fotable) { 2793 (*fn)(ENDTABLE); 2794 I=I0; tagc=0; filterline(buf,plain); /* rescan first line out of table */ 2795 } 2796 #endif 2797 2798 s_avg=(float) s_sum; 2799 if (s_cnt>=2) { 2800 /* don't count large second space gap */ 2801 if (scnt2) s_avg= (float) (s_sum - scnt2) / (float) (s_cnt-1); 2802 else s_avg= (float) (s_sum) / (float) (s_cnt); 2803 } 2804 2805 p=plain; /* points to current character in plain */ 2806 2807 /*** determine header and global indentation ***/ 2808 if (/*fMan && (*/!fHead || indent==-1/*)*/) { 2809 if (!linelen) continue; 2810 if (!*header) { 2811 /* check for missing first header--but this doesn't catch subsequent pages */ 2812 if (stricmp(p,"NAME")==0 || stricmp(p,"NOMBRE")==0) { /* works because line already filtered */ 2813 indent=scnt; /*filterline(buf,plain);*/ scnt=0; I=I0; fHead=1; 2814 } else { 2815 fHead=1; 2816 (*fn)(BEGINHEADER); 2817 /* grab header and its first word */ 2818 strcpy(header,p); 2819 if ((header_m=HEADFOOTSKIP)>linelen) header_m=0; 2820 strcpy(head,phrase); headlen=phraselen; 2821 la_gets(buf); 
filterline(buf,plain); 2822 if (linelen) { 2823 strcpy(header2,plain); 2824 if (strincmp(plain,"Digital",7)==0 || strincmp(plain,"OSF",3)==0) { 2825 fFoot=1; 2826 fSubsections=0; 2827 } 2828 } 2829 (*fn)(ENDHEADER); tagc=0; 2830 continue; 2831 } 2832 } else { 2833 /* some idiot pages have a *third* header line, possibly after a null line */ 2834 if (*header && scnt>MINMID) { strcpy(header3,p); ncnt=0; continue; } 2835 /* indent of first line ("NAME") after header sets global indent */ 2836 /* check '<' for Plan 9(?) */ 2837 if (*p!='<') { 2838 indent=scnt; I=I0; scnt=0; 2839 } else continue; 2840 } 2841 /* if (indent==-1) continue;*/ 2842 } 2843 if (!lindent && scnt) lindent=scnt; 2844 /*printf("lindent = %d, scnt=%d\n", lindent,scnt);*/ 2845 2846 2847 /**** for each ordinary line... *****/ 2848 2849 /*** skip over global indentation */ 2850 oempty=empty; empty=(linelen==0); 2851 if (empty) {ncnt++; continue;} 2852 2853 /*** strip out per-page titles ***/ 2854 2855 if (/*fMan && (*/scnt==0 || scnt>MINMID/*)*/) { 2856 /*printf("***ncnt = %d, fFoot = %d, line = %d***", ncnt,fFoot,AbsLine);*/ 2857 if (!fFoot && !isspace(*p) && (scnt>5 || (*p!='-' && *p!='_')) && 2858 /* don't add ncnt -- AbsLine gets absolute line number */ 2859 (((ncnt>=2 && AbsLine/*+ncnt*/>=61/*was 58*/ && AbsLine/*+ncnt*/<70) 2860 || (ncnt>=4 && AbsLine/*+ncnt*/>=59 && AbsLine/*+ncnt*/<74) 2861 || (ncnt && AbsLine/*+ncnt*/>=61 && AbsLine/*+ncnt*/<=66)) 2862 && (/*lookahead!=' ' ||*/ (s_cnt>=1 && s_avg>1.1) || !falluc) ) 2863 ) { 2864 (*fn)(BEGINFOOTER); 2865 /* grab footer and its first word */ 2866 strcpy(footer,p); 2867 /* if ((footer_m=linelen-HEADFOOTSKIP)<0) footer_m=0;*/ 2868 if ((footer_m=HEADFOOTSKIP)>linelen) footer_m=0; 2869 /*grabphrase(p);*/ strcpy(foot,phrase); footlen=phraselen; 2870 /* permit variations at end, as for SGI "Page N", but keep minimum length */ 2871 if (footlen>3) footlen--; 2872 la_gets(buf); filterline(buf,plain); if (linelen) strcpy(footer2,plain); 2873 title=1; 
2874 (*fn)(ENDFOOTER); tagc=0; 2875 2876 /* if no header on first page, try again after first footer */ 2877 if (!fFoot && *header=='\0') fHead=0; /* this is dangerous */ 2878 fFoot=1; 2879 continue; 2880 } else 2881 /* a lot of work, but only for a few lines (about 4%) */ 2882 if (fFoot && (scnt==0 || scnt+indent>MINMID) && 2883 ( (headlen && strncmp(head,p,headlen)==0) 2884 || strcmp(header2,p)==0 || strcmp(header3,p)==0 2885 || (footlen && strncmp(foot,p,footlen)==0) 2886 || strcmp(footer2,p)==0 2887 /* try to recognize lines with dates and page numbers */ 2888 /* skip into line */ 2889 || (header_m && header_m<linelen && 2890 strncmp(&header[header_m],&p[header_m],HEADFOOTMAX)==0) 2891 || (footer_m && footer_m<linelen && 2892 strncmp(&footer[footer_m],&p[footer_m],HEADFOOTMAX)==0) 2893 /* skip into line allowing for off-by-one */ 2894 || (header_m && header_m<linelen && 2895 strncmp(&header[header_m],&p[header_m+1],HEADFOOTMAX)==0) 2896 || (footer_m && footer_m<linelen && 2897 strncmp(&footer[footer_m],&p[footer_m+1],HEADFOOTMAX)==0) 2898 /* or two */ 2899 || (header_m && header_m<linelen && 2900 strncmp(&header[header_m],&p[header_m+2],HEADFOOTMAX)==0) 2901 || (footer_m && footer_m<linelen && 2902 strncmp(&footer[footer_m],&p[footer_m+2],HEADFOOTMAX)==0) 2903 /* or with reflected odd and even pages */ 2904 || (headlen && headlen<linelen && 2905 strncmp(head,&p[linelen-headlen],headlen)==0) 2906 || (footlen && footlen<linelen && 2907 strncmp(foot,&p[linelen-footlen],footlen)==0) 2908 )) { 2909 tagc=0; title=1; continue; 2910 } 2911 2912 /* page numbers at end of line */ 2913 for(i=0; p[i] && isdigit(p[i]); i++) 2914 /* empty */; 2915 if (&p[i]!=plain && !p[i]) {title=1; fFoot=1; continue;} 2916 } 2917 2918 /*** interline spacing ***/ 2919 /* multiple \n: paragraph mode=>new paragraph, line mode=>blank lines */ 2920 /* need to chop up lines for Roff */ 2921 2922 /*tabgram[scnt]++;*/ 2923 if (title) ncnt=(scnt!=oscnt || (/*scnt<4 &&*/ isupper(*p))); 2924 itabcnt 
= scnt/5; 2925 if (CurLine==1) {ncnt=0; tagc=0;} /* gobble all newlines before first text line */ 2926 sect = (scnt==0 && isupper(*p)); 2927 subsect = (fSubsections && (scnt==2||scnt==3)); 2928 if ((sect || subsect) && ncnt>1) ncnt=1; /* single blank line between sections */ 2929 (*fn)(BEGINLINE); 2930 if (/*fPara &&*/ ncnt) Pnew=1; 2931 title=0; /*ncnt=0;--moved down*/ 2932 /*if (finTable) (*fn)(BEGINTABLELINE);*/ 2933 oscnt=scnt; /*fotable=finTable;*/ 2934 2935 /* let output modules decide what to do at the start of a paragraph 2936 if (fPara && !Pnew && (prevcmd==BEGINBODY || prevcmd==BEGINBULTXT)) { 2937 putchar(' '); I++; 2938 } 2939 */ 2940 2941 /*** identify structural sections and notify fn */ 2942 2943 /*if (fMan) {*/ 2944 /* bulpair = (scnt<7 && (*p==c_bullet || *p=='-'));*/ 2945 /* decode the below */ 2946 bulpair = ((!auxindent || scnt!=lindent+auxindent) /*!bulpair*/ 2947 && ((scnt>=2 && scnt2>5) || scnt>=5 || (tagc>0 && tags[0].first==scnt) ) /* scnt>=2?? */ 2948 && (((*p==c_bullet || strchr("-+.",*p)!=NULL || falluc) && (ncnt || scnt2>4)) || 2949 (scnt2-s_avg>=2 && phrase[phraselen-1]!='.') || 2950 (scnt2>3 && s_cnt==1) 2951 )); 2952 if (bulpair) { 2953 if (tagc>0 && tags[0].first==scnt) { 2954 k=tags[0].last; 2955 for (l=1; l<tagc; l++) { 2956 if (tags[l].first - k <=3) 2957 k=tags[l].last; 2958 else break; 2959 } 2960 phraselen=k-scnt; 2961 for (k=phraselen; plain[k]==' ' && k<linelen; k++) /* nothing */; 2962 if (k>=5 && k<linelen) hanging=k; else hanging=-1; 2963 } else if (scnt2) hanging=phraselen+scnt2; 2964 else hanging=5; 2965 } else hanging=0; 2966 2967 /* hanging = bulpair? phraselen+scnt2 : 0;*/ 2968 /*if (bulpair) printf("hanging = %d\n", hanging);*/ 2969 /* maybe, bulpair=0 would be best */ 2970 /*end fMan}*/ 2971 2972 /* certain sections (subsections too?) 
special, like SEE ALSO */ 2973 /* to make canonical name as plain, all lowercase */ 2974 if (sect /*||subsect -- no widespread subsection names*/) { 2975 for (j=0; (sectheadid=j)<RANDOM; j++) if (strcoloncmp2(plain,'\0',sectheadname[j],0)) break; 2976 } 2977 2978 /* normalized section headers are put into mixed case */ 2979 if (/*fNORM &&*/falluc && (sect || subsect)) casify(plain); 2980 2981 if (sect) { 2982 poppush(BEGINSECTION); (*fn)(BEGINSECTHEAD); 2983 tocadd(plain, BEGINSECTION, CurLine); 2984 } else if (subsect && !osubsect) { 2985 poppush(BEGINSUBSECTION); (*fn)(BEGINSUBSECTHEAD); 2986 tocadd(plain, BEGINSUBSECTION, CurLine); 2987 } else if (bulpair) { 2988 /* used to be just poppush(BEGINBULPAIR); */ 2989 if (!Pbp) poppush(BEGINBULPAIR); 2990 (poppush)(BEGINBULLET); 2991 fIP=1; /*grabphrase(plain);*/ 2992 } else if (Pnew) { 2993 poppush(BEGINBODY); 2994 } 2995 Pnew=0; 2996 oldsectheadid = sectheadid; 2997 2998 2999 /* move change bars to left */ 3000 if (fChangeleft && !fNORM) { 3001 if (fPara) (*fn)(CHANGEBAR); 3002 /* replace initial spaces with changebars */ 3003 else for (i=0; i<ccnt; i++) { /*xputchar('|'); */ (*fn)(CHANGEBAR); } 3004 } 3005 3006 /* show initial spaces */ 3007 if (!fIQS && fcharout) { 3008 spaceout = (scnt>ccnt)?(scnt-ccnt):0; 3009 if (fILQS) { if (spaceout>=lindent) spaceout-=lindent; else spaceout=0; } 3010 if (auxindent) { if (spaceout>=auxindent) spaceout-=auxindent; else spaceout=0; } 3011 if (fNORM) { 3012 if (itabcnt>0) (*fn)(ITAB); 3013 for (i=0; i<(scnt%5); i++) putchar(' '); 3014 } else printf("%*s",spaceout,""); 3015 } 3016 3017 3018 /*** iterate over each character in line, ***/ 3019 /*** handling underlining, tabbing, copyrights ***/ 3020 3021 off=(!fIQS&&!fPara)?scnt:0; 3022 for (i=0, p=plain, curtag=0, fcont=0; *p; p++,i++,fcont=0) { 3023 /* interspersed presentation signals */ 3024 /* start tags in reverse order of addition (so structural first) */ 3025 if (curtag<tagc && i+I0+off==tags[curtag].first) { 3026 for 
(r=hitxt, j=tags[curtag].last-tags[curtag].first, hitxt[j]='\0'; j; j--) 3027 hitxt[j-1]=p[j-1]; 3028 (*fn)(tagbeginend[tags[curtag].type][0]); 3029 } 3030 3031 /* special characters */ 3032 switch(*p) { 3033 case '"': 3034 if (p==plain || isspace(p[-1])) { (*fn)(CHARLQUOTE); fcont=1; } 3035 else if (isspace(p[1])) { (*fn)(CHARRQUOTE); fcont=1; } 3036 break; 3037 case '\'': 3038 if (p==plain || isspace(p[-1])) { (*fn)(CHARLSQUOTE); fcont=1; } 3039 else if (isspace(p[1])) { (*fn)(CHARRSQUOTE); fcont=1; } 3040 break; 3041 case '-': 3042 /* check for -opt => \-opt */ 3043 if (p==plain || (isspace(p[-1]) && !isspace(p[1]))) { 3044 (*fn)(CHARDASH); fcont=1; 3045 } 3046 break; 3047 } 3048 3049 /* troublemaker characters */ 3050 c = (*p)&0xff; 3051 if (!fcont && fcharout) { 3052 if (strchr(escchars,c)!=NULL) { 3053 putchar('\\'); putchar(c); I++; 3054 } else if (strchr(trouble,c)!=NULL) { 3055 (*fn)(c); fcont=1; 3056 } else { 3057 putchar(c); I++; 3058 } 3059 } 3060 3061 /*default:*/ 3062 if (curtag<tagc && i+I0+off+1==tags[curtag].last) { 3063 (*fn)(tagbeginend[tags[curtag].type][1]); 3064 curtag++; 3065 } 3066 3067 if (fIP && ((*p==' ' && i==phraselen) || *p=='\0')) { 3068 p++; /* needed but why? 
*/ 3069 (*fn)(ENDBULLET); fIP=0; 3070 if (*p!='\0') { 3071 /*oscnt+=phraselen;*/ 3072 oscnt+=i; 3073 for (r=p; *r==' '; r++) { 3074 oscnt++; 3075 /* 3076 i++; 3077 if (fQS || !fcharout) p++; 3078 */ 3079 } 3080 } 3081 p--; /* to counteract increment in loop */ 3082 3083 poppush(BEGINBULTXT); 3084 } 3085 } 3086 3087 3088 /*** end of line in buf[] ***/ 3089 /*** deal with section titles, hyperlinks ***/ 3090 3091 if (sect) { (*fn)(ENDSECTHEAD); Pnew=1; } 3092 else if (subsect) { (*fn)(ENDSUBSECTHEAD); Pnew=1; } 3093 else if (fIP) { (*fn)(ENDBULLET); fIP=0; poppush(BEGINBULTXT); } 3094 /* oscnt not right here */ 3095 else if (scnt+linelen+spcsqz<MINRM /*&& ncnt*/ && lookahead!='\n' 3096 && prevcmd!=BEGINBULTXT && prevcmd!=ENDSUBSECTHEAD && prevcmd!=ENDSUBSECTHEAD) 3097 (*fn)(SHORTLINE); 3098 osubsect=subsect; 3099 3100 /*if (finTable) (*fn)(ENDTABLELINE);*/ 3101 /*if (!fPara)*/ (*fn)(ENDLINE); tagc=0; 3102 ncnt=0; 3103 I0=I; /* save I here in case skipping lines screws it up */ 3104 } 3105 3106 /* wrap up at end */ 3107 pop(ENDDOC); /* clear up all tags on stack */ 3108 (*fn)(ENDDOC); 3109 } 3110 3111 3112 3113 /* 3114 * SOURCE CODE PARSING 3115 * for better transcription short of full nroff interpreter 3116 * 3117 * Macros derived empirically, except for weird register ones that were looked up in groff 3118 * 3119 * buffer usage 3120 * buf = incoming text from man page file 3121 * plain = "second pass" buffer used to identify man page references 3122 * 3123 * test pages 3124 * Solaris: fdisk.1m, fcntl.2, curs_getwch.3x, locale.5 (numbered lists), 3125 * getservbyname.3n (font size changes) 3126 */ 3127 3128 /* macros */ 3129 /* 3130 /* put as much in here, as opposed to in code, as possible. 
3131 less expensive and here they can be overridden by other macros */ 3132 /*const int macromax=100; -- dumb compiler */ 3133 #define MACROMAX 1000 3134 struct { char *key; char *subst; } macro[MACROMAX] = { 3135 /* Solaris */ 3136 {"NA", ".SH NAME"}, 3137 {"SB", "\\s-2\\fB\\$1\\fR\\s0"}, 3138 /* HP-UX */ 3139 /* 3140 {"SM", "\\s-2\\$1\\s0"}, 3141 {"C", "\\f3\\$1\\fR"}, 3142 {"CR", "\\f3\\$1\\fR\\$2"}, 3143 {"CI", "\\f3\\$1\\fI\\$2\\fR"}, 3144 {"RC", "\\fR\\$1\\f3\\$2\\fR"}, 3145 */ 3146 /* SGI -- doesn't ship man page source */ 3147 3148 /* 4.4BSD - http://intergate.sonyinteractive.com/cgi-bin/manlink/7/mdoc */ 3149 /* scads more, but for them definition in -mandoc is sufficient */ 3150 /* 3151 {"Dt", ".TH \\$1 \\$2 \\$3"}, 3152 {"Sh", ".SH \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9"}, 3153 {"Ss", ".SS \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9"}, 3154 {"Pp", ".P"}, 3155 {"Nm", ".BR \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9"}, /* name * / 3156 {"Ar", ".IR \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9"}, 3157 */ 3158 { NULL, NULL } 3159 }; 3160 /* what all can be represented as a macro? 
*/ 3161 int macrocnt=-1; /* length of table counted at start */ 3162 3163 char *macnotfound[MACROMAX]; 3164 int macnotcnt=0; 3165 3166 #define SUBSTMAX 1000 3167 /* "*" or "*(" prefixed */ 3168 struct { char *key; char *subst; } subst[SUBSTMAX] = { 3169 {"rq", "'"}, {"lq", "`"}, {"L\"", "``"}, {"R\"", "''"}, 3170 {"L", "\\f3"}, {"E", "\\f2"}, {"V", "\\f4"}, {"O", "\\f1"} 3171 }; 3172 int substcnt=8; 3173 3174 #define REGISTERMAX 1000 3175 struct { char *name; char *value; } reg[REGISTERMAX]; 3176 int regcnt=0; 3177 3178 /* special characters */ 3179 const struct { char key[4]; unsigned char subst[4]; } spec[] = { 3180 { "**", "*" }, 3181 { "+-", { CHARPLUSMINUS, '\0' }}, 3182 { "12", { CHAR12, '\0' }}, 3183 { "14", { CHAR14, '\0' }}, 3184 { "34", { CHAR34, '\0' }}, 3185 { "aa", { CHARACUTE, '\0' }}, 3186 { "ap", "~" }, 3187 { "br", "|" }, 3188 { "bu", { CHARBULLET, '\0' }}, 3189 { "bv", "|" }, 3190 { "ci", "O" }, 3191 { "co", { CHARCOPYR, '\0' }}, 3192 { "ct", { CHARCENT, '\0' }}, 3193 { "de", { CHARDEG, '\0' }}, 3194 { "dg", { CHARDAGGER, '\0' }}, 3195 { "di", { CHARDIV, '\0' }}, 3196 { "em", "--" }, 3197 { "eq", "=" }, 3198 { "hy", "-" }, 3199 { "mi", "-" }, 3200 { "mu", { CHARMUL, '\0' }}, 3201 { "no", { CHARNOT, '\0' }}, 3202 { "or", "|" }, 3203 { "pl", "+" }, 3204 { "rg", { CHARREGTM, '\0' }}, 3205 { "ru", "_" }, 3206 { "sc", { CHARSECT, '\0' }}, 3207 { "sl", "/" }, 3208 { "ua", "^" }, 3209 { "ul", "_" } 3210 }; 3211 #define speccnt (sizeof spec / sizeof spec[0]) 3212 3213 /* tbl line descriptions */ 3214 char *tbl[20][20]; /* space enough for twenty description lines, twenty parts each */ 3215 int tblc=0; 3216 int tbli; 3217 3218 int fsourceTab = 0, fosourceTab=0; 3219 int supresseol=0; 3220 int finitDoc=0; 3221 int sublevel=0; 3222 3223 char * 3224 source_gets(void) { 3225 char *p,*q; 3226 char *ret = (*in)?buf:NULL; 3227 int i; 3228 char tmpbuf[MAXBUF]; 3229 char name[3]; 3230 3231 if (!sublevel) AbsLine++; 3232 3233 p = tmpbuf; 3234 falluc = 1; 3235 while 
(1) { 3236 /* collect characters in line */ 3237 while (*in && *in!='\n') { 3238 if (p[0]=='\\' && p[1]=='\n') p+=2; /* \<newline> */ 3239 falluc = falluc && !islower(*in); 3240 *p++ = *in++; 3241 } 3242 if (*in) in++; 3243 *p='\0'; 3244 3245 /* normalize commands */ 3246 p=tmpbuf; q=buf; /* copy from tmpbuf to buf */ 3247 /* no spaces between command-initiating period and command letters */ 3248 if (*p=='\'') { *p='.'; } /* what's the difference? */ 3249 if (*p=='.') { *q++ = *p++; while (isspace(*p)) p++; } 3250 3251 3252 /* convert lines with tabs to tables? */ 3253 fsourceTab=0; 3254 3255 /* if comment at start of line, OK */ 3256 /* dynamically determine iff Tcl/Tk page by scanning comments */ 3257 if (*p=='\\' && *(p+1)=='"') { 3258 if (!fTclTk && strstr(p+1,"supplemental macros used in Tcl/Tk")!=NULL) fTclTk=1; 3259 p+=2; 3260 } 3261 3262 while (*p) { 3263 if (*p=='\t') fsourceTab++; 3264 if (*p=='\\') { 3265 p++; 3266 if (*p=='n') { 3267 p++; 3268 if (*p=='(') { 3269 p++; name[0]=*p++; name[1]=*p++; name[2]='\0'; 3270 } else { 3271 name[0]=*p++; name[1]='\0'; 3272 } 3273 *q='0'; *(q+1)='\0'; /* defaults to 0, in case doesn't exist */ 3274 for (i=0; i<regcnt; i++) { 3275 if (strcmp(reg[i].name,name)==0) { 3276 strcpy(q, reg[i].value); 3277 break; 3278 } 3279 } 3280 q+=strlen(q); 3281 } else if (*p=='"') { /* comment in Digital UNIX, OK elsewhere? */ 3282 *p='\0'; 3283 q--; while (q>buf && isspace(*q)) q--; /* trim tailing whitespace */ 3284 q++; *q='\0'; 3285 } else { 3286 /* verbatim character (often a backslash) */ 3287 *q++ = '\\'; /* postpone interpretation (not the right thing but...) 
*/ 3288 *q++ = *p++; 3289 } 3290 } else *q++ = *p++; 3291 } 3292 3293 /* dumb Digital--later */ 3294 /*if (q-3>plain && q[-1]=='{' && q[-2]=='\\' && q[-3]==' ') q[-3]='\n';*/ 3295 3296 /* close off buf */ 3297 *q='\0'; 3298 3299 /*if (q>buf && q[-1]=='\\' && *in=='.') { /* append next line * /} else break;*/ 3300 break; 3301 } 3302 3303 /*printf("*ret = |%s|\n", ret!=NULL?ret : "NULL");*/ 3304 return ret; 3305 } 3306 3307 3308 /* dump characters from buffer, signalling right tags along the way */ 3309 /* all this work to introduce an internal second pass to recognize man page references */ 3310 /* now for HTTP references too */ 3311 3312 int sI=0; 3313 /* use int linelen from up top */ 3314 int fFlush=1; 3315 3316 void 3317 source_flush(void) { 3318 int i,j; 3319 char *p,*q,*r; 3320 int c; 3321 int manoff,posn; 3322 3323 if (!sI) return; 3324 plain[sI] = '\0'; 3325 3326 /* flush called often enough that all man page references are at end of text to be flushed */ 3327 /* find man page ref */ 3328 if (sI>=4/*+1*/ && (plain[sI-(manoff=1)-1]==')' || plain[sI-(manoff=0)-1]==')')) { 3329 for (q=&plain[sI-manoff-1-1]; q>plain && isalnum(*q) && *q!='('; q--) /* nada */; 3330 if (*q=='(' && strcoloncmp(&q[1],')',vollist)) { 3331 r=q-1; 3332 if (*r==' ' && (sectheadid==SEEALSO || /*single letter volume */ *(q+2)==')' || *(q+3)==')')) r--; /* permitted single intervening space */ 3333 for ( ; r>=plain && (isalnum(*r) || strchr(manvalid,*r)!=NULL); r--) /* nada */; 3334 r++; 3335 if (isalpha(*r) && r<q) { 3336 /* got one: clear out tags and spaces to make normalized form */ 3337 posn = r-plain; 3338 /*while (tagc && tags[tagc-1].first >= posn) tagc--;*/ 3339 3340 /* add MANREF tags */ 3341 strcpy(hitxt,r); 3342 tagadd(BEGINMANREF, posn, 0); 3343 /* already generated other start tags, so move BEGINMANREF to start in order to be well nested (ugh) */ 3344 tagtmp = tags[tagc-1]; for (j=tagc-1; j>0; j--) tags[j]=tags[j-1]; tags[0]=tagtmp; 3345 tagadd(ENDMANREF, sI-manoff-1+1, 0); 
3346 } 3347 } 3348 3349 /* HTML hyperlinks */ 3350 } else if (fURL && sI>=4 && (p=strstr(plain,"http"))!=NULL) { 3351 i = p-plain; 3352 tagadd(BEGINMANREF, i, 0); tagtmp = tags[tagc-1]; for (j=tagc-1; j>0; j--) tags[j]=tags[j-1]; tags[0]=tagtmp; 3353 for (j=0; i<sI && !isspace(*p) && *p!='"' && *p!='>'; i++,j++) hitxt[j] = *p++; 3354 hitxt[j]='\0'; 3355 tagadd(ENDMANREF, i, 0); 3356 } 3357 3358 if (!fFlush) return; 3359 3360 /* output text */ 3361 for (i=0,j=0,p=plain; i<sI && *p; i++,p++) { 3362 if (!linelen) (*fn)(BEGINLINE); /* issue BEGINLINE when know will be chars on line */ 3363 3364 /* dump tags */ 3365 /*for ( ; j<tagc && tags[j].first == i; j++) (*fn)(tags[j].type);*/ 3366 for (j=0; j<tagc; j++) if (tags[j].first == i) (*fn)((enum command)tags[j].type); 3367 3368 /* dump text */ 3369 c = (*p)&0xff; /* just make c unsigned? */ 3370 if (strchr(escchars,c)!=NULL) { 3371 xputchar('\\'); xputchar(c); 3372 if (fcharout) linelen++; 3373 } else if (strchr(trouble,c)!=NULL) { 3374 (*fn)(c); 3375 } else if (linelen>=LINEBREAK && c==' ') { (*fn)(ENDLINE); linelen=0; 3376 } else { /* normal character */ 3377 xputchar(c); 3378 if (fcharout) linelen++; 3379 } 3380 3381 /*if (linelen>=LINEBREAK && c==' ') { (*fn)(ENDLINE); linelen=0; } -- leaves space at end of line*/ 3382 } 3383 /* dump tags at end */ 3384 /*for ( ; j<tagc && tags[j].first == sI; j++) (*fn)(tags[j].type);*/ 3385 for (j=0; j<tagc; j++) if (tags[j].first==sI) (*fn)((enum command)tags[j].type); 3386 3387 sI=0; tagc=0; 3388 } 3389 3390 3391 /* source_out stuffs characters in a buffer */ 3392 char * 3393 source_out0(char *p, char end) { 3394 /* stack of character formattings */ 3395 static enum tagtype styles[20]; 3396 static int style=-1; 3397 int funwind=0; 3398 int i, j; 3399 int len; 3400 int sign; 3401 3402 while (*p && *p!=end) { 3403 if (*p=='\\') { /* escape character */ 3404 switch (*++p) { 3405 3406 case '&': /* no space. 
used as a no-op sometimes */ 3407 case '^': /* 1/12 em space */ 3408 case '|': /* 1/6 em space */ 3409 case '%': /* hyphenation indicator */ 3410 /* just ignore it */ 3411 p++; 3412 break; 3413 case '0': /* digit width space */ 3414 p++; 3415 sputchar(' '); 3416 break; 3417 case ' ': /* unpaddable space */ 3418 stagadd(CHARNBSP); /* nonbreaking space */ 3419 /*sputchar(' ');*/ 3420 p++; 3421 break; 3422 case 's': /* font size change */ 3423 p++; 3424 sign=1; 3425 if (*p=='-' || *p=='+') if (*p++=='-') sign=-1; 3426 intArg = sign * ((*p++)-'0'); 3427 if (intArg==0) intArg = -fontdelta; /* s0 returns to normal height */ 3428 if (fontdelta || intArg) { source_flush(); (*fn)(FONTSIZE); } 3429 break; 3430 case 'v': /* vertical motion */ 3431 case 'h': /* horizontal motion */ 3432 case 'L': /* vertical line */ 3433 case 'l': /* horizontal line */ 3434 /* ignore */ 3435 p++; 3436 if (*p=='\'') { p++; while (*p++!='\''); } 3437 break; 3438 case '"': /* comment */ 3439 *p='\0'; /* rest of line is comment */ 3440 break; 3441 case 'f': 3442 p++; 3443 switch (*p++) { 3444 case '3': case 'B': /* boldface */ 3445 styles[++style] = BOLD; 3446 stagadd(BEGINBOLD); 3447 break; 3448 case '2': case 'I': /* italics */ 3449 styles[++style] = ITALICS; 3450 stagadd(BEGINITALICS); 3451 break; 3452 case '4': /* bolditalics mode => program code */ 3453 styles[++style] = BOLDITALICS; 3454 stagadd(BEGINBOLDITALICS); 3455 break; 3456 case '1': case '0': case 'R': case 'P': /* back to Roman */ 3457 /*sputchar(' '); -- taken out; not needed, I hope */ 3458 funwind=1; 3459 break; 3460 case '-': 3461 p++; 3462 break; 3463 } 3464 break; 3465 case '(': /* multicharacter macros */ 3466 p++; 3467 for (i=0; i<speccnt; i++) { 3468 if (p[0]==spec[i].key[0] && p[1]==spec[i].key[1]) { 3469 p+=2; 3470 for (j=0; spec[i].subst[j]; j++) sputchar(spec[i].subst[j]); 3471 break; 3472 } 3473 } 3474 break; 3475 case '*': /* strings */ 3476 p++; 3477 if (*p!='(') { /* single character */ 3478 for (i=0; i<substcnt; 
i++) { 3479 if (*p==subst[i].key[0] && subst[i].key[1]=='\0') { 3480 source_out0(subst[i].subst,'\0'); 3481 break; 3482 } 3483 } 3484 p++; 3485 } else { /* multicharacter macros */ 3486 p++; 3487 for (i=0; i<substcnt; i++) { 3488 len = strlen(subst[i].key); 3489 if (strncmp(p,subst[i].key,len)==0) { 3490 source_out0(subst[i].subst,'\0'); 3491 p+=len; 3492 break; 3493 } 3494 } 3495 } 3496 break; 3497 /*------------------ 3498 } else if (*p=='|') { 3499 stagadd(CHARNBSP); /* nonbreaking space * / 3500 /*sputchar(' ');* / 3501 p++; 3502 -------------------*/ 3503 case 'e': /* escape */ 3504 sputchar('\\'); 3505 p++; 3506 break; 3507 case 'c': 3508 /* supress following carriage return-induced space */ 3509 /* handled in source_gets(); ignore within line => can't because next line might start with a command */ 3510 supresseol=1; 3511 p++; 3512 break; 3513 case '-': /* minus sign */ 3514 sputchar(CHARDASH); 3515 p++; 3516 break; 3517 /*----------------------- 3518 } else if (*p=='^') { 3519 /* end stylings? 
(found in Solaris) * / 3520 p++; 3521 -------------------*/ 3522 default: /* unknown escaped character */ 3523 sputchar(*p++); 3524 } 3525 3526 } else { /* normal character */ 3527 if (*p) sputchar(*p++); 3528 } 3529 3530 3531 /* unwind character formatting stack */ 3532 if (funwind) { 3533 for ( ; style>=0; style--) { 3534 if (styles[style]==BOLD) stagadd(ENDBOLD); 3535 else if (styles[style]==ITALICS) stagadd(ENDITALICS); 3536 else stagadd(ENDBOLDITALICS); 3537 } /* else error */ 3538 assert(style==-1); 3539 3540 funwind=0; 3541 } 3542 3543 /* check for man page reference and flush buffer if safe */ 3544 /* postpone check until after following character so catch closing tags */ 3545 if ((sI>=4+1 && plain[sI-1-1]==')') || 3546 /* (plain[sI-1]==' ' && (q=strchr(plain,' '))!=NULL && q<&plain[sI-1])) {*/ 3547 (plain[sI-1]==' ' && !isalnum(plain[sI-1-1]))) { 3548 /* regardless, flush buffer */ 3549 source_flush(); 3550 } 3551 } 3552 3553 if (*p && *p!=' ') p++; /* skip over end character */ 3554 return p; 3555 } 3556 3557 /* oh, for function overloading. inlined by compiler, probably */ 3558 char *source_out(char *p) { 3559 return source_out0(p,'\0'); 3560 } 3561 3562 3563 char * 3564 source_out_word(char *p) { 3565 char end = ' '; 3566 3567 while (*p && isspace(*p)) p++; 3568 if (*p=='"' /* || *p=='`' ? 
*/) { 3569 end = *p; 3570 p++; 3571 } 3572 p = source_out0(p,end); 3573 /*while (*p && isspace(*p)) p++;*/ 3574 return p; 3575 } 3576 3577 3578 void 3579 source_struct(enum command cmd) { 3580 source_out("\\fR\\s0"); /* don't let run-on stylings run past structural units */ 3581 source_flush(); 3582 if (cmd==SHORTLINE) linelen=0; 3583 (*fn)(cmd); 3584 } 3585 3586 #define checkcmd(str) strcmp(cmd,str)==0 3587 3588 int finnf=0; 3589 3590 void source_line(char *p); 3591 void 3592 source_subfile(char *newin) { 3593 char *p; 3594 char *oldin = in; 3595 3596 sublevel++; 3597 3598 in = newin; 3599 while ((p=source_gets())!=NULL) { 3600 source_line(p); 3601 } 3602 in = oldin; 3603 3604 sublevel--; 3605 } 3606 3607 /* have to delay acquisition of list tag */ 3608 void 3609 source_list(void) { 3610 static int oldlisttype; /* OK to have just one because nested lists done with RS/RE */ 3611 char *q; 3612 int i; 3613 3614 /* guard against empty bullet */ 3615 for (i=0, q=plain; i<sI && isspace(*q); q++,i++) /* empty */; 3616 if (i==sI) return; 3617 3618 assert(finlist); 3619 3620 fFlush=1; 3621 3622 /* try to determine type of list: DL, OL, UL */ 3623 q=plain; plain[sI]='\0'; 3624 if (/*c==CHARBULLET || q=='-' -- command line opts! ||*/ *q=='.' 
|| *q&0x80) { 3625 listtype=UL; 3626 q++; 3627 } else { 3628 if (strchr("[(",*q)) q++; 3629 while (isdigit(*q)) { listtype=OL; q++; } /* I hope this gives the right number */ 3630 if (*q=='.') q++; 3631 if (strchr(")]",*q)) q++; 3632 if (*q=='.') q++; 3633 while (isspace(*q)) q++; 3634 if (*q) listtype=DL; 3635 } 3636 oldlisttype = listtype; 3637 3638 /* interpretation left to output formats based on listtype (HTML: DL, OL, UL) */ 3639 i = sI; sI=0; 3640 if (!Pbp || listtype!=oldlisttype) poppush(BEGINBULPAIR); 3641 poppush(BEGINBULLET); 3642 /*if (tphp) source_line(p); else source_out_word(p);*/ 3643 /*if (listtype!=OL && listtype!=UL)*/ sI=i; 3644 source_struct(ENDBULLET); Pbul=0; /* handled immediately below */ 3645 poppush(BEGINBULTXT); 3646 3647 finlist=0; 3648 } 3649 3650 static int inComment=0; 3651 static int isComment=0; 3652 3653 void 3654 source_command(char *p) { 3655 static int lastif=1; 3656 int mylastif; 3657 char *cmd=p; 3658 char *q; 3659 int i,j,endch; 3660 int fid; 3661 struct stat fileinfo; 3662 char *sobuf; 3663 char *macroArg[9]; 3664 char endig[10]; 3665 int err=0; 3666 char ch; 3667 int tphp=0; 3668 int ie=0; 3669 int cond,invcond=0; 3670 char delim,op; 3671 char if0[80], if1[80]; 3672 float nif0, nif1; 3673 int insertat; 3674 char macrobuf[MAXBUF]; /* local so can have nested macros */ 3675 static char ft='\0'; 3676 static int fTableCenter=0; 3677 3678 /* should centralize command matching (binary search?), pointer bumping here 3679 if for no other reason than to catch conflicts -- and allow overrides? 
*/ 3680 /* parse out command */ 3681 while (*p && !isspace(*p)) p++; 3682 if (*p) { *p='\0'; p++; } 3683 /* should set up argv, argc for command arguments--it's regular enough that everyone doesn't have to do it itself */ 3684 3685 if (isComment) { 3686 /* maybe have BEGINCOMMENT, ENDCOMMENT, COMMENTLINE */ 3687 supresseol=0; 3688 if (!inComment) { source_flush(); source_struct(BEGINCOMMENT); inComment=1; } 3689 source_struct(COMMENTLINE); 3690 printf("%s\n", p); /* could contain --> or other comment closer, but unlikely */ 3691 3692 /* structural commands */ 3693 } else if (checkcmd("TH")) { 3694 /* sample: .TH CC 1 "Dec 1990" */ 3695 /* overrides command line -- should fix this */ 3696 if (!finitDoc) { 3697 while (isspace(*p)) p++; 3698 if (*p) { 3699 /* name */ 3700 q=strchr(p, ' '); if (q!=NULL) *q++='\0'; 3701 strcpy(manName, p); 3702 /* number */ 3703 p = q; 3704 if (p!=NULL) { 3705 while (isspace(*p)) p++; 3706 if (*p) { q=strchr(p,' '); if (q!=NULL) *q++='\0'; } 3707 } 3708 strcpy(manSect, p!=NULL? p: "?"); 3709 } 3710 sI=0; 3711 finitDoc=1; 3712 (*fn)(BEGINDOC); 3713 /* emit information in .TH line? */ 3714 } /* else complain about multiple definitions? */ 3715 3716 } else if (checkcmd("SH") || checkcmd("Sh")) { /* section title */ 3717 while (indent) { source_command("RE"); } 3718 source_flush(); 3719 3720 pop(BEGINSECTION); /* before reset sectheadid */ 3721 3722 if (*p) { 3723 if (*p=='"') { p++; q=p; while (*q && *q!='"') q++; *q='\0'; } 3724 finnf=0; 3725 for (j=0; (sectheadid=j)<RANDOM; j++) if (strcoloncmp2(p,'\0',sectheadname[j],0)) break; 3726 if (!finitDoc) { 3727 /* handle missing .TH */ 3728 /* if secthead!=NAME -- insist on this? 
3729 fprintf(stderr, "Bogus man page: no .TH or \".SH NAME\" lines\n"); 3730 exit(1); 3731 */ 3732 (*fn)(BEGINDOC); 3733 finitDoc=1; 3734 } 3735 poppush(BEGINSECTION); source_struct(BEGINSECTHEAD); 3736 fFlush=0; 3737 if (falluc) casify(p); 3738 source_out(p); /* people forget the quotes around multiple words */ 3739 while (isspace(plain[--sI])) /*nada*/; 3740 plain[++sI]='\0'; tocadd(plain, BEGINSECTION, CurLine); /* flushed with source_struct above */ 3741 fFlush=1; 3742 source_struct(ENDSECTHEAD); 3743 } 3744 } else if (checkcmd("SS")) { /* subsection title */ 3745 while (indent) { source_command("RE"); } 3746 source_flush(); 3747 3748 if (*p) { 3749 if (*p=='"') { p++; q=p; while (*q && *q!='"') q++; *q='\0'; } 3750 finnf=0; 3751 source_flush(); 3752 poppush(BEGINSUBSECTION); source_struct(BEGINSUBSECTHEAD); 3753 fFlush=0; 3754 3755 if (falluc) casify(p); 3756 source_out(p); /* people forget the quotes around multiple words */ 3757 while (isspace(plain[--sI])) /*nada*/; 3758 plain[++sI]='\0'; tocadd(plain, BEGINSUBSECTION, CurLine); 3759 fFlush=1; 3760 source_struct(ENDSUBSECTHEAD); 3761 } 3762 3763 } else if (checkcmd("P") || checkcmd("PP") || checkcmd("LP")) { /* new paragraph */ 3764 source_flush(); 3765 poppush(BEGINBODY); 3766 3767 } else if ((tphp=checkcmd("TP")) || (tphp=checkcmd("HP")) || checkcmd("IP") || checkcmd("LI")) { 3768 /* TP, HP: indented paragraph, tag on next line (DL list) */ 3769 /* IP, LI: tag as argument */ 3770 source_flush(); 3771 fFlush=0; 3772 finlist=1; 3773 if (!tphp) { source_out_word(p); source_list(); } 3774 /* lists terminated only at start of non-lists */ 3775 } else if (checkcmd("RS")) { /* set indent */ 3776 source_struct(BEGININDENT); 3777 indent++; 3778 } else if (checkcmd("RE")) { 3779 if (indent) indent--; 3780 pop(ENDINDENT); 3781 source_struct(ENDINDENT); 3782 /* 3783 } else if (checkcmd("Xr")) { 3784 /* 4.4BSD man ref * / 3785 supresseol=0; 3786 p=source_out_word(p); 3787 source_out("("); 3788 p=source_out_word(p); 
3789 source_out(")"); 3790 */ 3791 3792 } else if (checkcmd("nf")) { 3793 source_struct(SHORTLINE); 3794 finnf=1; 3795 source_struct(BEGINCODEBLOCK); 3796 } else if (checkcmd("fi")) { 3797 source_struct(ENDCODEBLOCK); 3798 finnf=0; 3799 } else if (checkcmd("br")) { 3800 source_struct(SHORTLINE); 3801 } else if (checkcmd("sp") || checkcmd("SP")) { /* blank lines */ 3802 /*if (!finTable) {*/ 3803 if (finnf) source_struct(SHORTLINE); else source_struct(BEGINBODY); 3804 /*}*/ 3805 } else if (checkcmd("ta")) { /* set tab stop(s?) */ 3806 /* argument is tab stop -- handle these as tables => leave to output format */ 3807 /* HTML handles tables but not tabs, Tk's text tabs but not tables */ 3808 /* does cause a linebreak */ 3809 stagadd(BEGINBODY); 3810 } else if (checkcmd("ce")) { 3811 /* get line count, recursively filter for that many lines */ 3812 if (sscanf(p, "%d", &i)) { 3813 source_struct(BEGINCENTER); 3814 for (; i>0 && (p=source_gets())!=NULL; i--) source_line(p); 3815 source_struct(ENDCENTER); 3816 } 3817 3818 /* limited selection of control structures */ 3819 } else if (checkcmd("if") || (checkcmd("ie"))) { /* if <test> cmd, if <test> command and else on next line */ 3820 supresseol=1; 3821 ie = checkcmd("ie"); 3822 mylastif=lastif; 3823 3824 if (*p=='!') { invcond=1; p++; } 3825 3826 if (*p=='n') { cond=1; p++; } /* masquerading as nroff the right thing to do? 
*/ 3827 else if (*p=='t') { cond=0; p++; } 3828 else if (*p=='(' || *p=='-' || *p=='+' || isdigit(*p)) { 3829 if (*p=='(') p++; 3830 nif0=atof(p); 3831 if (*p=='-' || *p=='+') p++; while (isdigit(*p)) p++; 3832 op = *p++; /* operator: =, >, < */ 3833 if (op==' ') { 3834 cond = (nif0!=0); 3835 } else { 3836 nif1=atoi(p); 3837 while (isdigit(*p)) p++; 3838 if (*p==')') p++; 3839 if (op=='=') cond = (nif0==nif1); 3840 else if (op=='<') cond = (nif0<nif1); 3841 else /* op=='>' -- ignore >=, <= */ cond = (nif0>nif1); 3842 } 3843 } else if (!isalpha(*p)) { /* usually quote, ^G in Digital UNIX */ 3844 /* gobble up comparators between delimiters */ 3845 delim = *p++; 3846 q = if0; while (*p!=delim) { *q++=*p++; } *q='\0'; p++; 3847 q = if1; while (*p!=delim) { *q++=*p++; } *q='\0'; p++; 3848 cond = (strcmp(if0,if1)==0); 3849 } else cond=0; /* a guess, seems to be right bettern than half the time */ 3850 if (invcond) cond=1-cond; 3851 while (isspace(*p)) p++; 3852 3853 lastif = cond; 3854 if (strncmp(p,"\\{",2)==0) { /* rather than handle groups here, have turn on/off output flag? 
*/ 3855 p+=2; while (isspace(*p)) p++; 3856 while (strncmp(p,".\\}",3)!=0 || strncmp(p,"\\}",2)!=0 /*Solaris*/) { 3857 if (cond) source_line(p); 3858 if ((p=source_gets())==NULL) break; 3859 } 3860 } else if (cond) source_line(p); 3861 3862 if (ie) source_line(source_gets()); /* do else part with prevailing lastif */ 3863 3864 lastif=mylastif; 3865 3866 } else if (checkcmd("el")) { 3867 mylastif=lastif; 3868 3869 /* should centralize gobbling of groups */ 3870 cond = lastif = !lastif; 3871 if (strncmp(p,"\\{",2)==0) { 3872 p+=2; while (isspace(*p)) p++; 3873 while (strncmp(p,".\\}",3)!=0 || strncmp(p,"\\}",2)!=0 /*Solaris*/) { 3874 if (cond) source_line(p); 3875 if ((p=source_gets())==NULL) break; 3876 } 3877 } else if (cond) source_line(p); 3878 3879 lastif=mylastif; 3880 3881 } else if (checkcmd("ig")) { /* "ignore group" */ 3882 strcpy(endig,".."); if (*p) { endig[0]='.'; strcpy(&endig[1],p); } 3883 while ((p=source_gets())!=NULL) { 3884 if (strcmp(p,endig)==0) break; 3885 if (!lastif) source_line(p); /* usually ignore line, except in one weird case */ 3886 } 3887 3888 3889 /* macros and substitutions */ 3890 } else if (checkcmd("de")) { 3891 /* grab key */ 3892 q=p; while (*q && !isspace(*q)) q++; *q='\0'; 3893 3894 /* if already have a macro of that name, override it */ 3895 /* could use a good dictionary class */ 3896 for (insertat=0; insertat<macrocnt; insertat++) { 3897 if (strcmp(p,macro[insertat].key)==0) break; 3898 } 3899 if (insertat==macrocnt) macrocnt++; 3900 3901 /* should replace one with same name, if one exists */ 3902 macro[insertat].key = mystrdup(p); 3903 3904 /* build up macro in plain[] ... */ 3905 /* everything until ".." 
line part of macro */ 3906 q=plain; i=0; 3907 while ((p=source_gets())!=NULL) { 3908 if (strcmp(p,"..")==0) break; 3909 while (*p) { /* append string, interpreting quotes along the way--just double backslash to single now */ 3910 if (*p=='\\' && p[1]=='\\') p++; 3911 *q++=*p++; 3912 } 3913 *q++='\n'; 3914 } 3915 *q='\0'; 3916 3917 /* ... then copy once have whole thing */ 3918 macro[insertat].subst = mystrdup(plain); 3919 /*fprintf(stderr, "defining macro %s as %s\n", macro[insertat].key, macro[insertat].subst);*/ 3920 sI=0; 3921 3922 } else if (checkcmd("rm")) { /* remove macro definition, can have multiple arguments */ 3923 for (i=0; i<macrocnt; i++) { /* moot as new definitions replace old when conflicts */ 3924 if (strcmp(p,macro[i].key)) { 3925 macro[i] = macro[--macrocnt]; 3926 break; 3927 } 3928 } 3929 3930 } else if (checkcmd("ds")) { /* text substitutions (like macros) */ 3931 /* /usr/sww/man/man1/CC.1 a good test of this */ 3932 q = strchr(p,' '); 3933 if (q!=NULL) { 3934 *q='\0'; while (*++q) /*nada*/; 3935 if (*q=='"') q++; 3936 if (substcnt<SUBSTMAX) { 3937 subst[substcnt].key = mystrdup(p); subst[substcnt].subst = mystrdup(q); substcnt++; 3938 } 3939 /*fprintf(stderr, "defining substitution: name=%s, body=%s\n", p, q);*/ 3940 } 3941 3942 } else if (checkcmd("so")) { 3943 /* assuming in .../man like nroff, source in file and execute it as nested file, */ 3944 /* so nested .so's OK */ 3945 3946 err = 1; /* assume error unless successful */ 3947 if (fTclTk) { 3948 err=0; 3949 } else if (stat(p, &fileinfo)==0) { 3950 sobuf = malloc(fileinfo.st_size + 1); 3951 if (sobuf!=NULL) { 3952 /* suck in entire file */ 3953 fid = open(p, O_RDONLY); 3954 if (fid!=-1) { 3955 if (read(fid, sobuf, fileinfo.st_size) == fileinfo.st_size) { 3956 sobuf[fileinfo.st_size]='\0'; 3957 /* dumb Digital puts \\} closers on same line */ 3958 for (q=sobuf; (q=strstr(q," \\}"))!=NULL; q+=3) *q='\n'; 3959 source_subfile(sobuf); 3960 err = 0; 3961 } 3962 close(fid); 3963 } 3964 
free(sobuf); 3965 } 3966 } 3967 3968 if (err) { 3969 fprintf(stderr, "%s: couldn't read in referenced file %s.\n", argv0, p); 3970 if (strchr(p,'/')==NULL) { 3971 fprintf(stderr, "\tTry cd'ing into same directory of man page first.\n"); 3972 } else if (strchr(p,'/')==strrchr(p,'.')) { 3973 fprintf(stderr, "\tTry cd'ing into parent directory of man page first.\n"); 3974 } else { 3975 fprintf(stderr, "\tTry cd'ing into ancestor directory that makes relative path valid first.\n"); 3976 } 3977 exit(1); 3978 } 3979 3980 3981 /* character formatting */ 3982 /* reencode m/any as macro definitions? would like to but the below don't have "words" */ 3983 } else if (checkcmd("ft")) { /* change font, next char is R,I,B. P=previous not supported */ 3984 if (ft=='B') stagadd(ENDBOLD); else if (ft=='I') stagadd(ENDITALICS); 3985 ft = *p++; 3986 if (ft=='B') stagadd(BEGINBOLD); else if (ft=='I') stagadd(BEGINITALICS); 3987 } else if (checkcmd("B")) { 3988 supresseol=0; 3989 stagadd(BEGINBOLD); p = source_out_word(p); source_out(p); stagadd(ENDBOLD); 3990 } else if (checkcmd("I")) { 3991 supresseol=0; 3992 stagadd(BEGINITALICS); p = source_out_word(p); stagadd(ENDITALICS); 3993 source_out(p); 3994 } else if (checkcmd("BI")) { 3995 supresseol=0; 3996 while (*p) { 3997 stagadd(BEGINBOLD); p = source_out_word(p); stagadd(ENDBOLD); 3998 if (*p) { stagadd(BEGINITALICS); p = source_out_word(p); stagadd(ENDITALICS); } 3999 } 4000 } else if (checkcmd("IB")) { 4001 supresseol=0; 4002 while (*p) { 4003 stagadd(BEGINITALICS); p = source_out_word(p); stagadd(ENDITALICS); 4004 if (*p) { stagadd(BEGINBOLD); p = source_out_word(p); stagadd(ENDBOLD); } 4005 } 4006 } else if (checkcmd("RB")) { 4007 supresseol=0; 4008 while (*p) { 4009 p = source_out_word(p); 4010 if (*p) { stagadd(BEGINBOLD); p = source_out_word(p); stagadd(ENDBOLD); } 4011 } 4012 } else if (checkcmd("BR")) { 4013 supresseol=0; 4014 while (*p) { 4015 stagadd(BEGINBOLD); p = source_out_word(p); stagadd(ENDBOLD); 4016 p = 
source_out_word(p); 4017 } 4018 } else if (checkcmd("IR")) { 4019 supresseol=0; 4020 while (*p) { 4021 stagadd(BEGINITALICS); p=source_out_word(p); stagadd(ENDITALICS); 4022 p=source_out_word(p); 4023 } 4024 } else if (checkcmd("RI")) { 4025 supresseol=0; 4026 while (*p) { 4027 p=source_out_word(p); 4028 stagadd(BEGINITALICS); p=source_out_word(p); stagadd(ENDITALICS); 4029 } 4030 4031 4032 /* HP-UX */ 4033 } else if (checkcmd("SM")) { 4034 supresseol=0; source_out("\\s-1"); while (*p) p=source_out(p); source_out("\\s0"); 4035 } else if (checkcmd("C")) { 4036 supresseol=0; 4037 stagadd(BEGINCODE); while (*p) p=source_out_word(p); stagadd(ENDCODE); 4038 } else if (checkcmd("CR")) { 4039 supresseol=0; 4040 while (*p) { 4041 stagadd(BEGINCODE); p=source_out_word(p); stagadd(ENDCODE); 4042 if (*p) p=source_out_word(p); 4043 } 4044 } else if (checkcmd("RC")) { 4045 supresseol=0; 4046 while (*p) { 4047 p=source_out_word(p); 4048 if (*p) { stagadd(BEGINCODE); p=source_out_word(p); stagadd(ENDCODE); } 4049 } 4050 } else if (checkcmd("CI")) { 4051 supresseol=0; 4052 while (*p) { 4053 stagadd(BEGINCODE); p=source_out_word(p); stagadd(ENDCODE); 4054 if (*p) { stagadd(BEGINITALICS); p=source_out_word(p); stagadd(ENDITALICS); } 4055 } 4056 4057 4058 /* tables */ 4059 } else if (checkcmd("TS")) { 4060 tblc=0; /*tblspanmax=0;*/ tableSep='\0'; /* need to reset each time because tabbed lines (.ta) made into tables too */ 4061 while ((p = source_gets())!=NULL) { 4062 if ((q=strstr(p,"tab"))!=NULL) { /* "tab(" or "tab (". 
table entry separator */ 4063 p=(q+3); while (isspace(*p)) p++; 4064 p++; /* jump over '(' */ 4065 tableSep=*p; 4066 continue; 4067 } 4068 if (strincmp(p,"center",strlen("center"))==0) { /* center entire table; should look for "left" and "right", probably */ 4069 fTableCenter=1; source_struct(BEGINCENTER); 4070 p+=strlen("center"); while (isspace(*p)) p++; 4071 continue; 4072 } 4073 if (p[strlen(p)-1]==';') { tblc=0; continue; } /* HP has a prequel terminated by ';' */ 4074 4075 for (i=0; *p; i++,p=q) { 4076 if (*p=='.') break; 4077 if (*p=='f') p+=2; /* DEC sets font here */ 4078 q=p+1; 4079 if (strchr("lrcn",*q)==NULL) { /* dumb DEC doesn't put spaces between them */ 4080 while (*q && *q!='.' && !isspace(*q)) q++; 4081 } 4082 ch=*q; *q='\0'; 4083 tbl[tblc][i] = mystrdup(p); 4084 tbl[tblc][i+1] = ""; /* mark end */ 4085 *q=ch; 4086 while (*q && isspace(*q)) q++; 4087 } 4088 /*if (i>tblspanmax) tblspanmax=i;*/ 4089 tbl[tblc++][i]=""; /* mark end */ 4090 if (*p=='.') break; 4091 } 4092 tbli=0; 4093 source_struct(BEGINTABLE); 4094 4095 while ((p=source_gets())!=NULL) { 4096 if (strncmp(p,".TE",3)==0) break; 4097 if (*p=='.') { source_line(p); continue; } 4098 4099 /* count number of entries on line. 
if >1, can use to set tableSep */ 4100 insertat=0; for (j=0; *tbl[tbli][j]; j++) if (*tbl[tbli][j]!='s') insertat++; 4101 if (!tableSep && insertat>1) if (fsourceTab) tableSep='\t'; else tableSep='@'; 4102 source_struct(BEGINTABLELINE); 4103 if (strcmp(p,"_")==0 || /* double line */ strcmp(p,"=")==0) { 4104 source_out(" "); 4105 /*stagadd(HR);*/ /* empty row -- need ROWSPAN for HTML */ 4106 continue; 4107 } 4108 4109 for (i=0; *tbl[tbli][i] && *p; i++) { 4110 tblcellspan=1; 4111 tblcellformat = tbl[tbli][i]; 4112 if (*tblcellformat=='^') { /* vertical span => blank entry */ 4113 tblcellformat="l"; 4114 } else if (*tblcellformat=='|') { 4115 /* stagadd(VBAR); */ 4116 continue; 4117 } else if (strchr("lrcn", *tblcellformat)==NULL) { 4118 tblcellformat="l"; 4119 /*continue;*/ 4120 } 4121 4122 while (strncmp(tbl[tbli][i+1],"s",1)==0) { tblcellspan++; i++; } 4123 4124 source_struct(BEGINTABLEENTRY); 4125 if (toupper(tblcellformat[1])=='B') stagadd(BEGINBOLD); 4126 else if (toupper(tblcellformat[1])=='I') stagadd(BEGINITALICS); 4127 /* not supporting DEC's w(<num><unit>) */ 4128 4129 if (strcmp(p,"T{")==0) { /* DEC, HP */ 4130 while (strncmp(p=source_gets(),"T}",2)!=0) source_line(p); 4131 p+=2; if (*p) p++; 4132 } else { 4133 p = source_out0(p, tableSep); 4134 } 4135 if (toupper(tblcellformat[1])=='B') stagadd(ENDBOLD); 4136 else if (toupper(tblcellformat[1])=='I') stagadd(ENDITALICS); 4137 source_struct(ENDTABLEENTRY); 4138 } 4139 if (tbli+1<tblc) tbli++; 4140 source_struct(ENDTABLELINE); 4141 } 4142 source_struct(ENDTABLE); 4143 if (fTableCenter) { source_struct(ENDCENTER); fTableCenter=0; } 4144 4145 4146 } else if (checkcmd("nr")) { 4147 q=p; while (*q && !isspace(*q)) q++; *q='\0'; q++; 4148 4149 for (insertat=0; insertat<regcnt; insertat++) { 4150 if (strcmp(reg[insertat].name,p)==0) break; 4151 } 4152 if (insertat==regcnt) { regcnt++; reg[insertat].name = mystrdup(p); } /* else use same name */ 4153 p=q; 4154 if (*q=='+' || *q=='-') q++; /* accept signed, 
floating point numbers */ 4155 if (*q=='.') q++; 4156 if (!*q || isspace(*q)) { *q++='0'; *q++='\0'; } 4157 while (isdigit(*q)) { q++; } *q='\0'; /* ignore units */ 4158 reg[insertat].value = mystrdup(p); 4159 4160 } else if (checkcmd("EQ")) { /* eqn not supported */ 4161 source_out("\\s-1\\fBeqn not supported\\fR\\s0"); 4162 while ((p=source_gets())!=NULL) { 4163 if (strncmp(p,".EN",3)==0) break; 4164 } 4165 4166 4167 4168 /* Tcl/Tk macros */ 4169 } else if (fTclTk && (checkcmd("VS") || checkcmd("VE"))) { 4170 /* nothing for sidebars */ 4171 } else if (fTclTk && checkcmd("OP")) { 4172 source_struct(BEGINBODY); 4173 for (i=0; i<3; i++) { 4174 if (fcharout) { source_out(tcltkOP[i]); source_out(": "); } 4175 stagadd(BEGINBOLD); p=source_out_word(p); stagadd(ENDBOLD); 4176 source_struct(SHORTLINE); 4177 } 4178 source_struct(BEGINBODY); 4179 4180 } else if (fTclTk && checkcmd("BS")) { /* box */ 4181 /*source_struct(HR); -- ugh, no Ouster box */ 4182 } else if (fTclTk && checkcmd("BE")) { 4183 /*source_struct(HR);*/ 4184 4185 } else if (fTclTk && (checkcmd("CS")||checkcmd("DS"))) { /* code excerpt */ 4186 /* respect line ends, set in teletype */ 4187 /* source_struct(SHORTLINE); -- done as part of CS's ENDLINE */ 4188 finnf=1; 4189 source_struct(SHORTLINE); 4190 if (checkcmd("DS")) source_line(".P"); 4191 stagadd(BEGINCODE); 4192 } else if (fTclTk && (checkcmd("CE")||checkcmd("DE"))) { 4193 stagadd(ENDCODE); 4194 finnf=0; 4195 4196 } else if (fTclTk && checkcmd("SO")) { 4197 source_struct(BEGINSECTION); 4198 source_struct(BEGINSECTHEAD); source_out("STANDARD OPTIONS"); source_struct(ENDSECTHEAD); 4199 tblc=1; tbl[0][0]=tbl[0][1]=tbl[0][2]="l"; tbl[0][3]=""; 4200 source_struct(BEGINTABLE); 4201 while (1) { 4202 p = source_gets(); 4203 if ((strncmp(p,".SE",3))==0) break; 4204 tblcellformat = "l"; 4205 source_struct(BEGINTABLELINE); 4206 if (*p=='.') { 4207 source_command(p); 4208 } else { 4209 while (*p) { 4210 source_struct(BEGINTABLEENTRY); 4211 p = source_out0(p, 
'\t'); 4212 source_struct(ENDTABLEENTRY); 4213 } 4214 } 4215 source_struct(ENDTABLELINE); 4216 } 4217 source_struct(ENDTABLE); 4218 source_struct(ENDSECTION); 4219 4220 } else if (fTclTk && checkcmd("AP")) { /* arguments */ 4221 source_struct(BEGINBODY); 4222 p = source_out_word(p); source_out(" "); 4223 stagadd(BEGINITALICS); p = source_out_word(p); stagadd(ENDITALICS); source_out("\t"); 4224 source_out("("); p = source_out_word(p); source_out(")"); 4225 source_struct(SHORTLINE); 4226 source_out("\t"); 4227 } else if (fTclTk && checkcmd("AS")) { /* arguments */ 4228 4229 /* let these be defined as macros. if they're not, they're just caught as unrecognized macros 4230 } else if (checkcmd("ll") || checkcmd("IX") || 4231 checkcmd("nh")||checkcmd("hy")||checkcmd("hc")||checkcmd("hw") /* hyphenation * / 4232 ) { /* unsupported macros -- usually roff specific * / 4233 4234 fprintf(stderr, "macro \"%s\" not supported -- ignoring\n", cmd); 4235 */ 4236 4237 } else { /* could be a macro definition */ 4238 supresseol=0; 4239 4240 for (i=0; i<macrocnt; i++) { 4241 if (macro[i].key == NULL) continue; /* !!! how does this happen? 
*/ 4242 if (checkcmd(macro[i].key)) { 4243 4244 /* it is, collect arguments */ 4245 for (j=0; j<9; j++) macroArg[j]=""; 4246 for (j=0; p!=NULL && *p && j<9; j++, p=q) { 4247 endch=' '; if (*p=='"') { endch='"'; p++; } 4248 q = strchr(p,endch); 4249 if (q!=NULL) { 4250 *q++='\0'; 4251 if (*q && endch!=' ') q++; 4252 } 4253 macroArg[j] = p; 4254 } 4255 4256 /* instantiate that text, substituting \\[1-9]'s */ 4257 p = macro[i].subst; 4258 q = macrobuf; /* allocated on stack */ 4259 while (*p) { 4260 if (*p=='\\') { 4261 p++; 4262 if (*p=='t') { 4263 *q++ = '\t'; 4264 p++; 4265 } else if (*p=='$' && isdigit(p[1])) { 4266 j = p[1]-'0'-1; /* convert to ASCII and align with macroArg array */ 4267 p+=2; 4268 /* *q++='"'; -- no */ 4269 strcpy(q, macroArg[j]); q += strlen(q); 4270 /* *q++='"'; -- no */ 4271 4272 } else { 4273 *q++ = '\\'; 4274 } 4275 } else { 4276 *q++ = *p++; 4277 } 4278 } 4279 *q='\0'; 4280 4281 /* execute that text */ 4282 /*fprintf(stderr, "for macro %s, substituted text is \n%s\n", macro[i].key, macrobuf);*/ 4283 source_subfile(macrobuf); 4284 4285 break; 4286 } 4287 } 4288 4289 /* macro not found */ 4290 if (i==macrocnt) { 4291 /* report missing macros only once */ 4292 for (j=0; j<macnotcnt; j++) if (strcmp(macnotfound[j],cmd)==0) break; 4293 if (j==macnotcnt) { 4294 if (!fQuiet) fprintf(stderr, "macro \"%s\" not recognized -- ignoring\n", cmd); 4295 q = malloc(strlen(cmd)+1); strcpy(q,cmd); 4296 macnotfound[macnotcnt++] = q; 4297 } 4298 } 4299 } /* else command is unrecognized -- ignore it: we're not a complete [tn]roff implementation */ 4300 4301 /* popular but meaningless commands: .ne (need <n> lines--on infinite scroll */ 4302 } 4303 4304 
/*
 * source_line -- process one line of roff source.
 *
 *   p   the line to process; a NULL pointer is tolerated and ignored.
 *
 * The line is handled in one of three ways:
 *   - it starts with '.': the text after the dot is passed to
 *     source_command();
 *   - it is empty: a BEGINLINE is emitted with ncnt temporarily set to 1;
 *   - anything else: the text is written with source_out(), followed by a
 *     SHORTLINE if finnf is set or the line starts with whitespace.
 *
 * Before that, isComment is recomputed and, if inComment was set but this
 * line is not a comment, an ENDCOMMENT is emitted and inComment is cleared.
 *
 * Afterwards, unless supresseol or finnf is set, a single space is written
 * and, if finlist is set, source_list() is called.  supresseol is always
 * reset to 0 on the way out.
 */
void
source_line(char *p) {
	/*stagadd(BEGINLINE);*/
	char *cmd=p;	/* start of line, kept because p is advanced below */
			/* NOTE(review): checkcmd() is defined outside this excerpt and appears
			   to test this local `cmd` -- confirm against its definition */

	if (p==NULL) return;	/* bug somewhere else, but where? */

	isComment = (/*checkcmd("") ||*/ checkcmd("\\\"") || /*DEC triple dot*/checkcmd(".."));
	if (inComment && !isComment) {	/* special case to handle transition */
		source_struct(ENDCOMMENT);
		inComment=0;
	}

	/* disabled: experimental tab-to-table conversion */
#if 0
	if (*p!='.' && *p!='\'' && !finlist) {
		if (fsourceTab && !fosourceTab) {
			tblc=1; tbli=0; tableSep='\t';
			tbl[0][0]=tbl[0][1]=tbl[0][2]=tbl[0][3]=tbl[0][4]=tbl[0][5]=tbl[0][6]=tbl[0][7]=tbl[0][8]="l";
			source_struct(BEGINTABLE); finTable=1;
		} else if (!fsourceTab && fosourceTab) {
			source_struct(ENDTABLE); finTable=0;
		}
		fosourceTab=fsourceTab;
	}
#endif

	if (*p=='.' /*|| *p=='\'' -- normalized */) {	/* command == starts with "." */
		p++;
		supresseol=1;	/* source_command() may clear this again */
		source_command(p);

	} else if (!*p) {	/* blank line */
		/*source_command("P");*/
		ncnt=1; source_struct(BEGINLINE); ncnt=0;	/* empty line => paragraph break */

#if 0
	} else if (fsourceTab && !finlist /* && pmode */) {	/* can't handle tabs, so try tables */
		source_struct(BEGINTABLE);
		tblcellformat = "l";
		do {
			source_struct(BEGINTABLELINE);
			while (*p) {
				source_struct(BEGINTABLEENTRY);
				p = source_out0(p, '\t');
				source_struct(ENDTABLEENTRY);
			}
			source_struct(ENDTABLELINE);
		} while ((p=source_gets())!=NULL && fsourceTab);
		source_struct(ENDTABLE);
		source_line(p);
#endif

	} else {	/* otherwise normal text */
		source_out(p);
		/* cast: a plain char holding a byte >= 0x80 may be negative, and passing
		   a negative value other than EOF to isspace() is undefined */
		if (finnf || isspace((unsigned char)*cmd)) source_struct(SHORTLINE);
	}

	if (!supresseol && !finnf) {
		source_out(" ");
		if (finlist) source_list();
	}
	supresseol=0;
	/*stagadd(ENDLINE);*/
}
4362 4363 4364 void 4365 source_filter(void) { 4366 char *p = in, *q; 4367 char *oldv,*newv,*shiftp,*shiftq,*endq; 4368 int lenp,lenq; 4369 int i,on1,on2,nn1,nn2,first; 4370 int insertcnt=0, deletecnt=0, insertcnt0; 4371 int nextDiffLine=-1; 4372 char diffcmd, tmpc, tmpendq; 4373 4374 
AbsLine=0; 4375 4376 /* just count length of macro table! */ 4377 for (i=0; macro[i].key!=NULL; i++) /*empty*/; 4378 macrocnt = i; 4379 4380 /* dumb Digital puts \\} closers on same line */ 4381 for (p=in; (p=strstr(p," \\}"))!=NULL; p+=3) *p='\n'; 4382 4383 sI=0; 4384 /* (*fn)(BEGINDOC); -- done at .TH or first .SH */ 4385 4386 4387 /* was: source_subfile(in); */ 4388 while (fDiff && fgets(diffline, MAXBUF, difffd)!=NULL) { 4389 /* requirements: no context lines, no errors in files, ... 4390 change-command: 8a12,15 or 5,7c8,10 or 5,7d3 4391 < from-file-line 4392 < from-file-line... 4393 -- 4394 > to-file-line 4395 > to-file-line... 4396 */ 4397 for (q=diffline; ; q++) { diffcmd=*q; if (diffcmd=='a'||diffcmd=='c'||diffcmd=='d') break; } 4398 if (sscanf(diffline, "%d,%d", &on1,&on2)==1) on2=on1-1+(diffcmd=='d'||diffcmd=='c'); 4399 if (sscanf(++q, "%d,%d", &nn1,&nn2)==1) nn2=nn1-1+(diffcmd=='a'||diffcmd=='c'); 4400 4401 deletecnt = on2-on1+1; 4402 insertcnt = nn2-nn1+1; 4403 4404 nextDiffLine = nn1; 4405 /*assert(nextDiffLine>=AbsLine); -- can happen if inside a macro? */ 4406 if (nextDiffLine<AbsLine) continue; 4407 4408 while (AbsLine<nextDiffLine && (p=source_gets())!=NULL) { 4409 source_line(p); 4410 } 4411 4412 insertcnt0=insertcnt+1; /* eat duplicate insert lines and '---' too */ 4413 diffline2[0] = '\0'; 4414 while (insertcnt && deletecnt) { 4415 if (ungetc(fgetc(difffd),difffd)=='<') { fgetc(difffd); fgetc(difffd); } /* skip '<' */ 4416 /* fill buffer with old line -- but replace if command */ 4417 /* stay away from commands -- too careful if .B <word> */ 4418 do { 4419 p = oldv = fgets(diffline, MAXBUF, difffd); 4420 p[strlen(p)-1]='\0'; /* fgets's \n ending => \0 */ 4421 deletecnt--; 4422 } while (deletecnt && *p=='.'); /* throw out commands in old version */ 4423 4424 q = newv = source_gets(); 4425 insertcnt--; 4426 while (insertcnt && *q=='.') { 4427 source_line(q); 4428 insertcnt--; 4429 } 4430 4431 if (*p=='.' 
|| *q=='.') break; 4432 4433 4434 /* make larger chunk for better diff -- but still keep away from commands */ 4435 lenp=strlen(p); lenq=strlen(q); 4436 while (deletecnt && MAXBUF-lenq>80*2) { 4437 fgetc(difffd); fgetc(difffd); /* skip '<' */ 4438 if (ungetc(fgetc(difffd),difffd)=='.') break; 4439 p=&diffline[lenp]; *p++=' '; lenp++; 4440 fgets(p, MAXBUF-lenp, difffd); p[strlen(p)-1]='\0'; lenp+=strlen(p); 4441 deletecnt--; 4442 } 4443 4444 while (insertcnt && *in!='.' && MAXBUF-lenq>80*2) { 4445 if (newv!=diffline2) { strcpy(diffline2,q); newv=diffline2; } 4446 q=source_gets(); diffline2[lenq]=' '; lenq++; 4447 strcpy(&diffline2[lenq],q); lenq+=strlen(q); 4448 insertcnt--; 4449 } 4450 4451 /* common endings */ 4452 p = &p[strlen(oldv)]; q=&q[strlen(newv)]; 4453 while (p>oldv && q>newv && p[-1]==q[-1]) { p--; q--; } 4454 if ((p>oldv && p[-1]=='\\') || (q>newv && q[-1]=='\\')) 4455 while (*p && *q && !isspace(*p)) { p++; q++; } /* steer clear of escapes */ 4456 tmpendq=*q; *p=*q='\0'; endq=q; 4457 4458 p=oldv; q=newv; 4459 while (*p && *q) { 4460 /* common starts */ 4461 newv=q; while (*p && *q && *p==*q) { p++; q++; } 4462 if (q>newv) { 4463 tmpc=*q; *q='\0'; source_line(newv); *q=tmpc; 4464 } 4465 4466 /* too hard to read */ 4467 /* difference: try to find hunk of p in remainder of q */ 4468 if (strlen(p)<15 || (shiftp=strchr(&p[15],