"Fossies" - the Fresh Open Source Software Archive

Member "mod_proxy_html/mod_proxy_html.c" (30 Oct 2009, 38390 Bytes) of package /linux/www/apache_httpd_modules/old/mod_proxy_html.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /********************************************************************
    2      Copyright (c) 2003-9, WebThing Ltd
    3      Author: Nick Kew <nick@webthing.com>
    4 
    5 This program is free software; you can redistribute it and/or modify
    6 it under the terms of the GNU General Public License  Version 2,
    7 as published by the Free Software Foundation.
    8 
    9 This program is distributed in the hope that it will be useful,
   10 but WITHOUT ANY WARRANTY; without even the implied warranty of
   11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12 GNU General Public License for more details.
   13 
   14 You can obtain a copy of the GNU General Public License Version 2
   15 from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or
   16 http://apache.webthing.com/COPYING.txt
   17 
   18 *********************************************************************/
   19 
   20 /**** NOTICE TO PACKAGERS
   21  *
   22  * This module now relies on mod_xml2enc for i18n support.
   23  * You should make mod_xml2enc a dependency in your packages.
   24  */
   25 
   26 /* End of Notices */
   27 
   28 
   29 
   30 
   31 /*  GO_FASTER
   32 
   33     You can #define GO_FASTER to disable informational logging.
   34     This disables the ProxyHTMLLogVerbose option altogether.
   35 
   36     Default is to leave it undefined, and enable verbose logging
   37     as a configuration option.  Binaries are supplied with verbose
   38     logging enabled.
   39 */
   40 
   41 #ifdef GO_FASTER
   42 #define VERBOSE(x)
   43 #define VERBOSEB(x)
   44 #else
   45 #define VERBOSE(x) if (verbose) x
   46 #define VERBOSEB(x) if (verbose) {x}
   47 #endif
   48 
   49 /* 3.1.2 - trivial changes to fix compile on Windows */
   50 #define VERSION_STRING "proxy_html/3.1.2"
   51 
   52 #include <ctype.h>
   53 
   54 /* libxml2 */
   55 #include <libxml/HTMLparser.h>
   56 
   57 /* apache */
   58 #include <http_protocol.h>
   59 #include <http_config.h>
   60 #include <http_log.h>
   61 #include <apr_strings.h>
   62 #include <apr_hash.h>
   63 #include <apr_strmatch.h>
   64 
   65 #include <apr_optional.h>
   66 #include <mod_xml2enc.h>
   67 #include <http_request.h>
   68 
   69 /* To support Apache 2.1/2.2, we need the ap_ forms of the
   70  * regexp stuff, and they're now used in the code.
   71  * To support 2.0 in the same compile, we #define the
   72  * AP_ versions if necessary.
   73  */
   74 #ifndef AP_REG_ICASE
   75 /* it's 2.0, so we #define the ap_ versions */
   76 #define ap_regex_t regex_t
   77 #define ap_regmatch_t regmatch_t
   78 #define AP_REG_EXTENDED REG_EXTENDED
   79 #define AP_REG_ICASE REG_ICASE
   80 #define AP_REG_NOSUB REG_NOSUB
   81 #define AP_REG_NEWLINE REG_NEWLINE
   82 #define APACHE20
   83 #define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d)
   84 #else
   85 #define APACHE22
   86 #endif
   87 
   88 /* globals set once at startup */
   89 static ap_regex_t* seek_meta ;
   90 static const apr_strmatch_pattern* seek_content ;
   91 static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
   92 static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
   93 
   94 module AP_MODULE_DECLARE_DATA proxy_html_module ;
   95 
   96 #define M_HTML          0x01   /* rule applies to URI (link) attributes */
   97 #define M_EVENTS        0x02   /* rule applies to scripting-event attributes */
   98 #define M_CDATA         0x04   /* rule applies to buffered text content (dump_content) */
   99 #define M_REGEX         0x08   /* urlmap.from holds a compiled regex (from.r), not a string */
  100 #define M_ATSTART       0x10   /* fixed-string match must be at the start of the buffer */
  101 #define M_ATEND         0x20   /* fixed-string match must be at the end of the buffer */
  102 #define M_LAST          0x40   /* event attributes: stop processing rules once this one matched */
  103 #define M_NOTLAST       0x80   /* URI attributes: keep processing rules after this one matched */
  104 #define M_INTERPOLATE_TO    0x100   /* expand ${var} references in urlmap.to per request */
  105 #define M_INTERPOLATE_FROM  0x200   /* expand ${var} references in urlmap.from per request */
  106 
  107 typedef struct {   /* one entry of a link/event attribute list */
  108   const char* val;   /* attribute name; compared with strcmp against the parsed attribute */
  109 } tattr;
  110 typedef struct {   /* location of a Content-Type META element found by metafix() */
  111   unsigned int start ;   /* taken from the regex match start offset */
  112   unsigned int end ;   /* taken from the regex match end offset */
  113 } meta ;
  114 typedef struct {   /* optional condition attached to a rewrite rule */
  115   const char* env;   /* variable name looked up in r->subprocess_env */
  116   const char* val;   /* value to compare case-insensitively; NULL means "any value" */
  117   int rel;   /* 1: rule skipped when condition is unsatisfied; -1: skipped when satisfied */
  118 } rewritecond;
  119 typedef struct urlmap {   /* one rewrite rule in a singly linked list */
  120   struct urlmap* next ;   /* next rule, or NULL at the end of the list */
  121   unsigned int flags ;   /* bitmask of M_* values */
  122   unsigned int regflags ;   /* flags handed to ap_pregcomp when the pattern is (re)compiled */
  123   union {
  124     const char* c ;   /* literal pattern (or uncompiled regex text before interpolation) */
  125     ap_regex_t* r ;   /* compiled regex, used when M_REGEX is set */
  126   } from ;
  127   const char* to ;   /* replacement text; passed to ap_pregsub for regex rules */
  128   rewritecond* cond;   /* NULL for an unconditional rule */
  129 } urlmap ;
  130 typedef struct {   /* per-directory module configuration */
  131   urlmap* map ;   /* head of the configured rule list */
  132   const char* doctype ;   /* compared by pointer against the fpi_* constants to select enforcement */
  133   const char* etag ;   /* text written to close an empty element's start tag */
  134   unsigned int flags ;   /* NORM_* bits passed to normalise() for attribute values */
  135   size_t bufsz ;   /* increment by which preserve() grows the scratch buffer */
  136   apr_hash_t* links;   /* element name -> array of tattr naming its URI attributes */
  137   apr_array_header_t* events;   /* array of tattr naming scripting-event attributes */
  138   const char* charset_out;   /* requested output charset; a leading '*' keeps the sniffed one */
  139   int extfix ;   /* non-zero: buffer cdata/comments for rewriting and rewrite event attributes */
  140   int metafix ;   /* NOTE(review): presumably enables metafix(); its use is outside this view */
  141   int strip_comments ;   /* non-zero: pcomment() drops comments */
  142   int interp;   /* non-zero: build a per-request rule list with fixup_rules() */
  143   int enabled;   /* NOTE(review): not referenced in the visible code - confirm purpose */
  144 #ifndef GO_FASTER
  145   int verbose ;   /* non-zero: emit informational log messages (see VERBOSE macros) */
  146 #endif
  147 } proxy_html_conf ;
  148 typedef struct {   /* per-request filter state; also the SAX user-data pointer */
  149   ap_filter_t* f ;   /* the filter this context belongs to */
  150   proxy_html_conf* cfg ;   /* configuration in effect for the request */
  151   htmlParserCtxtPtr parser ;   /* libxml2 parser context; NULL until the first data bucket */
  152   apr_bucket_brigade* bb ;   /* brigade that collects output for the next filter */
  153   char* buf ;   /* heap scratch buffer grown with realloc in preserve() */
  154   size_t offset ;   /* number of bytes of buf currently in use */
  155   size_t avail ;   /* allocated size of buf */
  156   const char* encoding;   /* NOTE(review): not referenced in the visible code */
  157   urlmap* map;   /* rule list used for this request (cfg->map or the fixup_rules() copy) */
  158 } saxctxt ;
  159 
  160 
  161 #define NORM_LC 0x1
  162 #define NORM_MSSLASH 0x2
  163 #define NORM_RESET 0x4
  164 static htmlSAXHandler sax ;
  165 
  166 typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t ;
  167 
  168 static const char* const fpi_html =
  169     "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
  170 static const char* const fpi_html_legacy =
  171     "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
  172 static const char* const fpi_xhtml =
  173     "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
  174 static const char* const fpi_xhtml_legacy =
  175     "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
  176 static const char* const html_etag = ">" ;
  177 static const char* const xhtml_etag = " />" ;
  178 /*#define DEFAULT_DOCTYPE fpi_html */
  179 static const char* const DEFAULT_DOCTYPE = "" ;
  180 #define DEFAULT_ETAG html_etag
  181 
  182 static void normalise(unsigned int flags, char* str) {
  183   char* p ;
  184   if ( flags & NORM_LC )
  185     for ( p = str ; *p ; ++p )
  186       if ( isupper(*p) )
  187     *p = tolower(*p) ;
  188 
  189   if ( flags & NORM_MSSLASH )
  190     for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') )
  191       *p = '/' ;
  192 
  193 }
  194 #define consume_buffer(ctx,inbuf,bytes,flag) \
  195     htmlParseChunk(ctx->parser, inbuf, bytes, flag)
  196 
  197 #define AP_fwrite(ctx,inbuf,bytes,flush) \
  198     ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes);
  199 
  200 /* This is always utf-8 on entry.  We can convert charset within FLUSH */
  201 #define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1
  202 static void pcharacters(void* ctxt, const xmlChar *uchars, int length) {
  203   const char* chars = (const char*) uchars;
  204   saxctxt* ctx = (saxctxt*) ctxt ;
  205   int i ;
  206   int begin ;
  207   for ( begin=i=0; i<length; i++ ) {
  208     switch (chars[i]) {
  209       case '&' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&amp;") ; break ;
  210       case '<' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&lt;") ; break ;
  211       case '>' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&gt;") ; break ;
  212       case '"' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&quot;") ; break ;
  213       default : break ;
  214     }
  215   }
  216   FLUSH ;
  217 }
  218 static void preserve(saxctxt* ctx, const size_t len) {
  219   char* newbuf ;
  220   if ( len <= ( ctx->avail - ctx->offset ) )
  221     return ;
  222   else while ( len > ( ctx->avail - ctx->offset ) )
  223     ctx->avail += ctx->cfg->bufsz ;
  224 
  225   newbuf = realloc(ctx->buf, ctx->avail) ;
  226   if ( newbuf != ctx->buf ) {
  227     if ( ctx->buf )
  228     apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free);
  229     apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
  230     (int(*)(void*))free, apr_pool_cleanup_null);
  231     ctx->buf = newbuf ;
  232   }
  233 }
  234 static void pappend(saxctxt* ctx, const char* buf, const size_t len) {
  235   preserve(ctx, len) ;
  236   memcpy(ctx->buf+ctx->offset, buf, len) ;
  237   ctx->offset += len ;
  238 }
  239 static void dump_content(saxctxt* ctx) {
  240   urlmap* m ;
  241   char* found ;
  242   size_t s_from, s_to ;
  243   size_t match ;
  244   char c = 0 ;
  245   int nmatch ;
  246   ap_regmatch_t pmatch[10] ;
  247   char* subs ;
  248   size_t len, offs ;
  249   urlmap* themap = ctx->map;
  250 #ifndef GO_FASTER
  251   int verbose = ctx->cfg->verbose ;
  252 #endif
  253 
  254   pappend(ctx, &c, 1) ; /* append null byte */
  255     /* parse the text for URLs */
  256   for ( m = themap ; m ; m = m->next ) {
  257     if ( ! ( m->flags & M_CDATA ) )
  258     continue ;
  259     if ( m->flags & M_REGEX ) {
  260       nmatch = 10 ;
  261       offs = 0 ;
  262       while ( ! ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0) ) {
  263     match = pmatch[0].rm_so ;
  264     s_from = pmatch[0].rm_eo - match ;
  265     subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
  266         nmatch, pmatch) ;
  267     s_to = strlen(subs) ;
  268     len = strlen(ctx->buf) ;
  269     offs += match ;
  270     VERBOSEB(
  271       const char* f = apr_pstrndup(ctx->f->r->pool,
  272         ctx->buf + offs , s_from ) ;
  273       ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  274         "C/RX: match at %s, substituting %s", f, subs) ;
  275     )
  276     if ( s_to > s_from) {
  277       preserve(ctx, s_to - s_from) ;
  278       memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  279         len + 1 - s_from - offs) ;
  280       memcpy(ctx->buf+offs, subs, s_to) ;
  281     } else {
  282       memcpy(ctx->buf + offs, subs, s_to) ;
  283       memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  284         len + 1 - s_from - offs) ;
  285     }
  286     offs += s_to ;
  287       }
  288     } else {
  289       s_from = strlen(m->from.c) ;
  290       s_to = strlen(m->to) ;
  291       for ( found = strstr(ctx->buf, m->from.c) ; found ;
  292         found = strstr(ctx->buf+match+s_to, m->from.c) ) {
  293     match = found - ctx->buf ;
  294     if ( ( m->flags & M_ATSTART ) && ( match != 0) )
  295       break ;
  296     len = strlen(ctx->buf) ;
  297     if ( ( m->flags & M_ATEND ) && ( match < (len - s_from) ) )
  298       continue ;
  299     VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  300         "C: matched %s, substituting %s", m->from.c, m->to) ) ;
  301     if ( s_to > s_from ) {
  302       preserve(ctx, s_to - s_from) ;
  303       memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
  304         len + 1 - s_from - match) ;
  305       memcpy(ctx->buf+match, m->to, s_to) ;
  306     } else {
  307       memcpy(ctx->buf+match, m->to, s_to) ;
  308       memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
  309         len + 1 - s_from - match) ;
  310     }
  311       }
  312     }
  313   }
  314   AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1) ;
  315 }
  316 static void pcdata(void* ctxt, const xmlChar *uchars, int length) {
  317   const char* chars = (const char*) uchars;
  318   saxctxt* ctx = (saxctxt*) ctxt ;
  319   if ( ctx->cfg->extfix ) {
  320     pappend(ctx, chars, length) ;
  321   } else {
  322     /* not sure if this should force-flush
  323      * (i.e. can one cdata section come in multiple calls?)
  324      */
  325     AP_fwrite(ctx, chars, length, 0) ;
  326   }
  327 }
  328 static void pcomment(void* ctxt, const xmlChar *uchars) {
  329   const char* chars = (const char*) uchars;
  330   saxctxt* ctx = (saxctxt*) ctxt ;
  331   if ( ctx->cfg->strip_comments )
  332     return ;
  333 
  334   if ( ctx->cfg->extfix ) {
  335     pappend(ctx, "<!--", 4) ;
  336     pappend(ctx, chars, strlen(chars) ) ;
  337     pappend(ctx, "-->", 3) ;
  338   } else {
  339     ap_fputs(ctx->f->next, ctx->bb, "<!--") ;
  340     AP_fwrite(ctx, chars, strlen(chars), 1) ;
  341     ap_fputs(ctx->f->next, ctx->bb, "-->") ;
  342   }
  343 }
  344 static void pendElement(void* ctxt, const xmlChar* uname) {
  345   saxctxt* ctx = (saxctxt*) ctxt ;
  346   const char* name = (const char*) uname;
  347   const htmlElemDesc* desc = htmlTagLookup(uname);
  348 
  349   if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
  350     /* enforce html */
  351     if (!desc || desc->depr)
  352       return;
  353     
  354   } else if ((ctx->cfg->doctype == fpi_html)
  355         || (ctx->cfg->doctype == fpi_xhtml)) {
  356     /* enforce html legacy */
  357     if (!desc)
  358       return;
  359   }
  360   /* TODO - implement HTML "allowed here" using the stack */
  361   /* nah.  Keeping the stack is too much overhead */
  362 
  363   if ( ctx->offset > 0 ) {
  364     dump_content(ctx) ;
  365     ctx->offset = 0 ;   /* having dumped it, we can re-use the memory */
  366   }
  367   if ( !desc || ! desc->empty ) {
  368     ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
  369   }
  370 }
  371 static void pstartElement(void* ctxt, const xmlChar* uname,
  372         const xmlChar** uattrs ) {
  373 
  374   int required_attrs ;
  375   int num_match ;
  376   size_t offs, len ;
  377   char* subs ;
  378   rewrite_t is_uri ;
  379   const char** a ;
  380   urlmap* m ;
  381   size_t s_to, s_from, match ;
  382   char* found ;
  383   saxctxt* ctx = (saxctxt*) ctxt ;
  384   size_t nmatch ;
  385   ap_regmatch_t pmatch[10] ;
  386 #ifndef GO_FASTER
  387   int verbose = ctx->cfg->verbose ;
  388 #endif
  389   apr_array_header_t *linkattrs;
  390   int i;
  391   const char* name = (const char*) uname;
  392   const char** attrs = (const char**) uattrs;
  393   const htmlElemDesc* desc = htmlTagLookup(uname);
  394   urlmap* themap = ctx->map;
  395 #ifdef HAVE_STACK
  396   const void** descp;
  397 #endif
  398   int enforce = 0;
  399   if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
  400     /* enforce html */
  401     enforce = 2;
  402     if (!desc || desc->depr)
  403       return;
  404     
  405   } else if ((ctx->cfg->doctype == fpi_html)
  406         || (ctx->cfg->doctype == fpi_xhtml)) {
  407     enforce = 1;
  408     /* enforce html legacy */
  409     if (!desc) {
  410       return;
  411     }
  412   }
  413   if (!desc && enforce) {
  414     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
  415         "Bogus HTML element %s dropped", name) ;
  416     return;
  417   }
  418   if (desc && desc->depr && (enforce == 2) ) {
  419     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
  420         "Deprecated HTML element %s dropped", name) ;
  421     return;
  422   }
  423 #ifdef HAVE_STACK
  424   descp = apr_array_push(ctx->stack);
  425   *descp = desc;
  426   /* TODO - implement HTML "allowed here" */
  427 #endif
  428 
  429   ap_fputc(ctx->f->next, ctx->bb, '<') ;
  430   ap_fputs(ctx->f->next, ctx->bb, name) ;
  431 
  432   required_attrs = 0;
  433   if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
  434     for (a = desc->attrs_req; *a; a++)
  435       ++required_attrs;
  436 
  437   if ( attrs ) {
  438     linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING) ;
  439     for ( a = attrs ; *a ; a += 2 ) {
  440       if (desc && enforce > 0) {
  441     switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
  442       case HTML_INVALID:
  443             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
  444         "Bogus HTML attribute %s of %s dropped", *a, name);
  445         continue;
  446       case HTML_DEPRECATED:
  447             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
  448         "Deprecated HTML attribute %s of %s dropped", *a, name);
  449         continue;
  450       case HTML_REQUIRED:
  451         required_attrs--;   /* cross off the number still needed */
  452         /* fallthrough - required implies valid */
  453       default:
  454         break;
  455     }
  456       }
  457       ctx->offset = 0 ;
  458       if ( a[1] ) {
  459     pappend(ctx, a[1], strlen(a[1])+1) ;
  460     is_uri = ATTR_IGNORE ;
  461     if ( linkattrs ) {
  462       tattr* attrs = (tattr*) linkattrs->elts;
  463           for (i=0; i < linkattrs->nelts; ++i) {
  464         if ( !strcmp(*a, attrs[i].val)) {
  465           is_uri = ATTR_URI ;
  466           break ;
  467         }
  468       }
  469     }
  470     if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix
  471             && (ctx->cfg->events != NULL) ) {
  472           for (i=0; i < ctx->cfg->events->nelts; ++i) {
  473         tattr* attrs = (tattr*) ctx->cfg->events->elts;
  474         if ( !strcmp(*a, attrs[i].val)) {
  475           is_uri = ATTR_EVENT ;
  476           break ;
  477         }
  478       }
  479     }
  480     switch ( is_uri ) {
  481       case ATTR_URI:
  482         num_match = 0 ;
  483         for ( m = themap ; m ; m = m->next ) {
  484           if ( ! ( m->flags & M_HTML ) )
  485         continue ;
  486           if ( m->flags & M_REGEX ) {
  487         nmatch = 10 ;
  488         if ( ! ap_regexec(m->from.r, ctx->buf, nmatch, pmatch, 0) ) {
  489           ++num_match ;
  490           offs = match = pmatch[0].rm_so ;
  491           s_from = pmatch[0].rm_eo - match ;
  492           subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf,
  493             nmatch, pmatch) ;
  494           VERBOSE( {
  495             const char* f = apr_pstrndup(ctx->f->r->pool,
  496             ctx->buf + offs , s_from ) ;
  497             ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  498             "H/RX: match at %s, substituting %s", f, subs) ;
  499           } )
  500           s_to = strlen(subs) ;
  501           len = strlen(ctx->buf) ;
  502           if ( s_to > s_from) {
  503             preserve(ctx, s_to - s_from) ;
  504             memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  505             len + 1 - s_from - offs) ;
  506             memcpy(ctx->buf+offs, subs, s_to) ;
  507           } else {
  508             memcpy(ctx->buf + offs, subs, s_to) ;
  509             memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  510             len + 1 - s_from - offs) ;
  511           }
  512         }
  513           } else {
  514         s_from = strlen(m->from.c) ;
  515         if ( ! strncasecmp(ctx->buf, m->from.c, s_from ) ) {
  516           ++num_match ;
  517           s_to = strlen(m->to) ;
  518           len = strlen(ctx->buf) ;
  519           VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  520             "H: matched %s, substituting %s", m->from.c, m->to) ) ;
  521           if ( s_to > s_from ) {
  522             preserve(ctx, s_to - s_from) ;
  523             memmove(ctx->buf+s_to, ctx->buf+s_from,
  524             len + 1 - s_from ) ;
  525             memcpy(ctx->buf, m->to, s_to) ;
  526           } else {  /* it fits in the existing space */
  527             memcpy(ctx->buf, m->to, s_to) ;
  528             memmove(ctx->buf+s_to, ctx->buf+s_from,
  529             len + 1 - s_from) ;
  530           }
  531           break ;
  532         }
  533           }
  534           /* URIs only want one match unless overridden in the config */
  535           if ( (num_match > 0) && !( m->flags & M_NOTLAST ) )
  536         break ;
  537         }
  538         break ;
  539       case ATTR_EVENT:
  540         for ( m = themap ; m ; m = m->next ) {
  541           num_match = 0 ;   /* reset here since we're working per-rule */
  542           if ( ! ( m->flags & M_EVENTS ) )
  543         continue ;
  544           if ( m->flags & M_REGEX ) {
  545         nmatch = 10 ;
  546         offs = 0 ;
  547         while ( ! ap_regexec(m->from.r, ctx->buf+offs,
  548             nmatch, pmatch, 0) ) {
  549           match = pmatch[0].rm_so ;
  550           s_from = pmatch[0].rm_eo - match ;
  551           subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
  552             nmatch, pmatch) ;
  553           VERBOSE( {
  554             const char* f = apr_pstrndup(ctx->f->r->pool,
  555             ctx->buf + offs , s_from ) ;
  556             ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  557             "E/RX: match at %s, substituting %s", f, subs) ;
  558           } )
  559           s_to = strlen(subs) ;
  560           offs += match ;
  561           len = strlen(ctx->buf) ;
  562           if ( s_to > s_from) {
  563             preserve(ctx, s_to - s_from) ;
  564             memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  565             len + 1 - s_from - offs) ;
  566             memcpy(ctx->buf+offs, subs, s_to) ;
  567           } else {
  568             memcpy(ctx->buf + offs, subs, s_to) ;
  569             memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
  570             len + 1 - s_from - offs) ;
  571           }
  572           offs += s_to ;
  573           ++num_match ;
  574         }
  575           } else {
  576         found = strstr(ctx->buf, m->from.c) ;
  577         if ( (m->flags & M_ATSTART) && ( found != ctx->buf) )
  578           continue ;
  579         while ( found ) {
  580           s_from = strlen(m->from.c) ;
  581           s_to = strlen(m->to) ;
  582           match = found - ctx->buf ;
  583           if ( ( s_from < strlen(found) ) && (m->flags & M_ATEND ) ) {
  584             found = strstr(ctx->buf+match+s_from, m->from.c) ;
  585             continue ;
  586           } else {
  587             found = strstr(ctx->buf+match+s_to, m->from.c) ;
  588           }
  589           VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
  590             "E: matched %s, substituting %s", m->from.c, m->to) ) ;
  591           len = strlen(ctx->buf) ;
  592           if ( s_to > s_from ) {
  593             preserve(ctx, s_to - s_from) ;
  594             memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
  595             len + 1 - s_from - match) ;
  596             memcpy(ctx->buf+match, m->to, s_to) ;
  597           } else {
  598             memcpy(ctx->buf+match, m->to, s_to) ;
  599             memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
  600             len + 1 - s_from - match) ;
  601           }
  602           ++num_match ;
  603         }
  604           }
  605           if ( num_match && ( m->flags & M_LAST ) )
  606         break ;
  607         }
  608         break ;
  609       case ATTR_IGNORE:
  610         break ;
  611     }
  612       }
  613       if ( ! a[1] )
  614     ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
  615       else {
  616 
  617     if ( ctx->cfg->flags != 0 )
  618       normalise(ctx->cfg->flags, ctx->buf) ;
  619 
  620     /* write the attribute, using pcharacters to html-escape
  621        anything that needs it in the value.
  622     */
  623     ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ;
  624     pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)) ;
  625     ap_fputc(ctx->f->next, ctx->bb, '"') ;
  626       }
  627     }
  628   }
  629   ctx->offset = 0 ;
  630   if ( desc && desc->empty )
  631     ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ;
  632   else
  633     ap_fputc(ctx->f->next, ctx->bb, '>') ;
  634 
  635   if ((enforce > 0) && (required_attrs > 0)) {
  636     /* if there are more required attributes than we found then complain */
  637     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
  638     "HTML element %s is missing %d required attributes",
  639     name, required_attrs);
  640   }
  641 }
  642 
  643 static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
  644 #ifndef GO_FASTER
  645         , int verbose
  646 #endif
  647     ) {
  648   meta* ret = NULL ;
  649   size_t offs = 0 ;
  650   const char* p ;
  651   const char* q ;
  652   char* header ;
  653   char* content ;
  654   ap_regmatch_t pmatch[2] ;
  655   char delim ;
  656 
  657   while ( ! ap_regexec(seek_meta, buf+offs, 2, pmatch, 0) ) {
  658     header = NULL ;
  659     content = NULL ;
  660     p = buf+offs+pmatch[1].rm_eo ;
  661     while ( !isalpha(*++p) ) ;
  662     for ( q = p ; isalnum(*q) || (*q == '-') ; ++q ) ;
  663     header = apr_pstrndup(r->pool, p, q-p) ;
  664     if ( strncasecmp(header, "Content-", 8) ) {
  665 /* find content=... string */
  666       p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
  667             pmatch[0].rm_eo - pmatch[0].rm_so);
  668       /* if it doesn't contain "content", ignore, don't crash! */
  669       if (p != NULL) {
  670         while (*p) {
  671       p += 7 ;
  672       while ( *p && isspace(*p) )
  673         ++p ;
  674       if ( *p != '=' )
  675         continue ;
  676       while ( *p && isspace(*++p) ) ;
  677       if ( ( *p == '\'' ) || ( *p == '"' ) ) {
  678         delim = *p++ ;
  679         for ( q = p ; *q != delim ; ++q ) ;
  680       } else {
  681         for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
  682       }
  683       content = apr_pstrndup(r->pool, p, q-p) ;
  684       break ;
  685         }
  686       }
  687     } else if ( !strncasecmp(header, "Content-Type", 12) ) {
  688       ret = apr_palloc(r->pool, sizeof(meta) ) ;
  689       ret->start = pmatch[0].rm_so ;
  690       ret->end = pmatch[0].rm_eo ;
  691     }
  692     if ( header && content ) {
  693       VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
  694     "Adding header [%s: %s] from HTML META", header, content) ) ; 
  695       apr_table_setn(r->headers_out, header, content) ;
  696     }
  697     offs += pmatch[0].rm_eo ;
  698   }
  699   return ret ;
  700 }
  701 
  702 static const char* interpolate_vars(request_rec* r, const char* str) {
  703   const char* start;
  704   const char* end;
  705   const char* delim;
  706   const char* before;
  707   const char* after;
  708   const char* replacement;
  709   const char* var;
  710   for (;;) {
  711     start = str ;
  712     if (start = ap_strstr_c(start, "${"), start == NULL)
  713       break;
  714 
  715     if (end = ap_strchr_c(start+2, '}'), end == NULL)
  716       break;
  717 
  718     delim = ap_strchr_c(start, '|');
  719     before = apr_pstrndup(r->pool, str, start-str);
  720     after = end+1;
  721     if (delim) {
  722       var = apr_pstrndup(r->pool, start+2, delim-start-2) ;
  723     } else {
  724       var = apr_pstrndup(r->pool, start+2, end-start-2) ;
  725     }
  726     replacement = apr_table_get(r->subprocess_env, var) ;
  727     if (!replacement) {
  728       if (delim)
  729     replacement = apr_pstrndup(r->pool, delim+1, end-delim-1);
  730       else
  731     replacement = "";
  732     }
  733     str = apr_pstrcat(r->pool, before, replacement, after, NULL);
  734     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
  735             "Interpolating %s  =>  %s", var, replacement) ;
  736   }
  737   return str;
  738 }
  739 static void fixup_rules(saxctxt* ctx) {
  740   const char* thisval;
  741   urlmap* newp;
  742   urlmap* p;
  743   urlmap* prev = NULL;
  744   request_rec* r = ctx->f->r;
  745   int has_cond;
  746 
  747   for (p = ctx->cfg->map; p; p = p->next) {
  748     has_cond = -1;
  749     if (p->cond != NULL) {
  750       thisval = apr_table_get(r->subprocess_env, p->cond->env);
  751       if (!p->cond->val) {
  752     /* required to be "anything" */
  753     if (thisval)
  754       has_cond = 1; /* satisfied */
  755     else
  756       has_cond = 0; /* unsatisfied */
  757       } else {
  758     if (thisval && !strcasecmp(p->cond->val, thisval)) {
  759       has_cond = 1; /* satisfied */
  760     } else {
  761       has_cond = 0; /* unsatisfied */
  762     }
  763       }
  764       if (((has_cond == 0) && (p->cond->rel ==1 ))
  765     || ((has_cond == 1) && (p->cond->rel == -1))) {
  766     continue;  /* condition is unsatisfied */
  767       }
  768     }
  769 
  770     newp = apr_pmemdup(r->pool, p, sizeof(urlmap));
  771 
  772     if (newp->flags & M_INTERPOLATE_FROM) {
  773       newp->from.c = interpolate_vars(r, newp->from.c);
  774       if (!newp->from.c || !*newp->from.c)
  775     continue;   /* don't use empty from-pattern */
  776       if (newp->flags & M_REGEX) {
  777         newp->from.r = ap_pregcomp(r->pool, newp->from.c, newp->regflags) ;
  778       }
  779     }
  780     if (newp->flags & M_INTERPOLATE_TO) {
  781       newp->to = interpolate_vars(r, newp->to);
  782     }
  783     /* evaluate p->cond; continue if unsatisfied */
  784     /* create new urlmap with memcpy and append to map */
  785     /* interpolate from if flagged to do so */
  786     /* interpolate to if flagged to do so */
  787 
  788     if (prev != NULL)
  789       prev->next = newp ;
  790     else
  791       ctx->map = newp ;
  792     prev = newp ;
  793   }
  794 
  795   if (prev)
  796     prev->next = NULL;
  797 }
  798 static saxctxt* check_filter_init (ap_filter_t* f) {
  799   saxctxt* fctx ;
  800   if ( ! f->ctx) {
  801     proxy_html_conf* cfg
  802     = ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
  803     const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
  804 
  805     const char* errmsg = NULL ;
  806     if ( !force ) {
  807       if ( ! f->r->proxyreq ) {
  808         errmsg = "Non-proxy request; not inserting proxy-html filter" ;
  809       } else if ( ! f->r->content_type ) {
  810         errmsg = "No content-type; bailing out of proxy-html filter" ;
  811       } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
  812         strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
  813         errmsg = "Non-HTML content; not inserting proxy-html filter" ;
  814       }
  815     }
  816     if (!cfg->links) {
  817       errmsg = "No links configured: nothing for proxy-html filter to do";
  818     }
  819 
  820     if ( errmsg ) {
  821 #ifndef GO_FASTER
  822       if ( cfg->verbose ) {
  823         ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ;
  824       }
  825 #endif
  826       ap_remove_output_filter(f) ;
  827       return NULL ;
  828     }
  829 
  830     fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
  831     fctx->f = f ;
  832     fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
  833     fctx->cfg = cfg;
  834     apr_table_unset(f->r->headers_out, "Content-Length") ;
  835 
  836     if (cfg->interp)
  837       fixup_rules(fctx);
  838     else
  839       fctx->map = cfg->map;
  840     /* defer dealing with charset_out until after sniffing charset_in
  841      * so we can support setting one to t'other.
  842     */
  843   }
  844   return f->ctx ;
  845 }
  846 static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
  847   apr_bucket* b ;
  848   meta* m = NULL ;
  849   xmlCharEncoding enc ;
  850   const char* buf = 0 ;
  851   apr_size_t bytes = 0 ;
  852 #ifndef USE_OLD_LIBXML2
  853   int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET |
  854     XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ;
  855 #endif
  856 
  857   saxctxt* ctxt = check_filter_init(f) ;
  858 #ifndef GO_FASTER
  859   int verbose;
  860 #endif
  861   if ( ! ctxt )
  862     return ap_pass_brigade(f->next, bb) ;
  863 #ifndef GO_FASTER
  864   verbose = ctxt->cfg->verbose;
  865 #endif
  866 
  867   for ( b = APR_BRIGADE_FIRST(bb) ;
  868     b != APR_BRIGADE_SENTINEL(bb) ;
  869     b = APR_BUCKET_NEXT(b) ) {
  870     if ( APR_BUCKET_IS_METADATA(b) ) {
  871       if ( APR_BUCKET_IS_EOS(b) ) {
  872         if ( ctxt->parser != NULL ) {
  873       consume_buffer(ctxt, buf, 0, 1);
  874         }
  875         APR_BRIGADE_INSERT_TAIL(ctxt->bb,
  876       apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
  877         ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
  878       } else if ( APR_BUCKET_IS_FLUSH(b) ) {
  879         /* pass on flush, except at start where it would cause
  880          * headers to be sent before doc sniffing
  881          */
  882         if ( ctxt->parser != NULL ) {
  883       ap_fflush(ctxt->f->next, ctxt->bb) ;
  884         }
  885       }
  886     } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
  887           == APR_SUCCESS ) {
  888       if ( ctxt->parser == NULL ) {
  889     const char* cenc;
  890     if (!xml2enc_charset ||
  891         (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
  892       if (!xml2enc_charset)
  893         ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
  894         "No i18n support found.  Install mod_xml2enc if required") ;
  895       enc = XML_CHAR_ENCODING_NONE;
  896       ap_set_content_type(f->r, "text/html;charset=utf-8") ;
  897     } else {
  898           /* if we wanted a non-default charset_out, insert the
  899        * xml2enc filter now that we've sniffed it
  900        */
  901       if (ctxt->cfg->charset_out && xml2enc_filter) {
  902         if (*ctxt->cfg->charset_out != '*')
  903               cenc = ctxt->cfg->charset_out;
  904         xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
  905         ap_set_content_type(f->r,
  906         apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ;
  907       } else /* Normal case, everything worked, utf-8 output */
  908         ap_set_content_type(f->r, "text/html;charset=utf-8") ;
  909     }
  910 
  911     ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
  912     ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ;
  913     buf += 4;
  914     bytes -= 4;
  915     if (ctxt->parser == NULL) {
  916           apr_status_t rv = ap_pass_brigade(f->next, bb) ;
  917           ap_remove_output_filter(f) ;
  918       return rv;
  919     }
  920     apr_pool_cleanup_register(f->r->pool, ctxt->parser,
  921         (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ;
  922 #ifndef USE_OLD_LIBXML2
  923     if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
  924       ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
  925         "Unsupported parser opts %x", xmlopts) ;
  926 #endif
  927     if ( ctxt->cfg->metafix )
  928 #ifndef GO_FASTER
  929       m = metafix(f->r, buf, ctxt->cfg->verbose) ;
  930 #else
  931       m = metafix(f->r, buf) ;
  932 #endif
  933     if ( m ) {
  934       consume_buffer(ctxt, buf, m->start, 0) ;
  935       consume_buffer(ctxt, buf+m->end, bytes-m->end, 0) ;
  936     } else {
  937       consume_buffer(ctxt, buf, bytes, 0) ;
  938     }
  939       } else {
  940     consume_buffer(ctxt, buf, bytes, 0) ;
  941       }
  942     } else {
  943       ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
  944     }
  945   }
  946   /*ap_fflush(ctxt->f->next, ctxt->bb) ;    // uncomment for debug */
  947   apr_brigade_cleanup(bb) ;
  948   return APR_SUCCESS ;
  949 }
  950 
  951 static void* proxy_html_config(apr_pool_t* pool, char* x) {
  952   proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
  953   ret->doctype = DEFAULT_DOCTYPE ;
  954   ret->etag = DEFAULT_ETAG ;
  955   ret->bufsz = 8192 ;
  956   /* ret->interp = 1; */
  957   /* don't initialise links and events until they get set/used */
  958   return ret ;
  959 }
  960 static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
  961   proxy_html_conf* base = (proxy_html_conf*) BASE ;
  962   proxy_html_conf* add = (proxy_html_conf*) ADD ;
  963   proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
  964 
  965   /* don't merge declarations - just use the most specific */
  966   conf->links = (add->links == NULL) ? base->links : add->links;
  967   conf->events = (add->events == NULL) ? base->events : add->events;
  968 
  969   conf->charset_out = (add->charset_out == NULL)
  970     ? base->charset_out : add->charset_out ;
  971 
  972   if ( add->map && base->map ) {
  973     urlmap* a ;
  974     conf->map = NULL ;
  975     for ( a = base->map ; a ; a = a->next ) {
  976       urlmap* save = conf->map ;
  977       conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
  978       conf->map->next = save ;
  979     }
  980     for ( a = add->map ; a ; a = a->next ) {
  981       urlmap* save = conf->map ;
  982       conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
  983       conf->map->next = save ;
  984     }
  985   } else
  986     conf->map = add->map ? add->map : base->map ;
  987 
  988   conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
  989         ? base->doctype : add->doctype ;
  990   conf->etag = ( add->etag == DEFAULT_ETAG ) ? base->etag : add->etag ;
  991   conf->bufsz = add->bufsz ;
  992   if ( add->flags & NORM_RESET ) {
  993     conf->flags = add->flags ^ NORM_RESET ;
  994     conf->metafix = add->metafix ;
  995     conf->extfix = add->extfix ;
  996     conf->interp = add->interp ;
  997     conf->strip_comments = add->strip_comments ;
  998     conf->enabled = add->enabled;
  999 #ifndef GO_FASTER
 1000     conf->verbose = add->verbose ;
 1001 #endif
 1002   } else {
 1003     conf->flags = base->flags | add->flags ;
 1004     conf->metafix = base->metafix | add->metafix ;
 1005     conf->extfix = base->extfix | add->extfix ;
 1006     conf->interp = base->interp | add->interp ;
 1007     conf->strip_comments = base->strip_comments | add->strip_comments ;
 1008     conf->enabled = add->enabled | base->enabled;
 1009 #ifndef GO_FASTER
 1010     conf->verbose = base->verbose | add->verbose ;
 1011 #endif
 1012   }
 1013   return conf ;
 1014 }
 1015 #define REGFLAG(n,s,c) ( (s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0 )
 1016 #define XREGFLAG(n,s,c) ( (!s||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0 )
 1017 static void comp_urlmap(apr_pool_t* pool, urlmap* newmap,
 1018     const char* from, const char* to, const char* flags, const char* cond) {
 1019   char* eq;
 1020   newmap->flags
 1021     = XREGFLAG(M_HTML,flags,'h')
 1022     | XREGFLAG(M_EVENTS,flags,'e')
 1023     | XREGFLAG(M_CDATA,flags,'c')
 1024     | REGFLAG(M_ATSTART,flags,'^')
 1025     | REGFLAG(M_ATEND,flags,'$')
 1026     | REGFLAG(M_REGEX,flags,'R')
 1027     | REGFLAG(M_LAST,flags,'L')
 1028     | REGFLAG(M_NOTLAST,flags,'l')
 1029     | REGFLAG(M_INTERPOLATE_TO,flags,'V')
 1030     | REGFLAG(M_INTERPOLATE_FROM,flags,'v')
 1031   ;
 1032   if ( ( newmap->flags & M_INTERPOLATE_FROM)
 1033         || ! (newmap->flags & M_REGEX) ) {
 1034     newmap->from.c = from ;
 1035     newmap->to = to ;
 1036   } else {
 1037     newmap->regflags
 1038     = REGFLAG(AP_REG_EXTENDED,flags,'x')
 1039     | REGFLAG(AP_REG_ICASE,flags,'i')
 1040     | REGFLAG(AP_REG_NOSUB,flags,'n')
 1041     | REGFLAG(AP_REG_NEWLINE,flags,'s')
 1042     ;
 1043     newmap->from.r = ap_pregcomp(pool, from, newmap->regflags) ;
 1044     newmap->to = to ;
 1045   }
 1046   if (cond != NULL) {
 1047     char* cond_copy;
 1048     newmap->cond = apr_pcalloc(pool, sizeof(rewritecond));
 1049     if (cond[0] == '!') {
 1050       newmap->cond->rel = -1;
 1051       newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1);
 1052     } else {
 1053       newmap->cond->rel = 1;
 1054       newmap->cond->env = cond_copy = apr_pstrdup(pool, cond);
 1055     }
 1056     eq = ap_strchr(++cond_copy, '=');
 1057     if (eq) {
 1058       *eq = 0;
 1059       newmap->cond->val = eq+1;
 1060     }
 1061   } else {
 1062     newmap->cond = NULL;
 1063   }
 1064 }
 1065 static const char* set_urlmap(cmd_parms* cmd, void* CFG, const char* args) {
 1066   proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
 1067   urlmap* map ;
 1068   apr_pool_t* pool = cmd->pool;
 1069   urlmap* newmap ;
 1070   const char* usage =
 1071     "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]";
 1072   const char* from;
 1073   const char* to;
 1074   const char* flags;
 1075   const char* cond = NULL;
 1076   
 1077   if (from = ap_getword_conf(cmd->pool, &args), !from)
 1078     return usage;
 1079   if (to = ap_getword_conf(cmd->pool, &args), !to)
 1080     return usage;
 1081   flags = ap_getword_conf(cmd->pool, &args);
 1082   if (flags && *flags)
 1083     cond = ap_getword_conf(cmd->pool, &args);
 1084   if (cond && !*cond)
 1085     cond = NULL;
 1086 
 1087   /* the args look OK, so let's use them */
 1088   newmap = apr_palloc(pool, sizeof(urlmap) ) ;
 1089   newmap->next = NULL;
 1090   if ( cfg->map ) {
 1091     for ( map = cfg->map ; map->next ; map = map->next ) ;
 1092     map->next = newmap ;
 1093   } else
 1094     cfg->map = newmap ;
 1095 
 1096   comp_urlmap(cmd->pool, newmap, from, to, flags, cond);
 1097   return NULL;
 1098 }
 1099 
 1100 static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
 1101     const char* l) {
 1102   proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
 1103   if ( !strcasecmp(t, "xhtml") ) {
 1104     cfg->etag = xhtml_etag ;
 1105     if ( l && !strcasecmp(l, "legacy") )
 1106       cfg->doctype = fpi_xhtml_legacy ;
 1107     else
 1108       cfg->doctype = fpi_xhtml ;
 1109   } else if ( !strcasecmp(t, "html") ) {
 1110     cfg->etag = html_etag ;
 1111     if ( l && !strcasecmp(l, "legacy") )
 1112       cfg->doctype = fpi_html_legacy ;
 1113     else
 1114       cfg->doctype = fpi_html ;
 1115   } else {
 1116     cfg->doctype = apr_pstrdup(cmd->pool, t) ;
 1117     if ( l && ( ( l[0] == 'x' ) || ( l[0] == 'X' ) ) )
 1118       cfg->etag = xhtml_etag ;
 1119     else
 1120       cfg->etag = html_etag ;
 1121   }
 1122   return NULL ;
 1123 }
 1124 static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg) {
 1125   proxy_html_conf* cfg = CFG;
 1126   if ( arg && *arg ) {
 1127     if ( !strcmp(arg, "lowercase") )
 1128       cfg->flags |= NORM_LC ;
 1129     else if ( !strcmp(arg, "dospath") )
 1130       cfg->flags |= NORM_MSSLASH ;
 1131     else if ( !strcmp(arg, "reset") )
 1132       cfg->flags |= NORM_RESET ;
 1133   }
 1134   return NULL ;
 1135 }
 1136 static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) {
 1137   tattr* attr;
 1138   proxy_html_conf* cfg = CFG;
 1139   if (cfg->events == NULL)
 1140     cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr));
 1141   attr = apr_array_push(cfg->events) ;
 1142   attr->val = arg;
 1143   return NULL ;
 1144 }
 1145 static const char* set_links(cmd_parms* cmd, void* CFG,
 1146     const char* elt, const char* att) {
 1147   apr_array_header_t* attrs;
 1148   tattr* attr ;
 1149   proxy_html_conf* cfg = CFG;
 1150 
 1151   if (cfg->links == NULL)
 1152     cfg->links = apr_hash_make(cmd->pool);
 1153 
 1154   attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING) ;
 1155   if (!attrs) {
 1156     attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*)) ;
 1157     apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs) ;
 1158   }
 1159   attr = apr_array_push(attrs) ;
 1160   attr->val = att ;
 1161   return NULL ;
 1162 }
 1163 static const command_rec proxy_html_cmds[] = {
 1164   AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
 1165     RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"),
 1166   AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
 1167     RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"),
 1168   AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL,
 1169     RSRC_CONF|ACCESS_CONF, "Map URL From To" ) ,
 1170   AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL,
 1171     RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) ,
 1172   AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL,
 1173     RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) ,
 1174   AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot,
 1175     (void*)APR_OFFSETOF(proxy_html_conf, metafix),
 1176     RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) ,
 1177   AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot,
 1178     (void*)APR_OFFSETOF(proxy_html_conf, interp),
 1179     RSRC_CONF|ACCESS_CONF,
 1180     "Support interpolation and conditions in URLMaps" ) ,
 1181   AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot,
 1182     (void*)APR_OFFSETOF(proxy_html_conf, extfix),
 1183     RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) ,
 1184   AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot,
 1185     (void*)APR_OFFSETOF(proxy_html_conf, strip_comments),
 1186     RSRC_CONF|ACCESS_CONF, "Strip out comments" ) ,
 1187 #ifndef GO_FASTER
 1188   AP_INIT_FLAG("ProxyHTMLLogVerbose", ap_set_flag_slot,
 1189     (void*)APR_OFFSETOF(proxy_html_conf, verbose),
 1190     RSRC_CONF|ACCESS_CONF, "Verbose Logging (use with LogLevel Info)" ) ,
 1191 #endif
 1192   AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
 1193     (void*)APR_OFFSETOF(proxy_html_conf, bufsz),
 1194     RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
 1195   AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
 1196     (void*)APR_OFFSETOF(proxy_html_conf, charset_out),
 1197     RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) ,
 1198   AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
 1199     (void*)APR_OFFSETOF(proxy_html_conf, enabled),
 1200     RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) ,
 1201   { NULL }
 1202 } ;
 1203 static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
 1204     server_rec* s) {
 1205   ap_add_version_component(p, VERSION_STRING) ;
 1206   seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
 1207     AP_REG_EXTENDED|AP_REG_ICASE) ;
 1208   seek_content = apr_strmatch_precompile(p, "content", 0);
 1209   memset(&sax, 0, sizeof(htmlSAXHandler));
 1210   sax.startElement = pstartElement ;
 1211   sax.endElement = pendElement ;
 1212   sax.characters = pcharacters ;
 1213   sax.comment = pcomment ;
 1214   sax.cdataBlock = pcdata ;
 1215   xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset);
 1216   xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter);
 1217   if (!xml2enc_charset) {
 1218     ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2,
 1219       "I18n support in mod_proxy_html requires mod_xml2enc. "
 1220       "Without it, non-ASCII characters in proxied pages are "
 1221       "likely to display incorrectly.");
 1222   }
 1223   return OK ;
 1224 }
 1225 static void proxy_html_insert(request_rec* r) {
 1226   proxy_html_conf* cfg
 1227     = ap_get_module_config(r->per_dir_config, &proxy_html_module);
 1228   if (cfg->enabled) {
 1229     if (xml2enc_filter)
 1230       xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
 1231     ap_add_output_filter("proxy-html", NULL, r, r->connection);
 1232   }
 1233 }
 1234 static void proxy_html_hooks(apr_pool_t* p) {
 1235   static const char* aszSucc[] = { "mod_filter.c", NULL };
 1236   ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
 1237     NULL, AP_FTYPE_RESOURCE,
 1238     AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ;
 1239   ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
 1240   ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ;
 1241 }
 1242 module AP_MODULE_DECLARE_DATA proxy_html_module = {  /* module record; also the key used with ap_get_module_config() */
 1243     STANDARD20_MODULE_STUFF,
 1244     proxy_html_config,  /* create per-directory config (proxy_html_conf with defaults) */
 1245     proxy_html_merge,   /* merge per-directory configs */
 1246     NULL,               /* no per-server config creator */
 1247     NULL,               /* no per-server config merger */
 1248     proxy_html_cmds,    /* ProxyHTML* directive table */
 1249     proxy_html_hooks    /* registers the output filter and the hooks */
 1250 } ;