"Fossies" - the Fresh Open Source Software Archive 
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /********************************************************************
2 Copyright (c) 2003-9, WebThing Ltd
3 Author: Nick Kew <nick@webthing.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License Version 2,
7 as published by the Free Software Foundation.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You can obtain a copy of the GNU General Public License Version 2
15 from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or
16 http://apache.webthing.com/COPYING.txt
17
18 *********************************************************************/
19
20 /**** NOTICE TO PACKAGERS
21 *
22 * This module now relies on mod_xml2enc for i18n support.
23 * You should make mod_xml2enc a dependency in your packages.
24 */
25
26 /* End of Notices */
27
28
29
30
31 /* GO_FASTER
32
33 You can #define GO_FASTER to disable informational logging.
34 This disables the ProxyHTMLLogVerbose option altogether.
35
36 Default is to leave it undefined, and enable verbose logging
37 as a configuration option. Binaries are supplied with verbose
38 logging enabled.
39 */
40
/* Conditional-logging helpers.  Both expect a local variable named
 * `verbose` to be in scope at the point of use.
 *   VERBOSE(x)  - run the single statement/expression x when verbose
 *   VERBOSEB(x) - run the statement list x, wrapped in a block, when verbose
 * Building with GO_FASTER compiles both away to nothing.
 */
#ifndef GO_FASTER
#define VERBOSE(x) if (verbose) x
#define VERBOSEB(x) if (verbose) {x}
#else
#define VERBOSE(x)
#define VERBOSEB(x)
#endif

/* 3.1.2 - trivial changes to fix compile on Windows */
#define VERSION_STRING "proxy_html/3.1.2"
51
52 #include <ctype.h>
53
54 /* libxml2 */
55 #include <libxml/HTMLparser.h>
56
57 /* apache */
58 #include <http_protocol.h>
59 #include <http_config.h>
60 #include <http_log.h>
61 #include <apr_strings.h>
62 #include <apr_hash.h>
63 #include <apr_strmatch.h>
64
65 #include <apr_optional.h>
66 #include <mod_xml2enc.h>
67 #include <http_request.h>
68
69 /* To support Apache 2.1/2.2, we need the ap_ forms of the
70 * regexp stuff, and they're now used in the code.
71 * To support 2.0 in the same compile, we #define the
72 * AP_ versions if necessary.
73 */
#ifdef AP_REG_ICASE
/* the ap_ regex names already exist: nothing to map */
#define APACHE22
#else
/* it's 2.0, so we #define the ap_ versions in terms of the POSIX names */
#define APACHE20
#define ap_regex_t      regex_t
#define ap_regmatch_t   regmatch_t
#define AP_REG_EXTENDED REG_EXTENDED
#define AP_REG_ICASE    REG_ICASE
#define AP_REG_NOSUB    REG_NOSUB
#define AP_REG_NEWLINE  REG_NEWLINE
/* the fifth (protocol) argument is dropped when mapping to the older call */
#define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d)
#endif
87
88 /* globals set once at startup */
89 static ap_regex_t* seek_meta ;
90 static const apr_strmatch_pattern* seek_content ;
91 static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
92 static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
93
94 module AP_MODULE_DECLARE_DATA proxy_html_module ;
95
/* Bit flags stored in urlmap.flags.  The letters are the characters
 * comp_urlmap() looks for in a rule's flag string.
 */
#define M_HTML             0x01   /* apply to link attributes (set unless 'h' given) */
#define M_EVENTS           0x02   /* apply to event attributes (set unless 'e' given) */
#define M_CDATA            0x04   /* apply to buffered text content (set unless 'c' given) */
#define M_REGEX            0x08   /* 'R': from-pattern is a regular expression */
#define M_ATSTART          0x10   /* '^': match only at the start of the text */
#define M_ATEND            0x20   /* '$': match only at the end of the text */
#define M_LAST             0x40   /* 'L': stop processing rules after this one matches */
#define M_NOTLAST          0x80   /* 'l': keep processing rules after this one matches */
#define M_INTERPOLATE_TO   0x100  /* 'V': expand ${var} in the to-pattern per request */
#define M_INTERPOLATE_FROM 0x200  /* 'v': expand ${var} in the from-pattern per request */
106
/* One attribute name, as stored in the link and event attribute arrays. */
typedef struct {
    const char *val;    /* attribute name, compared with strcmp() */
} tattr;
/* Byte range of a META element located by metafix(); the filter feeds the
 * parser the data before `start` and from `end` onwards.
 */
typedef struct {
    unsigned int start;
    unsigned int end;
} meta;
/* Condition attached to a rewrite rule, tested against the request
 * environment in fixup_rules().
 */
typedef struct {
    const char *env;    /* environment variable name */
    const char *val;    /* required value; NULL means any value will do */
    int rel;            /* 1: condition must hold, -1: condition must not hold */
} rewritecond;
119 typedef struct urlmap {
120 struct urlmap* next ;
121 unsigned int flags ;
122 unsigned int regflags ;
123 union {
124 const char* c ;
125 ap_regex_t* r ;
126 } from ;
127 const char* to ;
128 rewritecond* cond;
129 } urlmap ;
130 typedef struct {
131 urlmap* map ;
132 const char* doctype ;
133 const char* etag ;
134 unsigned int flags ;
135 size_t bufsz ;
136 apr_hash_t* links;
137 apr_array_header_t* events;
138 const char* charset_out;
139 int extfix ;
140 int metafix ;
141 int strip_comments ;
142 int interp;
143 int enabled;
144 #ifndef GO_FASTER
145 int verbose ;
146 #endif
147 } proxy_html_conf ;
148 typedef struct {
149 ap_filter_t* f ;
150 proxy_html_conf* cfg ;
151 htmlParserCtxtPtr parser ;
152 apr_bucket_brigade* bb ;
153 char* buf ;
154 size_t offset ;
155 size_t avail ;
156 const char* encoding;
157 urlmap* map;
158 } saxctxt ;
159
160
161 #define NORM_LC 0x1
162 #define NORM_MSSLASH 0x2
163 #define NORM_RESET 0x4
164 static htmlSAXHandler sax ;
165
166 typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t ;
167
/* Document type declarations.  These pointers are compared by identity
 * elsewhere, so each must remain a distinct object.
 */
static const char *const fpi_html =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
static const char *const fpi_html_legacy =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
static const char *const fpi_xhtml =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
static const char *const fpi_xhtml_legacy =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";

/* Text that closes the tag of an empty element, in HTML and XHTML form */
static const char *const html_etag = ">";
static const char *const xhtml_etag = " />";

/* Defaults: no doctype is emitted, and empty elements close HTML-style */
/*#define DEFAULT_DOCTYPE fpi_html */
static const char *const DEFAULT_DOCTYPE = "";
#define DEFAULT_ETAG html_etag
181
182 static void normalise(unsigned int flags, char* str) {
183 char* p ;
184 if ( flags & NORM_LC )
185 for ( p = str ; *p ; ++p )
186 if ( isupper(*p) )
187 *p = tolower(*p) ;
188
189 if ( flags & NORM_MSSLASH )
190 for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') )
191 *p = '/' ;
192
193 }
/* Feed `bytes` bytes at `inbuf` to the context's push parser; a non-zero
 * `flag` marks the final chunk.
 */
#define consume_buffer(ctx,inbuf,bytes,flag) \
    htmlParseChunk((ctx)->parser, (inbuf), (bytes), (flag))

/* Write `bytes` bytes at `inbuf` to the context's output brigade.
 * The `flush` argument is accepted but not used.
 * No trailing semicolon here: the caller supplies it, so the macro behaves
 * like an ordinary expression statement.
 */
#define AP_fwrite(ctx,inbuf,bytes,flush) \
    ap_fwrite((ctx)->f->next, (ctx)->bb, (inbuf), (bytes))
199
200 /* This is always utf-8 on entry. We can convert charset within FLUSH */
201 #define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1
202 static void pcharacters(void* ctxt, const xmlChar *uchars, int length) {
203 const char* chars = (const char*) uchars;
204 saxctxt* ctx = (saxctxt*) ctxt ;
205 int i ;
206 int begin ;
207 for ( begin=i=0; i<length; i++ ) {
208 switch (chars[i]) {
209 case '&' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&") ; break ;
210 case '<' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "<") ; break ;
211 case '>' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, ">") ; break ;
212 case '"' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, """) ; break ;
213 default : break ;
214 }
215 }
216 FLUSH ;
217 }
218 static void preserve(saxctxt* ctx, const size_t len) {
219 char* newbuf ;
220 if ( len <= ( ctx->avail - ctx->offset ) )
221 return ;
222 else while ( len > ( ctx->avail - ctx->offset ) )
223 ctx->avail += ctx->cfg->bufsz ;
224
225 newbuf = realloc(ctx->buf, ctx->avail) ;
226 if ( newbuf != ctx->buf ) {
227 if ( ctx->buf )
228 apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free);
229 apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
230 (int(*)(void*))free, apr_pool_cleanup_null);
231 ctx->buf = newbuf ;
232 }
233 }
234 static void pappend(saxctxt* ctx, const char* buf, const size_t len) {
235 preserve(ctx, len) ;
236 memcpy(ctx->buf+ctx->offset, buf, len) ;
237 ctx->offset += len ;
238 }
239 static void dump_content(saxctxt* ctx) {
240 urlmap* m ;
241 char* found ;
242 size_t s_from, s_to ;
243 size_t match ;
244 char c = 0 ;
245 int nmatch ;
246 ap_regmatch_t pmatch[10] ;
247 char* subs ;
248 size_t len, offs ;
249 urlmap* themap = ctx->map;
250 #ifndef GO_FASTER
251 int verbose = ctx->cfg->verbose ;
252 #endif
253
254 pappend(ctx, &c, 1) ; /* append null byte */
255 /* parse the text for URLs */
256 for ( m = themap ; m ; m = m->next ) {
257 if ( ! ( m->flags & M_CDATA ) )
258 continue ;
259 if ( m->flags & M_REGEX ) {
260 nmatch = 10 ;
261 offs = 0 ;
262 while ( ! ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0) ) {
263 match = pmatch[0].rm_so ;
264 s_from = pmatch[0].rm_eo - match ;
265 subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
266 nmatch, pmatch) ;
267 s_to = strlen(subs) ;
268 len = strlen(ctx->buf) ;
269 offs += match ;
270 VERBOSEB(
271 const char* f = apr_pstrndup(ctx->f->r->pool,
272 ctx->buf + offs , s_from ) ;
273 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
274 "C/RX: match at %s, substituting %s", f, subs) ;
275 )
276 if ( s_to > s_from) {
277 preserve(ctx, s_to - s_from) ;
278 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
279 len + 1 - s_from - offs) ;
280 memcpy(ctx->buf+offs, subs, s_to) ;
281 } else {
282 memcpy(ctx->buf + offs, subs, s_to) ;
283 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
284 len + 1 - s_from - offs) ;
285 }
286 offs += s_to ;
287 }
288 } else {
289 s_from = strlen(m->from.c) ;
290 s_to = strlen(m->to) ;
291 for ( found = strstr(ctx->buf, m->from.c) ; found ;
292 found = strstr(ctx->buf+match+s_to, m->from.c) ) {
293 match = found - ctx->buf ;
294 if ( ( m->flags & M_ATSTART ) && ( match != 0) )
295 break ;
296 len = strlen(ctx->buf) ;
297 if ( ( m->flags & M_ATEND ) && ( match < (len - s_from) ) )
298 continue ;
299 VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
300 "C: matched %s, substituting %s", m->from.c, m->to) ) ;
301 if ( s_to > s_from ) {
302 preserve(ctx, s_to - s_from) ;
303 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
304 len + 1 - s_from - match) ;
305 memcpy(ctx->buf+match, m->to, s_to) ;
306 } else {
307 memcpy(ctx->buf+match, m->to, s_to) ;
308 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
309 len + 1 - s_from - match) ;
310 }
311 }
312 }
313 }
314 AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1) ;
315 }
316 static void pcdata(void* ctxt, const xmlChar *uchars, int length) {
317 const char* chars = (const char*) uchars;
318 saxctxt* ctx = (saxctxt*) ctxt ;
319 if ( ctx->cfg->extfix ) {
320 pappend(ctx, chars, length) ;
321 } else {
322 /* not sure if this should force-flush
323 * (i.e. can one cdata section come in multiple calls?)
324 */
325 AP_fwrite(ctx, chars, length, 0) ;
326 }
327 }
328 static void pcomment(void* ctxt, const xmlChar *uchars) {
329 const char* chars = (const char*) uchars;
330 saxctxt* ctx = (saxctxt*) ctxt ;
331 if ( ctx->cfg->strip_comments )
332 return ;
333
334 if ( ctx->cfg->extfix ) {
335 pappend(ctx, "<!--", 4) ;
336 pappend(ctx, chars, strlen(chars) ) ;
337 pappend(ctx, "-->", 3) ;
338 } else {
339 ap_fputs(ctx->f->next, ctx->bb, "<!--") ;
340 AP_fwrite(ctx, chars, strlen(chars), 1) ;
341 ap_fputs(ctx->f->next, ctx->bb, "-->") ;
342 }
343 }
344 static void pendElement(void* ctxt, const xmlChar* uname) {
345 saxctxt* ctx = (saxctxt*) ctxt ;
346 const char* name = (const char*) uname;
347 const htmlElemDesc* desc = htmlTagLookup(uname);
348
349 if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
350 /* enforce html */
351 if (!desc || desc->depr)
352 return;
353
354 } else if ((ctx->cfg->doctype == fpi_html)
355 || (ctx->cfg->doctype == fpi_xhtml)) {
356 /* enforce html legacy */
357 if (!desc)
358 return;
359 }
360 /* TODO - implement HTML "allowed here" using the stack */
361 /* nah. Keeping the stack is too much overhead */
362
363 if ( ctx->offset > 0 ) {
364 dump_content(ctx) ;
365 ctx->offset = 0 ; /* having dumped it, we can re-use the memory */
366 }
367 if ( !desc || ! desc->empty ) {
368 ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
369 }
370 }
371 static void pstartElement(void* ctxt, const xmlChar* uname,
372 const xmlChar** uattrs ) {
373
374 int required_attrs ;
375 int num_match ;
376 size_t offs, len ;
377 char* subs ;
378 rewrite_t is_uri ;
379 const char** a ;
380 urlmap* m ;
381 size_t s_to, s_from, match ;
382 char* found ;
383 saxctxt* ctx = (saxctxt*) ctxt ;
384 size_t nmatch ;
385 ap_regmatch_t pmatch[10] ;
386 #ifndef GO_FASTER
387 int verbose = ctx->cfg->verbose ;
388 #endif
389 apr_array_header_t *linkattrs;
390 int i;
391 const char* name = (const char*) uname;
392 const char** attrs = (const char**) uattrs;
393 const htmlElemDesc* desc = htmlTagLookup(uname);
394 urlmap* themap = ctx->map;
395 #ifdef HAVE_STACK
396 const void** descp;
397 #endif
398 int enforce = 0;
399 if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
400 /* enforce html */
401 enforce = 2;
402 if (!desc || desc->depr)
403 return;
404
405 } else if ((ctx->cfg->doctype == fpi_html)
406 || (ctx->cfg->doctype == fpi_xhtml)) {
407 enforce = 1;
408 /* enforce html legacy */
409 if (!desc) {
410 return;
411 }
412 }
413 if (!desc && enforce) {
414 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
415 "Bogus HTML element %s dropped", name) ;
416 return;
417 }
418 if (desc && desc->depr && (enforce == 2) ) {
419 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
420 "Deprecated HTML element %s dropped", name) ;
421 return;
422 }
423 #ifdef HAVE_STACK
424 descp = apr_array_push(ctx->stack);
425 *descp = desc;
426 /* TODO - implement HTML "allowed here" */
427 #endif
428
429 ap_fputc(ctx->f->next, ctx->bb, '<') ;
430 ap_fputs(ctx->f->next, ctx->bb, name) ;
431
432 required_attrs = 0;
433 if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
434 for (a = desc->attrs_req; *a; a++)
435 ++required_attrs;
436
437 if ( attrs ) {
438 linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING) ;
439 for ( a = attrs ; *a ; a += 2 ) {
440 if (desc && enforce > 0) {
441 switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
442 case HTML_INVALID:
443 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
444 "Bogus HTML attribute %s of %s dropped", *a, name);
445 continue;
446 case HTML_DEPRECATED:
447 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
448 "Deprecated HTML attribute %s of %s dropped", *a, name);
449 continue;
450 case HTML_REQUIRED:
451 required_attrs--; /* cross off the number still needed */
452 /* fallthrough - required implies valid */
453 default:
454 break;
455 }
456 }
457 ctx->offset = 0 ;
458 if ( a[1] ) {
459 pappend(ctx, a[1], strlen(a[1])+1) ;
460 is_uri = ATTR_IGNORE ;
461 if ( linkattrs ) {
462 tattr* attrs = (tattr*) linkattrs->elts;
463 for (i=0; i < linkattrs->nelts; ++i) {
464 if ( !strcmp(*a, attrs[i].val)) {
465 is_uri = ATTR_URI ;
466 break ;
467 }
468 }
469 }
470 if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix
471 && (ctx->cfg->events != NULL) ) {
472 for (i=0; i < ctx->cfg->events->nelts; ++i) {
473 tattr* attrs = (tattr*) ctx->cfg->events->elts;
474 if ( !strcmp(*a, attrs[i].val)) {
475 is_uri = ATTR_EVENT ;
476 break ;
477 }
478 }
479 }
480 switch ( is_uri ) {
481 case ATTR_URI:
482 num_match = 0 ;
483 for ( m = themap ; m ; m = m->next ) {
484 if ( ! ( m->flags & M_HTML ) )
485 continue ;
486 if ( m->flags & M_REGEX ) {
487 nmatch = 10 ;
488 if ( ! ap_regexec(m->from.r, ctx->buf, nmatch, pmatch, 0) ) {
489 ++num_match ;
490 offs = match = pmatch[0].rm_so ;
491 s_from = pmatch[0].rm_eo - match ;
492 subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf,
493 nmatch, pmatch) ;
494 VERBOSE( {
495 const char* f = apr_pstrndup(ctx->f->r->pool,
496 ctx->buf + offs , s_from ) ;
497 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
498 "H/RX: match at %s, substituting %s", f, subs) ;
499 } )
500 s_to = strlen(subs) ;
501 len = strlen(ctx->buf) ;
502 if ( s_to > s_from) {
503 preserve(ctx, s_to - s_from) ;
504 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
505 len + 1 - s_from - offs) ;
506 memcpy(ctx->buf+offs, subs, s_to) ;
507 } else {
508 memcpy(ctx->buf + offs, subs, s_to) ;
509 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
510 len + 1 - s_from - offs) ;
511 }
512 }
513 } else {
514 s_from = strlen(m->from.c) ;
515 if ( ! strncasecmp(ctx->buf, m->from.c, s_from ) ) {
516 ++num_match ;
517 s_to = strlen(m->to) ;
518 len = strlen(ctx->buf) ;
519 VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
520 "H: matched %s, substituting %s", m->from.c, m->to) ) ;
521 if ( s_to > s_from ) {
522 preserve(ctx, s_to - s_from) ;
523 memmove(ctx->buf+s_to, ctx->buf+s_from,
524 len + 1 - s_from ) ;
525 memcpy(ctx->buf, m->to, s_to) ;
526 } else { /* it fits in the existing space */
527 memcpy(ctx->buf, m->to, s_to) ;
528 memmove(ctx->buf+s_to, ctx->buf+s_from,
529 len + 1 - s_from) ;
530 }
531 break ;
532 }
533 }
534 /* URIs only want one match unless overridden in the config */
535 if ( (num_match > 0) && !( m->flags & M_NOTLAST ) )
536 break ;
537 }
538 break ;
539 case ATTR_EVENT:
540 for ( m = themap ; m ; m = m->next ) {
541 num_match = 0 ; /* reset here since we're working per-rule */
542 if ( ! ( m->flags & M_EVENTS ) )
543 continue ;
544 if ( m->flags & M_REGEX ) {
545 nmatch = 10 ;
546 offs = 0 ;
547 while ( ! ap_regexec(m->from.r, ctx->buf+offs,
548 nmatch, pmatch, 0) ) {
549 match = pmatch[0].rm_so ;
550 s_from = pmatch[0].rm_eo - match ;
551 subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
552 nmatch, pmatch) ;
553 VERBOSE( {
554 const char* f = apr_pstrndup(ctx->f->r->pool,
555 ctx->buf + offs , s_from ) ;
556 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
557 "E/RX: match at %s, substituting %s", f, subs) ;
558 } )
559 s_to = strlen(subs) ;
560 offs += match ;
561 len = strlen(ctx->buf) ;
562 if ( s_to > s_from) {
563 preserve(ctx, s_to - s_from) ;
564 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
565 len + 1 - s_from - offs) ;
566 memcpy(ctx->buf+offs, subs, s_to) ;
567 } else {
568 memcpy(ctx->buf + offs, subs, s_to) ;
569 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
570 len + 1 - s_from - offs) ;
571 }
572 offs += s_to ;
573 ++num_match ;
574 }
575 } else {
576 found = strstr(ctx->buf, m->from.c) ;
577 if ( (m->flags & M_ATSTART) && ( found != ctx->buf) )
578 continue ;
579 while ( found ) {
580 s_from = strlen(m->from.c) ;
581 s_to = strlen(m->to) ;
582 match = found - ctx->buf ;
583 if ( ( s_from < strlen(found) ) && (m->flags & M_ATEND ) ) {
584 found = strstr(ctx->buf+match+s_from, m->from.c) ;
585 continue ;
586 } else {
587 found = strstr(ctx->buf+match+s_to, m->from.c) ;
588 }
589 VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
590 "E: matched %s, substituting %s", m->from.c, m->to) ) ;
591 len = strlen(ctx->buf) ;
592 if ( s_to > s_from ) {
593 preserve(ctx, s_to - s_from) ;
594 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
595 len + 1 - s_from - match) ;
596 memcpy(ctx->buf+match, m->to, s_to) ;
597 } else {
598 memcpy(ctx->buf+match, m->to, s_to) ;
599 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
600 len + 1 - s_from - match) ;
601 }
602 ++num_match ;
603 }
604 }
605 if ( num_match && ( m->flags & M_LAST ) )
606 break ;
607 }
608 break ;
609 case ATTR_IGNORE:
610 break ;
611 }
612 }
613 if ( ! a[1] )
614 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
615 else {
616
617 if ( ctx->cfg->flags != 0 )
618 normalise(ctx->cfg->flags, ctx->buf) ;
619
620 /* write the attribute, using pcharacters to html-escape
621 anything that needs it in the value.
622 */
623 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ;
624 pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)) ;
625 ap_fputc(ctx->f->next, ctx->bb, '"') ;
626 }
627 }
628 }
629 ctx->offset = 0 ;
630 if ( desc && desc->empty )
631 ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ;
632 else
633 ap_fputc(ctx->f->next, ctx->bb, '>') ;
634
635 if ((enforce > 0) && (required_attrs > 0)) {
636 /* if there are more required attributes than we found then complain */
637 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
638 "HTML element %s is missing %d required attributes",
639 name, required_attrs);
640 }
641 }
642
643 static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
644 #ifndef GO_FASTER
645 , int verbose
646 #endif
647 ) {
648 meta* ret = NULL ;
649 size_t offs = 0 ;
650 const char* p ;
651 const char* q ;
652 char* header ;
653 char* content ;
654 ap_regmatch_t pmatch[2] ;
655 char delim ;
656
657 while ( ! ap_regexec(seek_meta, buf+offs, 2, pmatch, 0) ) {
658 header = NULL ;
659 content = NULL ;
660 p = buf+offs+pmatch[1].rm_eo ;
661 while ( !isalpha(*++p) ) ;
662 for ( q = p ; isalnum(*q) || (*q == '-') ; ++q ) ;
663 header = apr_pstrndup(r->pool, p, q-p) ;
664 if ( strncasecmp(header, "Content-", 8) ) {
665 /* find content=... string */
666 p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
667 pmatch[0].rm_eo - pmatch[0].rm_so);
668 /* if it doesn't contain "content", ignore, don't crash! */
669 if (p != NULL) {
670 while (*p) {
671 p += 7 ;
672 while ( *p && isspace(*p) )
673 ++p ;
674 if ( *p != '=' )
675 continue ;
676 while ( *p && isspace(*++p) ) ;
677 if ( ( *p == '\'' ) || ( *p == '"' ) ) {
678 delim = *p++ ;
679 for ( q = p ; *q != delim ; ++q ) ;
680 } else {
681 for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
682 }
683 content = apr_pstrndup(r->pool, p, q-p) ;
684 break ;
685 }
686 }
687 } else if ( !strncasecmp(header, "Content-Type", 12) ) {
688 ret = apr_palloc(r->pool, sizeof(meta) ) ;
689 ret->start = pmatch[0].rm_so ;
690 ret->end = pmatch[0].rm_eo ;
691 }
692 if ( header && content ) {
693 VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
694 "Adding header [%s: %s] from HTML META", header, content) ) ;
695 apr_table_setn(r->headers_out, header, content) ;
696 }
697 offs += pmatch[0].rm_eo ;
698 }
699 return ret ;
700 }
701
702 static const char* interpolate_vars(request_rec* r, const char* str) {
703 const char* start;
704 const char* end;
705 const char* delim;
706 const char* before;
707 const char* after;
708 const char* replacement;
709 const char* var;
710 for (;;) {
711 start = str ;
712 if (start = ap_strstr_c(start, "${"), start == NULL)
713 break;
714
715 if (end = ap_strchr_c(start+2, '}'), end == NULL)
716 break;
717
718 delim = ap_strchr_c(start, '|');
719 before = apr_pstrndup(r->pool, str, start-str);
720 after = end+1;
721 if (delim) {
722 var = apr_pstrndup(r->pool, start+2, delim-start-2) ;
723 } else {
724 var = apr_pstrndup(r->pool, start+2, end-start-2) ;
725 }
726 replacement = apr_table_get(r->subprocess_env, var) ;
727 if (!replacement) {
728 if (delim)
729 replacement = apr_pstrndup(r->pool, delim+1, end-delim-1);
730 else
731 replacement = "";
732 }
733 str = apr_pstrcat(r->pool, before, replacement, after, NULL);
734 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
735 "Interpolating %s => %s", var, replacement) ;
736 }
737 return str;
738 }
739 static void fixup_rules(saxctxt* ctx) {
740 const char* thisval;
741 urlmap* newp;
742 urlmap* p;
743 urlmap* prev = NULL;
744 request_rec* r = ctx->f->r;
745 int has_cond;
746
747 for (p = ctx->cfg->map; p; p = p->next) {
748 has_cond = -1;
749 if (p->cond != NULL) {
750 thisval = apr_table_get(r->subprocess_env, p->cond->env);
751 if (!p->cond->val) {
752 /* required to be "anything" */
753 if (thisval)
754 has_cond = 1; /* satisfied */
755 else
756 has_cond = 0; /* unsatisfied */
757 } else {
758 if (thisval && !strcasecmp(p->cond->val, thisval)) {
759 has_cond = 1; /* satisfied */
760 } else {
761 has_cond = 0; /* unsatisfied */
762 }
763 }
764 if (((has_cond == 0) && (p->cond->rel ==1 ))
765 || ((has_cond == 1) && (p->cond->rel == -1))) {
766 continue; /* condition is unsatisfied */
767 }
768 }
769
770 newp = apr_pmemdup(r->pool, p, sizeof(urlmap));
771
772 if (newp->flags & M_INTERPOLATE_FROM) {
773 newp->from.c = interpolate_vars(r, newp->from.c);
774 if (!newp->from.c || !*newp->from.c)
775 continue; /* don't use empty from-pattern */
776 if (newp->flags & M_REGEX) {
777 newp->from.r = ap_pregcomp(r->pool, newp->from.c, newp->regflags) ;
778 }
779 }
780 if (newp->flags & M_INTERPOLATE_TO) {
781 newp->to = interpolate_vars(r, newp->to);
782 }
783 /* evaluate p->cond; continue if unsatisfied */
784 /* create new urlmap with memcpy and append to map */
785 /* interpolate from if flagged to do so */
786 /* interpolate to if flagged to do so */
787
788 if (prev != NULL)
789 prev->next = newp ;
790 else
791 ctx->map = newp ;
792 prev = newp ;
793 }
794
795 if (prev)
796 prev->next = NULL;
797 }
798 static saxctxt* check_filter_init (ap_filter_t* f) {
799 saxctxt* fctx ;
800 if ( ! f->ctx) {
801 proxy_html_conf* cfg
802 = ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
803 const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
804
805 const char* errmsg = NULL ;
806 if ( !force ) {
807 if ( ! f->r->proxyreq ) {
808 errmsg = "Non-proxy request; not inserting proxy-html filter" ;
809 } else if ( ! f->r->content_type ) {
810 errmsg = "No content-type; bailing out of proxy-html filter" ;
811 } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
812 strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
813 errmsg = "Non-HTML content; not inserting proxy-html filter" ;
814 }
815 }
816 if (!cfg->links) {
817 errmsg = "No links configured: nothing for proxy-html filter to do";
818 }
819
820 if ( errmsg ) {
821 #ifndef GO_FASTER
822 if ( cfg->verbose ) {
823 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ;
824 }
825 #endif
826 ap_remove_output_filter(f) ;
827 return NULL ;
828 }
829
830 fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
831 fctx->f = f ;
832 fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
833 fctx->cfg = cfg;
834 apr_table_unset(f->r->headers_out, "Content-Length") ;
835
836 if (cfg->interp)
837 fixup_rules(fctx);
838 else
839 fctx->map = cfg->map;
840 /* defer dealing with charset_out until after sniffing charset_in
841 * so we can support setting one to t'other.
842 */
843 }
844 return f->ctx ;
845 }
846 static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
847 apr_bucket* b ;
848 meta* m = NULL ;
849 xmlCharEncoding enc ;
850 const char* buf = 0 ;
851 apr_size_t bytes = 0 ;
852 #ifndef USE_OLD_LIBXML2
853 int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET |
854 XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ;
855 #endif
856
857 saxctxt* ctxt = check_filter_init(f) ;
858 #ifndef GO_FASTER
859 int verbose;
860 #endif
861 if ( ! ctxt )
862 return ap_pass_brigade(f->next, bb) ;
863 #ifndef GO_FASTER
864 verbose = ctxt->cfg->verbose;
865 #endif
866
867 for ( b = APR_BRIGADE_FIRST(bb) ;
868 b != APR_BRIGADE_SENTINEL(bb) ;
869 b = APR_BUCKET_NEXT(b) ) {
870 if ( APR_BUCKET_IS_METADATA(b) ) {
871 if ( APR_BUCKET_IS_EOS(b) ) {
872 if ( ctxt->parser != NULL ) {
873 consume_buffer(ctxt, buf, 0, 1);
874 }
875 APR_BRIGADE_INSERT_TAIL(ctxt->bb,
876 apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
877 ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
878 } else if ( APR_BUCKET_IS_FLUSH(b) ) {
879 /* pass on flush, except at start where it would cause
880 * headers to be sent before doc sniffing
881 */
882 if ( ctxt->parser != NULL ) {
883 ap_fflush(ctxt->f->next, ctxt->bb) ;
884 }
885 }
886 } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
887 == APR_SUCCESS ) {
888 if ( ctxt->parser == NULL ) {
889 const char* cenc;
890 if (!xml2enc_charset ||
891 (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
892 if (!xml2enc_charset)
893 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
894 "No i18n support found. Install mod_xml2enc if required") ;
895 enc = XML_CHAR_ENCODING_NONE;
896 ap_set_content_type(f->r, "text/html;charset=utf-8") ;
897 } else {
898 /* if we wanted a non-default charset_out, insert the
899 * xml2enc filter now that we've sniffed it
900 */
901 if (ctxt->cfg->charset_out && xml2enc_filter) {
902 if (*ctxt->cfg->charset_out != '*')
903 cenc = ctxt->cfg->charset_out;
904 xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
905 ap_set_content_type(f->r,
906 apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ;
907 } else /* Normal case, everything worked, utf-8 output */
908 ap_set_content_type(f->r, "text/html;charset=utf-8") ;
909 }
910
911 ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
912 ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ;
913 buf += 4;
914 bytes -= 4;
915 if (ctxt->parser == NULL) {
916 apr_status_t rv = ap_pass_brigade(f->next, bb) ;
917 ap_remove_output_filter(f) ;
918 return rv;
919 }
920 apr_pool_cleanup_register(f->r->pool, ctxt->parser,
921 (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ;
922 #ifndef USE_OLD_LIBXML2
923 if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
924 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
925 "Unsupported parser opts %x", xmlopts) ;
926 #endif
927 if ( ctxt->cfg->metafix )
928 #ifndef GO_FASTER
929 m = metafix(f->r, buf, ctxt->cfg->verbose) ;
930 #else
931 m = metafix(f->r, buf) ;
932 #endif
933 if ( m ) {
934 consume_buffer(ctxt, buf, m->start, 0) ;
935 consume_buffer(ctxt, buf+m->end, bytes-m->end, 0) ;
936 } else {
937 consume_buffer(ctxt, buf, bytes, 0) ;
938 }
939 } else {
940 consume_buffer(ctxt, buf, bytes, 0) ;
941 }
942 } else {
943 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
944 }
945 }
946 /*ap_fflush(ctxt->f->next, ctxt->bb) ; // uncomment for debug */
947 apr_brigade_cleanup(bb) ;
948 return APR_SUCCESS ;
949 }
950
951 static void* proxy_html_config(apr_pool_t* pool, char* x) {
952 proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
953 ret->doctype = DEFAULT_DOCTYPE ;
954 ret->etag = DEFAULT_ETAG ;
955 ret->bufsz = 8192 ;
956 /* ret->interp = 1; */
957 /* don't initialise links and events until they get set/used */
958 return ret ;
959 }
960 static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
961 proxy_html_conf* base = (proxy_html_conf*) BASE ;
962 proxy_html_conf* add = (proxy_html_conf*) ADD ;
963 proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
964
965 /* don't merge declarations - just use the most specific */
966 conf->links = (add->links == NULL) ? base->links : add->links;
967 conf->events = (add->events == NULL) ? base->events : add->events;
968
969 conf->charset_out = (add->charset_out == NULL)
970 ? base->charset_out : add->charset_out ;
971
972 if ( add->map && base->map ) {
973 urlmap* a ;
974 conf->map = NULL ;
975 for ( a = base->map ; a ; a = a->next ) {
976 urlmap* save = conf->map ;
977 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
978 conf->map->next = save ;
979 }
980 for ( a = add->map ; a ; a = a->next ) {
981 urlmap* save = conf->map ;
982 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
983 conf->map->next = save ;
984 }
985 } else
986 conf->map = add->map ? add->map : base->map ;
987
988 conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
989 ? base->doctype : add->doctype ;
990 conf->etag = ( add->etag == DEFAULT_ETAG ) ? base->etag : add->etag ;
991 conf->bufsz = add->bufsz ;
992 if ( add->flags & NORM_RESET ) {
993 conf->flags = add->flags ^ NORM_RESET ;
994 conf->metafix = add->metafix ;
995 conf->extfix = add->extfix ;
996 conf->interp = add->interp ;
997 conf->strip_comments = add->strip_comments ;
998 conf->enabled = add->enabled;
999 #ifndef GO_FASTER
1000 conf->verbose = add->verbose ;
1001 #endif
1002 } else {
1003 conf->flags = base->flags | add->flags ;
1004 conf->metafix = base->metafix | add->metafix ;
1005 conf->extfix = base->extfix | add->extfix ;
1006 conf->interp = base->interp | add->interp ;
1007 conf->strip_comments = base->strip_comments | add->strip_comments ;
1008 conf->enabled = add->enabled | base->enabled;
1009 #ifndef GO_FASTER
1010 conf->verbose = base->verbose | add->verbose ;
1011 #endif
1012 }
1013 return conf ;
1014 }
1015 #define REGFLAG(n,s,c) ( (s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0 )
1016 #define XREGFLAG(n,s,c) ( (!s||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0 )
1017 static void comp_urlmap(apr_pool_t* pool, urlmap* newmap,
1018 const char* from, const char* to, const char* flags, const char* cond) {
1019 char* eq;
1020 newmap->flags
1021 = XREGFLAG(M_HTML,flags,'h')
1022 | XREGFLAG(M_EVENTS,flags,'e')
1023 | XREGFLAG(M_CDATA,flags,'c')
1024 | REGFLAG(M_ATSTART,flags,'^')
1025 | REGFLAG(M_ATEND,flags,'$')
1026 | REGFLAG(M_REGEX,flags,'R')
1027 | REGFLAG(M_LAST,flags,'L')
1028 | REGFLAG(M_NOTLAST,flags,'l')
1029 | REGFLAG(M_INTERPOLATE_TO,flags,'V')
1030 | REGFLAG(M_INTERPOLATE_FROM,flags,'v')
1031 ;
1032 if ( ( newmap->flags & M_INTERPOLATE_FROM)
1033 || ! (newmap->flags & M_REGEX) ) {
1034 newmap->from.c = from ;
1035 newmap->to = to ;
1036 } else {
1037 newmap->regflags
1038 = REGFLAG(AP_REG_EXTENDED,flags,'x')
1039 | REGFLAG(AP_REG_ICASE,flags,'i')
1040 | REGFLAG(AP_REG_NOSUB,flags,'n')
1041 | REGFLAG(AP_REG_NEWLINE,flags,'s')
1042 ;
1043 newmap->from.r = ap_pregcomp(pool, from, newmap->regflags) ;
1044 newmap->to = to ;
1045 }
1046 if (cond != NULL) {
1047 char* cond_copy;
1048 newmap->cond = apr_pcalloc(pool, sizeof(rewritecond));
1049 if (cond[0] == '!') {
1050 newmap->cond->rel = -1;
1051 newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1);
1052 } else {
1053 newmap->cond->rel = 1;
1054 newmap->cond->env = cond_copy = apr_pstrdup(pool, cond);
1055 }
1056 eq = ap_strchr(++cond_copy, '=');
1057 if (eq) {
1058 *eq = 0;
1059 newmap->cond->val = eq+1;
1060 }
1061 } else {
1062 newmap->cond = NULL;
1063 }
1064 }
1065 static const char* set_urlmap(cmd_parms* cmd, void* CFG, const char* args) {
1066 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
1067 urlmap* map ;
1068 apr_pool_t* pool = cmd->pool;
1069 urlmap* newmap ;
1070 const char* usage =
1071 "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]";
1072 const char* from;
1073 const char* to;
1074 const char* flags;
1075 const char* cond = NULL;
1076
1077 if (from = ap_getword_conf(cmd->pool, &args), !from)
1078 return usage;
1079 if (to = ap_getword_conf(cmd->pool, &args), !to)
1080 return usage;
1081 flags = ap_getword_conf(cmd->pool, &args);
1082 if (flags && *flags)
1083 cond = ap_getword_conf(cmd->pool, &args);
1084 if (cond && !*cond)
1085 cond = NULL;
1086
1087 /* the args look OK, so let's use them */
1088 newmap = apr_palloc(pool, sizeof(urlmap) ) ;
1089 newmap->next = NULL;
1090 if ( cfg->map ) {
1091 for ( map = cfg->map ; map->next ; map = map->next ) ;
1092 map->next = newmap ;
1093 } else
1094 cfg->map = newmap ;
1095
1096 comp_urlmap(cmd->pool, newmap, from, to, flags, cond);
1097 return NULL;
1098 }
1099
1100 static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
1101 const char* l) {
1102 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
1103 if ( !strcasecmp(t, "xhtml") ) {
1104 cfg->etag = xhtml_etag ;
1105 if ( l && !strcasecmp(l, "legacy") )
1106 cfg->doctype = fpi_xhtml_legacy ;
1107 else
1108 cfg->doctype = fpi_xhtml ;
1109 } else if ( !strcasecmp(t, "html") ) {
1110 cfg->etag = html_etag ;
1111 if ( l && !strcasecmp(l, "legacy") )
1112 cfg->doctype = fpi_html_legacy ;
1113 else
1114 cfg->doctype = fpi_html ;
1115 } else {
1116 cfg->doctype = apr_pstrdup(cmd->pool, t) ;
1117 if ( l && ( ( l[0] == 'x' ) || ( l[0] == 'X' ) ) )
1118 cfg->etag = xhtml_etag ;
1119 else
1120 cfg->etag = html_etag ;
1121 }
1122 return NULL ;
1123 }
1124 static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg) {
1125 proxy_html_conf* cfg = CFG;
1126 if ( arg && *arg ) {
1127 if ( !strcmp(arg, "lowercase") )
1128 cfg->flags |= NORM_LC ;
1129 else if ( !strcmp(arg, "dospath") )
1130 cfg->flags |= NORM_MSSLASH ;
1131 else if ( !strcmp(arg, "reset") )
1132 cfg->flags |= NORM_RESET ;
1133 }
1134 return NULL ;
1135 }
1136 static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) {
1137 tattr* attr;
1138 proxy_html_conf* cfg = CFG;
1139 if (cfg->events == NULL)
1140 cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr));
1141 attr = apr_array_push(cfg->events) ;
1142 attr->val = arg;
1143 return NULL ;
1144 }
1145 static const char* set_links(cmd_parms* cmd, void* CFG,
1146 const char* elt, const char* att) {
1147 apr_array_header_t* attrs;
1148 tattr* attr ;
1149 proxy_html_conf* cfg = CFG;
1150
1151 if (cfg->links == NULL)
1152 cfg->links = apr_hash_make(cmd->pool);
1153
1154 attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING) ;
1155 if (!attrs) {
1156 attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*)) ;
1157 apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs) ;
1158 }
1159 attr = apr_array_push(attrs) ;
1160 attr->val = att ;
1161 return NULL ;
1162 }
1163 static const command_rec proxy_html_cmds[] = {
1164 AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
1165 RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"),
1166 AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
1167 RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"),
1168 AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL,
1169 RSRC_CONF|ACCESS_CONF, "Map URL From To" ) ,
1170 AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL,
1171 RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) ,
1172 AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL,
1173 RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) ,
1174 AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot,
1175 (void*)APR_OFFSETOF(proxy_html_conf, metafix),
1176 RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) ,
1177 AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot,
1178 (void*)APR_OFFSETOF(proxy_html_conf, interp),
1179 RSRC_CONF|ACCESS_CONF,
1180 "Support interpolation and conditions in URLMaps" ) ,
1181 AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot,
1182 (void*)APR_OFFSETOF(proxy_html_conf, extfix),
1183 RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) ,
1184 AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot,
1185 (void*)APR_OFFSETOF(proxy_html_conf, strip_comments),
1186 RSRC_CONF|ACCESS_CONF, "Strip out comments" ) ,
1187 #ifndef GO_FASTER
1188 AP_INIT_FLAG("ProxyHTMLLogVerbose", ap_set_flag_slot,
1189 (void*)APR_OFFSETOF(proxy_html_conf, verbose),
1190 RSRC_CONF|ACCESS_CONF, "Verbose Logging (use with LogLevel Info)" ) ,
1191 #endif
1192 AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
1193 (void*)APR_OFFSETOF(proxy_html_conf, bufsz),
1194 RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
1195 AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
1196 (void*)APR_OFFSETOF(proxy_html_conf, charset_out),
1197 RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) ,
1198 AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
1199 (void*)APR_OFFSETOF(proxy_html_conf, enabled),
1200 RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) ,
1201 { NULL }
1202 } ;
1203 static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
1204 server_rec* s) {
1205 ap_add_version_component(p, VERSION_STRING) ;
1206 seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
1207 AP_REG_EXTENDED|AP_REG_ICASE) ;
1208 seek_content = apr_strmatch_precompile(p, "content", 0);
1209 memset(&sax, 0, sizeof(htmlSAXHandler));
1210 sax.startElement = pstartElement ;
1211 sax.endElement = pendElement ;
1212 sax.characters = pcharacters ;
1213 sax.comment = pcomment ;
1214 sax.cdataBlock = pcdata ;
1215 xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset);
1216 xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter);
1217 if (!xml2enc_charset) {
1218 ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2,
1219 "I18n support in mod_proxy_html requires mod_xml2enc. "
1220 "Without it, non-ASCII characters in proxied pages are "
1221 "likely to display incorrectly.");
1222 }
1223 return OK ;
1224 }
1225 static void proxy_html_insert(request_rec* r) {
1226 proxy_html_conf* cfg
1227 = ap_get_module_config(r->per_dir_config, &proxy_html_module);
1228 if (cfg->enabled) {
1229 if (xml2enc_filter)
1230 xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
1231 ap_add_output_filter("proxy-html", NULL, r, r->connection);
1232 }
1233 }
1234 static void proxy_html_hooks(apr_pool_t* p) {
1235 static const char* aszSucc[] = { "mod_filter.c", NULL };
1236 ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
1237 NULL, AP_FTYPE_RESOURCE,
1238 AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ;
1239 ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
1240 ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ;
1241 }
1242 module AP_MODULE_DECLARE_DATA proxy_html_module = {
1243 STANDARD20_MODULE_STUFF,
1244 proxy_html_config,
1245 proxy_html_merge,
1246 NULL,
1247 NULL,
1248 proxy_html_cmds,
1249 proxy_html_hooks
1250 } ;