"Fossies" - the Fresh Open Source Software Archive 
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /********************************************************************
2 Copyright (c) 2007-8, WebThing Ltd
3 Author: Nick Kew <nick@webthing.com>
4
5 * This work is available to you under EITHER the Apache License Version 2.0
6 * OR the GNU General Public License Version 2. It is your choice which
7 * of these licenses you accept, but if you wish to copy or use this
8 * work, you MUST accept one of these licenses and abide by its terms.
9 *
10 *
11 *
12 * OPTION 1: Apache License
13 * WebThing licenses this file to You under the Apache License, Version 2.0
14 * (the "License"); you may not use this file except in compliance with
15 * the License. You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 *
25 *
26 *
27 * OPTION 2: GNU General Public License
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License Version 2,
30 * as published by the Free Software Foundation.
31 *
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 *
37 * You can obtain a copy of the GNU General Public License Version 2
38 * from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or
39 * http://apache.webthing.com/COPYING.txt
40
41 **********************************************************************/
42
43 /* Version 1.0.3 - Bugfix against crash on no-content-type response
44 * reaching the filter function
45 */
46
47 #if defined(WIN32)
48 #define XML2ENC_DECLARE_EXPORT
49 #endif
50
51 #include <ctype.h>
52
53 /* libxml2 */
54 #include <libxml/encoding.h>
55
56 /* apache */
57 #include <http_protocol.h>
58 #include <http_config.h>
59 #include <http_log.h>
60 #include <apr_strings.h>
61 #include <apr_xlate.h>
62
63 #include <apr_optional.h>
64 #include "mod_xml2enc.h"
65
66 /* Apache 2.0 isn't really supported, but "should work" with these #defines. */
67 #ifndef AP_REG_ICASE
68 /* it's 2.0, so we #define the ap_ versions */
69 #define ap_regex_t regex_t
70 #define ap_regmatch_t regmatch_t
71 #define AP_REG_EXTENDED REG_EXTENDED
72 #define AP_REG_ICASE REG_ICASE
73 #define AP_REG_NOSUB REG_NOSUB
74 #define AP_REG_NEWLINE REG_NEWLINE
75 #define APACHE20
76 #define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d)
77 #else
78 #define APACHE22
79 #endif
80
81 module AP_MODULE_DECLARE_DATA xml2enc_module;
82
/* Default buffer size: used for the pre-initialised conversion buffer,
 * for the on-stack flattening buffer, and (minus one) as the sniffing
 * length when the brigade length is unknown.
 */
#define BUFLEN 8192
/* Amount of data the filter waits for before sniffing, unless EOS is seen */
#define BUF_MIN 4096
/* Iterate bucket pointer b over every bucket of brigade bb */
#define APR_BRIGADE_DO(b,bb) for (b = APR_BRIGADE_FIRST(bb); \
        b != APR_BRIGADE_SENTINEL(bb); b = APR_BUCKET_NEXT(b))

/* Bits kept in xml2ctx.flags */
#define ENC_INITIALISED 0x100   /* charset/conversion setup has been done */
#define ENC_SEEN_EOS 0x200      /* an EOS bucket was found while buffering */
#define ENC_SKIPTO ENCIO_SKIPTO /* caller asked for leading content to be skipped */

/* True when enc is a real libxml2 encoding, i.e. neither NONE nor ERROR */
#define HAVE_ENCODING(enc) \
        (((enc)!=XML_CHAR_ENCODING_NONE)&&((enc)!=XML_CHAR_ENCODING_ERROR))
94
/* Per-filter state of the xml2enc output filter. */
typedef struct {
    xmlCharEncoding xml2enc;      /* libxml2 encoding id found by sniffing */
    char* buf;                    /* flattened start of data while sniffing;
                                   * afterwards the conversion output buffer */
    apr_size_t bytes;             /* byte count associated with buf */
    apr_xlate_t* convset;         /* apr_xlate handle; NULL means pass data
                                   * through unconverted */
    unsigned int flags;           /* ENC_* bits */
    apr_off_t bblen;              /* length of the sniffed data / size used
                                   * for buf during conversion */
    apr_bucket_brigade* bbnext;   /* converted output written downstream */
    apr_bucket_brigade* bbsave;   /* data set aside between filter calls */
    const char* encoding;         /* charset name matching xml2enc, if known */
} xml2ctx;
106
/* Per-directory configuration. */
typedef struct {
    const char* default_charset;       /* charset name given to xml2EncDefault */
    xmlCharEncoding default_encoding;  /* xmlParseCharEncoding() of that name */
    apr_array_header_t* skipto;        /* array of tattr: xml2StartParse elements */
} xml2cfg;
112
/* One entry of xml2cfg.skipto */
typedef struct {
    const char* val;   /* element name as given to xml2StartParse */
} tattr;
116
/* Compiled once in xml2enc_hooks(): matches a <meta http-equiv=content-type> tag */
static ap_regex_t* seek_meta_ctype;
/* Compiled once in xml2enc_hooks(): matches "charset=<name>", capturing <name> */
static ap_regex_t* seek_charset;
119
120 static apr_status_t xml2enc_filter(request_rec* r, const char* enc,
121 unsigned int mode) {
122 /* set up a ready-initialised ctx to convert to enc, and insert filter */
123 apr_xlate_t* convset;
124 apr_status_t rv;
125 unsigned int flags = (mode ^ ENCIO);
126 if ((mode & ENCIO) == ENCIO_OUTPUT) {
127 rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool);
128 flags |= ENC_INITIALISED;
129 } else if ((mode & ENCIO) == ENCIO_INPUT) {
130 rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool);
131 flags |= ENC_INITIALISED;
132 } else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) {
133 convset = NULL;
134 rv = APR_SUCCESS; /* we'll initialise later by sniffing */
135 } else {
136 rv = APR_EGENERAL;
137 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "xml2enc: bad mode %x", mode);
138 }
139 if (rv == APR_SUCCESS) {
140 xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx));
141 ctx->flags = flags;
142 if (flags & ENC_INITIALISED) {
143 ctx->convset = convset;
144 ctx->bblen = BUFLEN;
145 ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen);
146 }
147 ap_add_output_filter("xml2enc", ctx, r, r->connection);
148 } else {
149 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
150 "xml2enc: Charset %s not supported.", enc) ;
151 }
152 return rv;
153 }
154
155 /* This needs to operate only when we're using htmlParser */
156 /* Different modules may apply different rules here. Ho, hum. */
157 static void fix_skipto(request_rec* r, xml2ctx* ctx) {
158 apr_status_t rv;
159 xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
160 if ((cfg->skipto != NULL) && (ctx->flags | ENC_SKIPTO)) {
161 int found = 0;
162 char* p = ap_strchr(ctx->buf, '<');
163 tattr* starts = (tattr*) cfg->skipto->elts;
164 while (!found && p && *p) {
165 int i;
166 for (i = 0; i < cfg->skipto->nelts; ++i) {
167 if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
168 /* found a starting element. Strip all that comes before. */
169 apr_bucket* b;
170 apr_bucket* bstart;
171 rv = apr_brigade_partition(ctx->bbsave, (p-ctx->buf), &bstart);
172 while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) {
173 APR_BUCKET_REMOVE(b);
174 apr_bucket_destroy(b);
175 }
176 ctx->bytes -= (p-ctx->buf);
177 ctx->buf = p ;
178 found = 1;
179 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
180 "Skipped to first <%s> element", starts[i].val) ;
181 break;
182 }
183 }
184 p = ap_strchr(p+1, '<');
185 }
186 if (p == NULL) {
187 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
188 "Failed to find start of recognised HTML!") ;
189 }
190 }
191 }
192 static void sniff_encoding(request_rec* r, xml2ctx* ctx) {
193 xml2cfg* cfg = NULL; /* initialise to shut compiler warnings up */
194 char* p ;
195 apr_bucket* cutb;
196 apr_bucket* cute;
197 apr_bucket* b;
198 ap_regmatch_t match[2] ;
199 apr_status_t rv;
200 const char* ctype = r->content_type;
201
202 if (ctype) {
203 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Content-Type is %s", ctype) ;
204
205 /* If we've got it in the HTTP headers, there's nothing to do */
206 if (ctype && (p = ap_strcasestr(ctype, "charset=") , p != NULL)) {
207 p += 8 ;
208 if (ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ), ctx->encoding) {
209 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
210 "Got charset %s from HTTP headers", ctx->encoding) ;
211 ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
212 }
213 }
214 }
215
216 /* to sniff, first we look for BOM */
217 if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) {
218 ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf, ctx->bytes);
219 if (HAVE_ENCODING(ctx->xml2enc)) {
220 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
221 "Got charset from XML rules.") ;
222 ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc);
223 }
224 }
225
226 /* If none of the above, look for a META-thingey */
227 /* also we're probably about to invalidate it, so we remove it. */
228 if ( ap_regexec(seek_meta_ctype, ctx->buf, 1, match, 0) == 0 ) {
229 /* get markers on the start and end of the match */
230 rv = apr_brigade_partition(ctx->bbsave, match[0].rm_eo, &cute);
231 rv = apr_brigade_partition(ctx->bbsave, match[0].rm_so, &cutb);
232 /* now set length of useful buf for start-of-data hooks */
233 ctx->bytes = match[0].rm_so;
234 if (ctx->encoding == NULL) {
235 p = apr_pstrndup(r->pool, ctx->buf + match[0].rm_so,
236 match[0].rm_eo - match[0].rm_so) ;
237 if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 ) {
238 if (ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
239 match[1].rm_eo - match[1].rm_so), ctx->encoding) {
240 ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
241 if (HAVE_ENCODING(ctx->xml2enc))
242 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
243 "Got charset %s from HTML META", ctx->encoding) ;
244 }
245 }
246 }
247
248 /* cut out the <meta> we're invalidating */
249 while (cutb != cute) {
250 b = APR_BUCKET_NEXT(cutb);
251 APR_BUCKET_REMOVE(cutb);
252 apr_bucket_destroy(cutb);
253 cutb = b;
254 }
255 /* and leave a string */
256 ctx->buf[ctx->bytes] = 0;
257 }
258
259 /* either it's set to something we found or it's still the default */
260 /* Aaargh! libxml2 has undocumented <META-crap> support. So this fails
261 * if metafix is not active. Have to make it conditional.
262 *
263 * No, that means no-metafix breaks things. Deal immediately with
264 * this particular instance of metafix.
265 */
266 if (!HAVE_ENCODING(ctx->xml2enc)) {
267 cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
268 if (!ctx->encoding) {
269 ctx->encoding = cfg->default_charset?cfg->default_charset:"ISO-8859-1";
270 }
271 /* Unsupported charset. Can we get (iconv) support through apr_xlate? */
272 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
273 "Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding);
274 if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) {
275 ctx->xml2enc = XML_CHAR_ENCODING_UTF8 ;
276 } else {
277 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
278 "Charset %s not supported. Consider aliasing it?", ctx->encoding) ;
279 }
280 }
281
282 if (!HAVE_ENCODING(ctx->xml2enc)) {
283 /* Use configuration default as a last resort */
284 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
285 "No usable charset information; using configuration default") ;
286 ctx->xml2enc = (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
287 ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
288 }
289 if (ctype && ctx->encoding) {
290 if (ap_regexec(seek_charset, ctype, 2, match, 0)) {
291 r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8", NULL);
292 } else {
293 char* str = apr_palloc(r->pool, strlen(r->content_type)
294 + 13 - (match[0].rm_eo - match[0].rm_so) + 1);
295 memcpy(str, r->content_type, match[1].rm_so);
296 //memcpy(str + match[1].rm_so, "charset=utf-8", 5);
297 memcpy(str + match[1].rm_so, "utf-8", 5);
298 strcpy(str + match[1].rm_so + 5, r->content_type+match[1].rm_eo);
299 r->content_type = str;
300 }
301 }
302 }
303
304 static apr_status_t xml2enc_filter_init(ap_filter_t* f) {
305 xml2ctx* ctx;
306 if (!f->ctx) {
307 xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config, &xml2enc_module);
308 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx));
309 ctx->xml2enc = XML_CHAR_ENCODING_NONE;
310 if (cfg->skipto != NULL) {
311 ctx->flags |= ENC_SKIPTO;
312 }
313 }
314 return APR_SUCCESS;
315 }
316 static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) {
317 xml2ctx* ctx = f->ctx;
318 apr_status_t rv;
319 apr_bucket* b;
320 apr_bucket* bstart;
321 apr_size_t insz = 0;
322 char *ctype;
323 char *p;
324
325 if (!ctx || !f->r->content_type) {
326 /* log error about configuring this */
327 ap_remove_output_filter(f);
328 return ap_pass_brigade(f->next, bb) ;
329 }
330
331 ctype = apr_pstrdup(f->r->pool, f->r->content_type);
332 for (p = ctype; *p; ++p)
333 if (isupper(*p))
334 *p = tolower(*p);
335
336 /* only act if starts-with "text/" or contains "xml" */
337 if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) {
338 ap_remove_output_filter(f);
339 return ap_pass_brigade(f->next, bb) ;
340 }
341
342 if (ctx->bbsave == NULL) {
343 ctx->bbsave = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc);
344 }
345 /* append to any data left over from last time */
346 APR_BRIGADE_CONCAT(ctx->bbsave, bb);
347
348 if (!(ctx->flags & ENC_INITIALISED)) {
349 /* some kind of initialisation required */
350 /* Turn all this off when post-processing */
351
352 /* if we don't have enough data to sniff but more's to come, wait for it */
353 rv = apr_brigade_length(ctx->bbsave, 0, &ctx->bblen);
354 if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) {
355 APR_BRIGADE_DO(b, ctx->bbsave) {
356 if (APR_BUCKET_IS_EOS(b)) {
357 ctx->flags |= ENC_SEEN_EOS;
358 break;
359 }
360 }
361 if (!(ctx->flags & ENC_SEEN_EOS)) {
362 /* not enough data to sniff. Wait for more */
363 APR_BRIGADE_DO(b, ctx->bbsave) {
364 apr_bucket_setaside(b, f->r->pool);
365 }
366 return APR_SUCCESS;
367 }
368 }
369 if (ctx->bblen == -1) {
370 ctx->bblen = BUFLEN-1;
371 }
372 /* flatten it into a NULL-terminated string */
373 ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1));
374 ctx->bytes = (apr_size_t)ctx->bblen;
375 rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes);
376 ctx->buf[ctx->bytes] = 0;
377 sniff_encoding(f->r, ctx);
378 /* FIXME: hook here for rewriting start-of-data? */
379 /* nah, we only have one action here - call it inline */
380 fix_skipto(f->r, ctx);
381
382 /* consume the data we just sniffed */
383 /* we need to omit any <meta> we just invalidated */
384 ctx->flags |= ENC_INITIALISED;
385 ap_set_module_config(f->r->request_config, &xml2enc_module, ctx);
386 }
387 if (ctx->bbnext == NULL) {
388 ctx->bbnext = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc);
389 }
390
391 if (!ctx->convset) {
392 rv = ap_pass_brigade(f->next, ctx->bbsave);
393 apr_brigade_cleanup(ctx->bbsave);
394 ap_remove_output_filter(f);
395 return rv;
396 }
397 /* move the data back to bb */
398 APR_BRIGADE_CONCAT(bb, ctx->bbsave);
399
400 while (b = APR_BRIGADE_FIRST(bb), b != APR_BRIGADE_SENTINEL(bb)) {
401 ctx->bytes = 0;
402 if (APR_BUCKET_IS_METADATA(b)) {
403 if (APR_BUCKET_IS_EOS(b)) {
404 /* send remaining data */
405 return ap_fflush(f->next, ctx->bbnext);
406 } else if (APR_BUCKET_IS_FLUSH(b)) {
407 ap_fflush(f->next, ctx->bbnext);
408 }
409 APR_BUCKET_REMOVE(b);
410 apr_bucket_destroy(b);
411 } else { /* data bucket */
412 char* buf;
413 apr_size_t bytes = 0;
414 char fixbuf[BUFLEN];
415 apr_bucket* bdestroy = NULL;
416 if (insz > 0) { /* we have dangling data. Flatten it. */
417 buf = fixbuf;
418 bytes = BUFLEN;
419 rv = apr_brigade_flatten(bb, buf, &bytes);
420 if (bytes == insz) {
421 /* this is only what we've already tried to convert.
422 * The brigade is exhausted.
423 * Save remaining data for next time round
424 */
425
426 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
427 "xml2enc: Setting aside %" APR_SIZE_T_FMT
428 " unconverted bytes", bytes);
429 rv = ap_fflush(f->next, ctx->bbnext);
430 APR_BRIGADE_CONCAT(ctx->bbsave, bb);
431 APR_BRIGADE_DO(b, ctx->bbsave) {
432 apr_bucket_setaside(b, f->r->pool);
433 }
434 return rv;
435 }
436 /* remove the data we've just read */
437 rv = apr_brigade_partition(bb, bytes, &bstart);
438 while (b = APR_BRIGADE_FIRST(bb), b != bstart) {
439 APR_BUCKET_REMOVE(b);
440 apr_bucket_destroy(b);
441 }
442 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2enc: consuming %"
443 APR_SIZE_T_FMT " bytes flattened", bytes);
444 }
445 else {
446 rv = apr_bucket_read(b, (const char**)&buf, &bytes, APR_BLOCK_READ);
447 APR_BUCKET_REMOVE(b);
448 bdestroy = b; /* can't destroy until we've finished with the data */
449 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2enc: consuming %"
450 APR_SIZE_T_FMT " bytes from bucket", bytes);
451 }
452 /* OK, we've got some input we can use in [buf,bytes] */
453 if (rv == APR_SUCCESS) {
454 apr_size_t consumed;
455 xml2enc_run_preprocess(f, &buf, &bytes);
456 consumed = insz = bytes;
457 while (insz > 0) {
458 if (ctx->bytes == ctx->bblen) {
459 /* nothing was converted last time!
460 * break out of this loop!
461 */
462 b = apr_bucket_transient_create(buf+(bytes - insz), insz,
463 bb->bucket_alloc);
464 APR_BRIGADE_INSERT_HEAD(bb, b);
465 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
466 "xml2enc: reinserting %" APR_SIZE_T_FMT
467 " unconsumed bytes from bucket", insz);
468 break;
469 }
470 ctx->bytes = (apr_size_t)ctx->bblen;
471 rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz), &insz,
472 ctx->buf, &ctx->bytes);
473 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r,
474 "xml2enc: converted %" APR_SIZE_T_FMT "/%" APR_OFF_T_FMT " bytes",
475 consumed - insz, ctx->bblen - ctx->bytes);
476 #if DEBUG_XML2ENC
477 /* never use this in the wild */
478 {
479 static int serial = 0;
480 const char* fname ;
481 apr_file_t* file ;
482 fname = apr_psprintf(f->r->pool, "/tmp/%d-xml2enc.%d", rv, serial++);
483 apr_file_open(&file, fname, APR_WRITE|APR_TRUNCATE|APR_CREATE,
484 APR_FPROT_OS_DEFAULT, f->r->pool);
485 apr_file_write(file, buf+(bytes-consumed), &consumed);
486 apr_file_close(file);
487 }
488 #endif
489 consumed = insz;
490 ap_fwrite(f->next, ctx->bbnext, ctx->buf, (apr_size_t)ctx->bblen - ctx->bytes);
491 switch (rv) {
492 case APR_SUCCESS:
493 continue;
494 case APR_EINCOMPLETE:
495 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "INCOMPLETE");
496 continue; /* If outbuf was too small, go round again.
497 * If it was inbuf, we'll break out when we test
498 * ctx->bytes == ctx->bblen
499 */
500 case APR_EINVAL: /* try skipping one bad byte */
501 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r,
502 "Skipping invalid byte(s) in input stream!");
503 --insz;
504 continue;
505 default:
506 /* Erk! What's this?
507 * Bail out, flush, and hope to eat the buf raw
508 */
509 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
510 "Failed to convert input; trying it raw") ;
511 ctx->convset = NULL;
512 ap_fflush(f->next, ctx->bbnext);
513 return ap_pass_brigade(f->next, ctx->bbnext);
514 }
515 }
516 } else {
517 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
518 "xml2enc: error reading data") ;
519 }
520 if (bdestroy) {
521 apr_bucket_destroy(bdestroy);
522 }
523 }
524 }
525 return APR_SUCCESS;
526 }
527 static apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp,
528 const char** encoding) {
529 xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module);
530 if (!ctx || !(ctx->flags & ENC_INITIALISED)) {
531 return APR_EAGAIN;
532 }
533 *encp = ctx->xml2enc;
534 *encoding = ctx->encoding;
535 return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL;
536 }
537 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
538 static void xml2enc_hooks(apr_pool_t* pool) {
539 ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc,
540 xml2enc_filter_init, AP_FTYPE_RESOURCE, PROTO_FLAGS);
541 APR_REGISTER_OPTIONAL_FN(xml2enc_filter);
542 APR_REGISTER_OPTIONAL_FN(xml2enc_charset);
543 seek_meta_ctype = ap_pregcomp(pool,
544 "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
545 AP_REG_EXTENDED|AP_REG_ICASE) ;
546 seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
547 AP_REG_EXTENDED|AP_REG_ICASE) ;
548 }
549 static const char* set_alias(cmd_parms* cmd, void* CFG,
550 const char* charset, const char* alias) {
551 const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY);
552 if (errmsg != NULL)
553 return errmsg ;
554 else if (xmlAddEncodingAlias(charset, alias) == 0)
555 return NULL;
556 else
557 return "Error setting charset alias";
558 }
559
560 static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset) {
561 xml2cfg* cfg = CFG;
562 cfg->default_charset = charset;
563 cfg->default_encoding = xmlParseCharEncoding(charset);
564 #if 0
565 switch(cfg->default_encoding) {
566 case XML_CHAR_ENCODING_NONE:
567 return "Default charset not found";
568 case XML_CHAR_ENCODING_ERROR:
569 /*return "Invalid or unsupported default charset";*/
570 default:
571 return NULL;
572 }
573 #endif
574 return NULL;
575 }
576 static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) {
577 tattr* attr;
578 xml2cfg* cfg = CFG;
579 if (cfg->skipto == NULL)
580 cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
581 attr = apr_array_push(cfg->skipto) ;
582 attr->val = arg;
583 return NULL ;
584 }
585
586 static const command_rec xml2enc_cmds[] = {
587 AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
588 "Usage: xml2EncDefault charset") ,
589 AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF,
590 "EncodingAlias charset alias [more aliases]") ,
591 AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
592 "Ignore anything in front of the first of these elements") ,
593 { NULL }
594 };
595 static void* xml2enc_config(apr_pool_t* pool, char* x) {
596 xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg));
597 ret->default_encoding = XML_CHAR_ENCODING_NONE ;
598 return ret;
599 }
600
601 static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD) {
602 xml2cfg* base = BASE;
603 xml2cfg* add = ADD;
604 xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg));
605 ret->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE)
606 ? base->default_encoding : add->default_encoding ;
607 ret->default_charset = add->default_charset ? add->default_charset : base->default_charset;
608 ret->skipto = add->skipto ? add->skipto : base->skipto;
609 return ret;
610 }
/* Module record tying together the config, directive and hook callbacks */
module AP_MODULE_DECLARE_DATA xml2enc_module = {
    STANDARD20_MODULE_STUFF,
    xml2enc_config,     /* creates the xml2cfg read via per_dir_config */
    xml2enc_merge,      /* merges two xml2cfg records */
    NULL,               /* presumably server config creator - none */
    NULL,               /* presumably server config merger - none */
    xml2enc_cmds,       /* directive table */
    xml2enc_hooks       /* registers the filter, optional fns and regexes */
};
/* Implements the optional "preprocess" hook.  xml2enc_ffunc() calls it as
 * xml2enc_run_preprocess(f, &buf, &bytes) on each chunk of input before
 * converting it.
 */
APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(xml2enc, XML2ENC, int, preprocess,
                      (ap_filter_t *f, char** bufp, apr_size_t* bytesp),
                      (f, bufp, bytesp), OK, DECLINED)