"Fossies" - the Fresh Open Source Software Archive

Member "mod_proxy_html/mod_xml2enc.c" (30 Oct 2009, 22262 Bytes) of package /linux/www/apache_httpd_modules/old/mod_proxy_html.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /********************************************************************
    2         Copyright (c) 2007-8, WebThing Ltd
    3         Author: Nick Kew <nick@webthing.com>
    4 
    5  * This work is available to you under EITHER the Apache License Version 2.0
    6  * OR the GNU General Public License Version 2.  It is your choice which
    7  * of these licenses you accept, but if you wish to copy or use this
    8  * work, you MUST accept one of these licenses and abide by its terms.
    9  *
   10  *
   11  *
   12  * OPTION 1: Apache License
   13  * WebThing licenses this file to You under the Apache License, Version 2.0
   14  * (the "License"); you may not use this file except in compliance with
   15  * the License.  You may obtain a copy of the License at
   16  *
   17  *     http://www.apache.org/licenses/LICENSE-2.0
   18  *
   19  * Unless required by applicable law or agreed to in writing, software
   20  * distributed under the License is distributed on an "AS IS" BASIS,
   21  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   22  * See the License for the specific language governing permissions and
   23  * limitations under the License.
   24  *
   25  *
   26  *
   27  * OPTION 2: GNU General Public License
   28  * This program is free software; you can redistribute it and/or modify
   29  * it under the terms of the GNU General Public License  Version 2,
   30  * as published by the Free Software Foundation.
   31  *
   32  * This program is distributed in the hope that it will be useful,
   33  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   34  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   35  * GNU General Public License for more details.
   36  *
   37  * You can obtain a copy of the GNU General Public License Version 2
   38  * from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or
   39  * http://apache.webthing.com/COPYING.txt
   40 
   41 **********************************************************************/
   42 
   43 /* Version 1.0.3 - Bugfix against crash on no-content-type response
   44  *                 reaching the filter function
   45  */
   46 
   47 #if defined(WIN32)
   48 #define XML2ENC_DECLARE_EXPORT
   49 #endif
   50 
   51 #include <ctype.h>
   52 
   53 /* libxml2 */
   54 #include <libxml/encoding.h>
   55 
   56 /* apache */
   57 #include <http_protocol.h>
   58 #include <http_config.h>
   59 #include <http_log.h>
   60 #include <apr_strings.h>
   61 #include <apr_xlate.h>
   62 
   63 #include <apr_optional.h>
   64 #include "mod_xml2enc.h"
   65 
   66 /* Apache 2.0 isn't really supported, but "should work" with these #defines. */
   67 #ifndef AP_REG_ICASE
   68 /* it's 2.0, so we #define the ap_ versions */
   69 #define ap_regex_t regex_t
   70 #define ap_regmatch_t regmatch_t
   71 #define AP_REG_EXTENDED REG_EXTENDED
   72 #define AP_REG_ICASE REG_ICASE
   73 #define AP_REG_NOSUB REG_NOSUB
   74 #define AP_REG_NEWLINE REG_NEWLINE
   75 #define APACHE20
   76 #define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d)
   77 #else
   78 #define APACHE22
   79 #endif
   80 
   81 module AP_MODULE_DECLARE_DATA xml2enc_module;
   82 
   83 #define BUFLEN 8192
   84 #define BUF_MIN 4096
   85 #define APR_BRIGADE_DO(b,bb) for (b = APR_BRIGADE_FIRST(bb); \
   86         b != APR_BRIGADE_SENTINEL(bb); b = APR_BUCKET_NEXT(b))
   87 
   88 #define ENC_INITIALISED 0x100
   89 #define ENC_SEEN_EOS 0x200
   90 #define ENC_SKIPTO ENCIO_SKIPTO
   91 
   92 #define HAVE_ENCODING(enc) \
   93     (((enc)!=XML_CHAR_ENCODING_NONE)&&((enc)!=XML_CHAR_ENCODING_ERROR))
   94 
   95 typedef struct {
   96   xmlCharEncoding xml2enc;
   97   char* buf;
   98   apr_size_t bytes;
   99   apr_xlate_t* convset;
  100   unsigned int flags;
  101   apr_off_t bblen;
  102   apr_bucket_brigade* bbnext;
  103   apr_bucket_brigade* bbsave;
  104   const char* encoding;
  105 } xml2ctx;
  106 
  107 typedef struct {
  108   const char* default_charset;
  109   xmlCharEncoding default_encoding;
  110   apr_array_header_t* skipto;
  111 } xml2cfg;
  112 
  113 typedef struct {
  114   const char* val;
  115 } tattr;
  116 
  117 static ap_regex_t* seek_meta_ctype;
  118 static ap_regex_t* seek_charset;
  119 
  120 static apr_status_t xml2enc_filter(request_rec* r, const char* enc,
  121     unsigned int mode) {
  122   /* set up a ready-initialised ctx to convert to enc, and insert filter */
  123   apr_xlate_t* convset; 
  124   apr_status_t rv;
  125   unsigned int flags = (mode ^ ENCIO);
  126   if ((mode & ENCIO) == ENCIO_OUTPUT) {
  127     rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool);
  128     flags |= ENC_INITIALISED;
  129   } else if ((mode & ENCIO) == ENCIO_INPUT) {
  130     rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool);
  131     flags |= ENC_INITIALISED;
  132   } else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) {
  133     convset = NULL;
  134     rv = APR_SUCCESS; /* we'll initialise later by sniffing */
  135   } else {
  136     rv = APR_EGENERAL;
  137     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "xml2enc: bad mode %x", mode);
  138   }
  139   if (rv == APR_SUCCESS) {
  140     xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx));
  141     ctx->flags = flags;
  142     if (flags & ENC_INITIALISED) {
  143       ctx->convset = convset;
  144       ctx->bblen = BUFLEN;
  145       ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen);
  146     }
  147     ap_add_output_filter("xml2enc", ctx, r, r->connection);
  148   } else {
  149     ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
  150       "xml2enc: Charset %s not supported.", enc) ;
  151   }
  152   return rv;
  153 }
  154 
  155 /* This needs to operate only when we're using htmlParser */
  156 /* Different modules may apply different rules here.  Ho, hum.  */
  157 static void fix_skipto(request_rec* r, xml2ctx* ctx) {
  158   apr_status_t rv;
  159   xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
  160   if ((cfg->skipto != NULL) && (ctx->flags | ENC_SKIPTO)) {
  161     int found = 0;
  162     char* p = ap_strchr(ctx->buf, '<');
  163     tattr* starts = (tattr*) cfg->skipto->elts;
  164     while (!found && p && *p) {
  165       int i;
  166       for (i = 0; i < cfg->skipto->nelts; ++i) {
  167         if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
  168           /* found a starting element.  Strip all that comes before. */
  169           apr_bucket* b;
  170           apr_bucket* bstart;
  171           rv = apr_brigade_partition(ctx->bbsave, (p-ctx->buf), &bstart);
  172           while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) {
  173             APR_BUCKET_REMOVE(b);
  174             apr_bucket_destroy(b);
  175           }
  176           ctx->bytes -= (p-ctx->buf);
  177           ctx->buf = p ;
  178           found = 1;
  179           ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
  180                         "Skipped to first <%s> element", starts[i].val) ;
  181           break;
  182         }
  183       }
  184       p = ap_strchr(p+1, '<');
  185     }
  186     if (p == NULL) {
  187       ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
  188                 "Failed to find start of recognised HTML!") ;
  189     }
  190   }
  191 }
  192 static void sniff_encoding(request_rec* r, xml2ctx* ctx) {
  193   xml2cfg* cfg = NULL; /* initialise to shut compiler warnings up */
  194   char* p ;
  195   apr_bucket* cutb;
  196   apr_bucket* cute;
  197   apr_bucket* b;
  198   ap_regmatch_t match[2] ;
  199   apr_status_t rv;
  200   const char* ctype = r->content_type;
  201 
  202   if (ctype) {
  203     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Content-Type is %s", ctype) ;
  204 
  205 /* If we've got it in the HTTP headers, there's nothing to do */
  206     if (ctype && (p = ap_strcasestr(ctype, "charset=") , p != NULL)) {
  207       p += 8 ;
  208       if (ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ), ctx->encoding) {
  209         ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
  210                       "Got charset %s from HTTP headers", ctx->encoding) ;
  211         ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
  212       }
  213     }
  214   }
  215   
  216 /* to sniff, first we look for BOM */
  217   if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) {
  218     ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf, ctx->bytes); 
  219     if (HAVE_ENCODING(ctx->xml2enc)) {
  220       ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
  221         "Got charset from XML rules.") ;
  222       ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc);
  223     }
  224   }
  225 
  226 /* If none of the above, look for a META-thingey */
  227 /* also we're probably about to invalidate it, so we remove it. */
  228   if ( ap_regexec(seek_meta_ctype, ctx->buf, 1, match, 0) == 0 ) {
  229     /* get markers on the start and end of the match */
  230     rv = apr_brigade_partition(ctx->bbsave, match[0].rm_eo, &cute);
  231     rv = apr_brigade_partition(ctx->bbsave, match[0].rm_so, &cutb);
  232     /* now set length of useful buf for start-of-data hooks */
  233     ctx->bytes = match[0].rm_so;
  234     if (ctx->encoding == NULL) {
  235       p = apr_pstrndup(r->pool, ctx->buf + match[0].rm_so,
  236         match[0].rm_eo - match[0].rm_so) ;
  237       if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 ) {
  238         if (ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
  239                           match[1].rm_eo - match[1].rm_so), ctx->encoding) {
  240           ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
  241           if (HAVE_ENCODING(ctx->xml2enc))
  242             ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
  243               "Got charset %s from HTML META", ctx->encoding) ;
  244         }
  245       }
  246     }
  247 
  248     /* cut out the <meta> we're invalidating */
  249     while (cutb != cute) {
  250       b = APR_BUCKET_NEXT(cutb);
  251       APR_BUCKET_REMOVE(cutb);
  252       apr_bucket_destroy(cutb);
  253       cutb = b;
  254     }
  255     /* and leave a string */
  256     ctx->buf[ctx->bytes] = 0;
  257   }
  258 
  259 /* either it's set to something we found or it's still the default */
  260 /* Aaargh!  libxml2 has undocumented <META-crap> support.  So this fails
  261  * if metafix is not active.  Have to make it conditional.
  262  *
  263  * No, that means no-metafix breaks things.  Deal immediately with
  264  * this particular instance of metafix.
  265  */
  266   if (!HAVE_ENCODING(ctx->xml2enc)) {
  267     cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
  268     if (!ctx->encoding) {
  269       ctx->encoding = cfg->default_charset?cfg->default_charset:"ISO-8859-1";
  270     }
  271 /* Unsupported charset.  Can we get (iconv) support through apr_xlate? */
  272     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
  273       "Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding);
  274     if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) {
  275       ctx->xml2enc = XML_CHAR_ENCODING_UTF8 ;
  276     } else {
  277       ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
  278         "Charset %s not supported.  Consider aliasing it?", ctx->encoding) ;
  279     }
  280   }
  281 
  282   if (!HAVE_ENCODING(ctx->xml2enc)) {
  283     /* Use configuration default as a last resort */
  284     ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
  285         "No usable charset information; using configuration default") ;
  286     ctx->xml2enc = (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
  287         ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
  288   }
  289   if (ctype && ctx->encoding) {
  290     if (ap_regexec(seek_charset, ctype, 2, match, 0)) {
  291       r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8", NULL);
  292     } else {
  293       char* str = apr_palloc(r->pool, strlen(r->content_type)
  294                                 + 13 - (match[0].rm_eo - match[0].rm_so) + 1);
  295       memcpy(str, r->content_type, match[1].rm_so);
  296       //memcpy(str + match[1].rm_so, "charset=utf-8", 5);
  297       memcpy(str + match[1].rm_so, "utf-8", 5);
  298       strcpy(str + match[1].rm_so + 5, r->content_type+match[1].rm_eo);
  299       r->content_type = str;
  300     }
  301   }
  302 }
  303 
  304 static apr_status_t xml2enc_filter_init(ap_filter_t* f) {
  305   xml2ctx* ctx;
  306   if (!f->ctx) {
  307     xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config, &xml2enc_module);
  308     f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx));
  309     ctx->xml2enc = XML_CHAR_ENCODING_NONE;
  310     if (cfg->skipto != NULL) {
  311       ctx->flags |= ENC_SKIPTO;
  312     }
  313   }
  314   return APR_SUCCESS;
  315 }
  316 static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) {
  317   xml2ctx* ctx = f->ctx;
  318   apr_status_t rv;
  319   apr_bucket* b;
  320   apr_bucket* bstart;
  321   apr_size_t insz = 0;
  322   char *ctype;
  323   char *p;
  324 
  325   if (!ctx || !f->r->content_type) {
  326     /* log error about configuring this */
  327     ap_remove_output_filter(f);
  328     return ap_pass_brigade(f->next, bb) ;
  329   }
  330 
  331   ctype = apr_pstrdup(f->r->pool, f->r->content_type);
  332   for (p = ctype; *p; ++p)
  333     if (isupper(*p))
  334       *p = tolower(*p);
  335 
  336   /* only act if starts-with "text/" or contains "xml" */
  337   if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
  338     ap_remove_output_filter(f);
  339     return ap_pass_brigade(f->next, bb) ;
  340   }
  341 
  342   if (ctx->bbsave == NULL) {
  343     ctx->bbsave = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc);
  344   }
  345   /* append to any data left over from last time */
  346   APR_BRIGADE_CONCAT(ctx->bbsave, bb);
  347 
  348   if (!(ctx->flags & ENC_INITIALISED)) {
  349     /* some kind of initialisation required */
  350     /* Turn all this off when post-processing */
  351 
  352     /* if we don't have enough data to sniff but more's to come, wait for it */
  353     rv = apr_brigade_length(ctx->bbsave, 0, &ctx->bblen);
  354     if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) {
  355       APR_BRIGADE_DO(b, ctx->bbsave) {
  356         if (APR_BUCKET_IS_EOS(b)) {
  357           ctx->flags |= ENC_SEEN_EOS;
  358           break;
  359         }
  360       }
  361       if (!(ctx->flags & ENC_SEEN_EOS)) {
  362         /* not enough data to sniff.  Wait for more */
  363         APR_BRIGADE_DO(b, ctx->bbsave) {
  364           apr_bucket_setaside(b, f->r->pool);
  365         }
  366         return APR_SUCCESS;
  367       }
  368     }
  369     if (ctx->bblen == -1) {
  370       ctx->bblen = BUFLEN-1;
  371     }
  372     /* flatten it into a NULL-terminated string */
  373     ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1));
  374     ctx->bytes = (apr_size_t)ctx->bblen;
  375     rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes);
  376     ctx->buf[ctx->bytes] = 0;
  377     sniff_encoding(f->r, ctx);
  378     /* FIXME: hook here for rewriting start-of-data? */
  379     /* nah, we only have one action here - call it inline */
  380     fix_skipto(f->r, ctx);
  381 
  382     /* consume the data we just sniffed */
  383     /* we need to omit any <meta> we just invalidated */
  384     ctx->flags |= ENC_INITIALISED;
  385     ap_set_module_config(f->r->request_config, &xml2enc_module, ctx);
  386   }
  387   if (ctx->bbnext == NULL) {
  388     ctx->bbnext = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc);
  389   }
  390 
  391   if (!ctx->convset) {
  392     rv = ap_pass_brigade(f->next, ctx->bbsave);
  393     apr_brigade_cleanup(ctx->bbsave);
  394     ap_remove_output_filter(f);
  395     return rv;
  396   }
  397   /* move the data back to bb */
  398   APR_BRIGADE_CONCAT(bb, ctx->bbsave);
  399 
  400   while (b = APR_BRIGADE_FIRST(bb), b != APR_BRIGADE_SENTINEL(bb)) {
  401     ctx->bytes = 0;
  402     if (APR_BUCKET_IS_METADATA(b)) {
  403       if (APR_BUCKET_IS_EOS(b)) {
  404         /* send remaining data */
  405         return ap_fflush(f->next, ctx->bbnext);
  406       } else if (APR_BUCKET_IS_FLUSH(b)) {
  407         ap_fflush(f->next, ctx->bbnext);
  408       }
  409       APR_BUCKET_REMOVE(b);
  410       apr_bucket_destroy(b);
  411     } else {        /* data bucket */
  412       char* buf;
  413       apr_size_t bytes = 0;
  414       char fixbuf[BUFLEN];
  415       apr_bucket* bdestroy = NULL;
  416       if (insz > 0) { /* we have dangling data.  Flatten it. */
  417         buf = fixbuf;
  418         bytes = BUFLEN;
  419         rv = apr_brigade_flatten(bb, buf, &bytes);
  420         if (bytes == insz) {
  421           /* this is only what we've already tried to convert.
  422            * The brigade is exhausted.
  423            * Save remaining data for next time round
  424            */
  425           
  426           ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
  427                         "xml2enc: Setting aside %" APR_SIZE_T_FMT
  428                         " unconverted bytes", bytes);
  429           rv = ap_fflush(f->next, ctx->bbnext);
  430           APR_BRIGADE_CONCAT(ctx->bbsave, bb);
  431           APR_BRIGADE_DO(b, ctx->bbsave) {
  432             apr_bucket_setaside(b, f->r->pool);
  433           }
  434           return rv;
  435         }
  436         /* remove the data we've just read */
  437         rv = apr_brigade_partition(bb, bytes, &bstart);
  438         while (b = APR_BRIGADE_FIRST(bb), b != bstart) {
  439           APR_BUCKET_REMOVE(b);
  440           apr_bucket_destroy(b);
  441         }
  442         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2enc: consuming %"
  443                         APR_SIZE_T_FMT " bytes flattened", bytes);
  444       }
  445       else {
  446         rv = apr_bucket_read(b, (const char**)&buf, &bytes, APR_BLOCK_READ);
  447         APR_BUCKET_REMOVE(b);
  448         bdestroy = b;  /* can't destroy until we've finished with the data */
  449         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2enc: consuming %"
  450                         APR_SIZE_T_FMT " bytes from bucket", bytes);
  451       }
  452       /* OK, we've got some input we can use in [buf,bytes] */
  453       if (rv == APR_SUCCESS) {
  454         apr_size_t consumed;
  455         xml2enc_run_preprocess(f, &buf, &bytes);
  456         consumed = insz = bytes;
  457         while (insz > 0) {
  458           if (ctx->bytes == ctx->bblen) {
  459             /* nothing was converted last time!
  460              * break out of this loop! 
  461              */
  462             b = apr_bucket_transient_create(buf+(bytes - insz), insz,
  463                               bb->bucket_alloc);
  464             APR_BRIGADE_INSERT_HEAD(bb, b);
  465             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
  466                         "xml2enc: reinserting %" APR_SIZE_T_FMT
  467                         " unconsumed bytes from bucket", insz);
  468             break;
  469           }
  470           ctx->bytes = (apr_size_t)ctx->bblen;
  471           rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz), &insz,
  472                           ctx->buf, &ctx->bytes);
  473           ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r,
  474              "xml2enc: converted %" APR_SIZE_T_FMT "/%" APR_OFF_T_FMT " bytes",
  475              consumed - insz, ctx->bblen - ctx->bytes);
  476 #if DEBUG_XML2ENC
  477   /* never use this in the wild */
  478   {
  479     static int serial = 0;
  480     const char* fname ;
  481     apr_file_t* file ; 
  482     fname = apr_psprintf(f->r->pool, "/tmp/%d-xml2enc.%d", rv, serial++);
  483     apr_file_open(&file, fname, APR_WRITE|APR_TRUNCATE|APR_CREATE,
  484                   APR_FPROT_OS_DEFAULT, f->r->pool);
  485     apr_file_write(file, buf+(bytes-consumed), &consumed);
  486     apr_file_close(file);
  487   }
  488 #endif
  489           consumed = insz;
  490           ap_fwrite(f->next, ctx->bbnext, ctx->buf, (apr_size_t)ctx->bblen - ctx->bytes);
  491           switch (rv) {
  492             case APR_SUCCESS:
  493               continue;
  494             case  APR_EINCOMPLETE:
  495               ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "INCOMPLETE");
  496               continue;     /* If outbuf was too small, go round again.
  497                              * If it was inbuf, we'll break out when we test
  498                              * ctx->bytes == ctx->bblen
  499                              */
  500             case APR_EINVAL: /* try skipping one bad byte */
  501               ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r,
  502                         "Skipping invalid byte(s) in input stream!");
  503               --insz;
  504               continue;
  505             default:
  506               /* Erk!  What's this?
  507                * Bail out, flush, and hope to eat the buf raw
  508                */
  509               ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
  510                       "Failed to convert input; trying it raw") ;
  511               ctx->convset = NULL;
  512               ap_fflush(f->next, ctx->bbnext);
  513               return ap_pass_brigade(f->next, ctx->bbnext);
  514           }
  515         }
  516       } else {
  517         ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
  518                         "xml2enc: error reading data") ;
  519       }
  520       if (bdestroy) {
  521         apr_bucket_destroy(bdestroy);
  522       }
  523     }
  524   }
  525   return APR_SUCCESS;
  526 }
  527 static apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp,
  528         const char** encoding) {
  529   xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module);
  530   if (!ctx || !(ctx->flags & ENC_INITIALISED)) {
  531     return APR_EAGAIN;
  532   }
  533   *encp = ctx->xml2enc;
  534   *encoding = ctx->encoding;
  535   return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL;
  536 }
  537 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
  538 static void xml2enc_hooks(apr_pool_t* pool) {
  539   ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc,
  540                   xml2enc_filter_init, AP_FTYPE_RESOURCE, PROTO_FLAGS);
  541   APR_REGISTER_OPTIONAL_FN(xml2enc_filter);
  542   APR_REGISTER_OPTIONAL_FN(xml2enc_charset);
  543   seek_meta_ctype = ap_pregcomp(pool,
  544       "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
  545       AP_REG_EXTENDED|AP_REG_ICASE) ;
  546   seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
  547       AP_REG_EXTENDED|AP_REG_ICASE) ;
  548 }
  549 static const char* set_alias(cmd_parms* cmd, void* CFG,
  550                         const char* charset, const char* alias) {
  551   const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY);
  552   if (errmsg != NULL)
  553     return errmsg ;
  554   else if (xmlAddEncodingAlias(charset, alias) == 0)
  555     return NULL;
  556   else
  557     return "Error setting charset alias";
  558 }
  559 
  560 static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset) {
  561   xml2cfg* cfg = CFG;
  562   cfg->default_charset = charset;
  563   cfg->default_encoding = xmlParseCharEncoding(charset);
  564 #if 0
  565   switch(cfg->default_encoding) {
  566     case XML_CHAR_ENCODING_NONE:
  567       return "Default charset not found";
  568     case XML_CHAR_ENCODING_ERROR:
  569       /*return "Invalid or unsupported default charset";*/
  570     default:
  571       return NULL;
  572   }
  573 #endif
  574   return NULL;
  575 }
  576 static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) {
  577   tattr* attr;
  578   xml2cfg* cfg = CFG;
  579   if (cfg->skipto == NULL)
  580     cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
  581   attr = apr_array_push(cfg->skipto) ;
  582   attr->val = arg;
  583   return NULL ;
  584 }
  585 
  586 static const command_rec xml2enc_cmds[] = {
  587   AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
  588         "Usage: xml2EncDefault charset") ,
  589   AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF,
  590         "EncodingAlias charset alias [more aliases]") ,
  591   AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
  592         "Ignore anything in front of the first of these elements") ,
  593   { NULL }
  594 };
  595 static void* xml2enc_config(apr_pool_t* pool, char* x) {
  596   xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg));
  597   ret->default_encoding = XML_CHAR_ENCODING_NONE ;
  598   return ret;
  599 }
  600 
  601 static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD) {
  602   xml2cfg* base = BASE;
  603   xml2cfg* add = ADD;
  604   xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg));
  605   ret->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE)
  606           ? base->default_encoding : add->default_encoding ;
  607   ret->default_charset = add->default_charset ? add->default_charset : base->default_charset;
  608   ret->skipto = add->skipto ? add->skipto : base->skipto;
  609   return ret;
  610 }
  611 module AP_MODULE_DECLARE_DATA xml2enc_module = {
  612   STANDARD20_MODULE_STUFF,
  613   xml2enc_config,
  614   xml2enc_merge,
  615   NULL,
  616   NULL,
  617   xml2enc_cmds,
  618   xml2enc_hooks
  619 };
  620 APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(xml2enc, XML2ENC, int, preprocess,
  621                       (ap_filter_t *f, char** bufp, apr_size_t* bytesp),
  622                       (f, bufp, bytesp), OK, DECLINED)