]> git.pld-linux.org Git - packages/apache-mod_proxy_html.git/blame - mod_proxy_html.c
- up to 1.1
[packages/apache-mod_proxy_html.git] / mod_proxy_html.c
CommitLineData
ce3fd0e0 1/*
a8811d39 2 Copyright (c) 2003-4, WebThing Ltd
ce3fd0e0
AM
3 Author: Nick Kew <nick@webthing.com>
4
5This program is free software; you can redistribute it and/or modify
6it under the terms of the GNU General Public License as published by
7the Free Software Foundation; either version 2 of the License, or
8(at your option) any later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19*/
a8811d39 20/* Note to Users
21
22 You are requested to register as a user, at
23 http://apache.webthing.com/registration.html
24
25 This entitles you to support from the developer
26 (see the webpage for details).
27 I'm unlikely to reply to help/support requests from
28 non-registered users, unless you're paying and/or offering
29 constructive feedback such as bug reports or sensible
30 suggestions for further development.
31
32 It also makes a small contribution to the effort
33 that's gone into developing this work.
34*/
ce3fd0e0
AM
35
36/* libxml */
37#include <libxml/HTMLparser.h>
38
39/* apache */
40#include <http_protocol.h>
41#include <http_config.h>
42#include <http_log.h>
43#include <apr_strings.h>
44
45module AP_MODULE_DECLARE_DATA proxy_html_module ;
46
/* One URL rewrite rule, kept in a singly linked list.
   A link value that starts with `from` has that prefix replaced by `to`. */
typedef struct urlmap {
    struct urlmap *next;   /* following rule, or NULL at the end of the list */
    const char *from;      /* prefix to look for */
    const char *to;        /* replacement for the matched prefix */
} urlmap;
52typedef struct {
53 urlmap* map ;
54 const char* doctype ;
a8811d39 55 const char* etag ;
56 unsigned int flags ;
ce3fd0e0
AM
57} proxy_html_conf ;
58typedef struct {
59 htmlSAXHandlerPtr sax ;
60 ap_filter_t* f ;
a8811d39 61 proxy_html_conf* cfg ;
ce3fd0e0
AM
62 htmlParserCtxtPtr parser ;
63 apr_bucket_brigade* bb ;
a8811d39 64 xmlCharEncoding enc ;
ce3fd0e0
AM
65} saxctxt ;
66
/* Report whether `name` is an HTML element that never has content or an
   end tag.  The comparison is case-sensitive.
   Returns 1 when it is such an element, 0 otherwise. */
static int is_empty_elt(const char *name)
{
    static const char *const void_elements[] = {
        "br", "link", "img", "hr", "input", "meta", "base",
        "area", "param", "col", "frame", "isindex", "basefont"
    };
    size_t idx;

    for (idx = 0; idx < sizeof void_elements / sizeof void_elements[0]; ++idx) {
        if (strcmp(void_elements[idx], name) == 0)
            return 1;
    }
    return 0;
}
a8811d39 90
ce3fd0e0
AM
/* Associates an element name with the NULL-terminated list of its
   attributes whose values are treated as links. */
typedef struct {
    const char *name;      /* element name */
    const char **attrs;    /* NULL-terminated array of attribute names */
} elt_t;
95
/* Fixup flags (bitmask). */
#define NORM_LC 0x1        /* convert to lower case */
#define NORM_MSSLASH 0x2   /* turn backslashes into forward slashes */
#define NORM_RESET 0x4     /* configuration marker; has no effect in normalise() */

/* Apply the fixups selected by `flags` to `str`, in place.
 *
 * flags: any combination of NORM_LC and NORM_MSSLASH; other bits are ignored.
 * str:   NUL-terminated, writable string.
 * Returns `str`.
 *
 * The cursor is a plain char* so that it agrees with the type of `str` and
 * of strchr()'s result; bytes are converted to unsigned char before they
 * reach the <ctype.h> functions.
 */
static char *normalise(unsigned int flags, char *str)
{
    char *p;

    if (flags & NORM_LC) {
        for (p = str; *p; ++p) {
            unsigned char c = (unsigned char)*p;
            if (isupper(c))
                *p = (char)tolower(c);
        }
    }

    if (flags & NORM_MSSLASH) {
        for (p = strchr(str, '\\'); p; p = strchr(p + 1, '\\'))
            *p = '/';
    }

    return str;
}
113
ce3fd0e0 114static void pstartElement(void* ctxt, const xmlChar* name,
a8811d39 115 const xmlChar** attrs ) {
ce3fd0e0
AM
116
117 saxctxt* ctx = (saxctxt*) ctxt ;
118
119 static const char* href[] = { "href", NULL } ;
120 static const char* cite[] = { "cite", NULL } ;
121 static const char* action[] = { "action", NULL } ;
122 static const char* imgattr[] = { "src", "longdesc", "usemap", NULL } ;
123 static const char* inputattr[] = { "src", "usemap", NULL } ;
124 static const char* scriptattr[] = { "src", "for", NULL } ;
125 static const char* frameattr[] = { "src", "longdesc", NULL } ;
126 static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ;
127 static const char* profile[] = { "profile", NULL } ;
128 static const char* background[] = { "background", NULL } ;
129 static const char* codebase[] = { "codebase", NULL } ;
130
131 static elt_t linked_elts[] = {
132 { "a" , href } ,
133 { "form", action } ,
134 { "base" , href } ,
135 { "area" , href } ,
136 { "link" , href } ,
137 { "img" , imgattr } ,
138 { "input" , inputattr } ,
139 { "script" , scriptattr } ,
140 { "frame", frameattr } ,
141 { "iframe", frameattr } ,
142 { "object", objattr } ,
143 { "q" , cite } ,
144 { "blockquote" , cite } ,
145 { "ins" , cite } ,
146 { "del" , cite } ,
147 { "head" , profile } ,
148 { "body" , background } ,
149 { "applet", codebase } ,
150 { NULL, NULL }
151 } ;
152
153 ap_fputc(ctx->f->next, ctx->bb, '<') ;
154 ap_fputs(ctx->f->next, ctx->bb, name) ;
155
156 if ( attrs ) {
157 const char** linkattrs = 0 ;
158 const xmlChar** a ;
159 elt_t* elt ;
160 for ( elt = linked_elts; elt->name != NULL ; ++elt )
161 if ( !strcmp(elt->name, name) ) {
162 linkattrs = elt->attrs ;
163 break ;
164 }
165 for ( a = attrs ; *a ; a += 2 ) {
166 const xmlChar* value = a[1] ;
167 if ( linkattrs && value ) {
168 int is_uri = 0 ;
169 const char** linkattr = linkattrs ;
170 do {
171 if ( !strcmp(*linkattr, *a) ) {
172 is_uri = 1 ;
173 break ;
174 }
175 } while ( *++linkattr ) ;
176 if ( is_uri ) {
177 urlmap* m ;
a8811d39 178 for ( m = ctx->cfg->map ; m ; m = m->next ) {
ce3fd0e0
AM
179 if ( ! strncasecmp(value, m->from, strlen(m->from) ) ) {
180 value = apr_pstrcat(ctx->f->r->pool, m->to, value+strlen(m->from) , NULL) ;
181 break ;
182 }
183 }
184 }
185 }
186 if ( ! value )
a8811d39 187 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
188 else {
189 if ( ctx->cfg->flags != 0 )
190 value = normalise(ctx->cfg->flags,
191 apr_pstrdup(ctx->f->r->pool, value ) ) ;
192 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"",
193 value, "\"", NULL) ;
194 }
ce3fd0e0
AM
195 }
196 }
a8811d39 197 if ( is_empty_elt(name) )
198 ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ;
199 else
200 ap_fputc(ctx->f->next, ctx->bb, '>') ;
ce3fd0e0
AM
201}
202static void pendElement(void* ctxt, const xmlChar* name) {
ce3fd0e0 203 saxctxt* ctx = (saxctxt*) ctxt ;
a8811d39 204 if ( ! is_empty_elt(name) )
205 ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
ce3fd0e0
AM
206}
207#define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1
208static void pcharacters(void* ctxt, const xmlChar *chars, int length) {
209 saxctxt* ctx = (saxctxt*) ctxt ;
210 int i ;
211 int begin ;
212 for ( begin=i=0; i<length; i++ ) {
213 switch (chars[i]) {
214 case '&' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&amp;") ; break ;
215 case '<' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&lt;") ; break ;
216 case '>' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&gt;") ; break ;
217 case '"' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&quot;") ; break ;
218 default : break ;
219 }
220 }
221 FLUSH ;
222}
223static void pcdata(void* ctxt, const xmlChar *chars, int length) {
224 saxctxt* ctx = (saxctxt*) ctxt ;
225 ap_fwrite(ctx->f->next, ctx->bb, chars, length) ;
226}
227static void pcomment(void* ctxt, const xmlChar *chars) {
228 saxctxt* ctx = (saxctxt*) ctxt ;
229 ap_fputstrs(ctx->f->next, ctx->bb, "<!--", chars, "-->", NULL) ;
230}
231static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) {
232 htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ;
a8811d39 233 sax->startDocument = NULL ;
234 sax->endDocument = NULL ;
ce3fd0e0
AM
235 sax->startElement = pstartElement ;
236 sax->endElement = pendElement ;
237 sax->characters = pcharacters ;
238 sax->comment = pcomment ;
239 sax->cdataBlock = pcdata ;
240 return sax ;
241}
242static char* ctype2encoding(apr_pool_t* pool, const char* in) {
243 char* x ;
244 char* ptr ;
245 char* ctype ;
246 if ( ! in )
247 return 0 ;
a8811d39 248 if ( ctype = strdup(in) , ! ctype )
249 return 0 ;
ce3fd0e0
AM
250 for ( ptr = ctype ; *ptr; ++ptr)
251 if ( isupper(*ptr) )
252 *ptr = tolower(*ptr) ;
253
254 if ( ptr = strstr(ctype, "charset=") , ptr > 0 ) {
255 ptr += 8 ; // jump over "charset=" and chop anything that follows charset
256 if ( x = strchr(ptr, ' ') , x )
257 *x = 0 ;
258 if ( x = strchr(ptr, ';') , x )
259 *x = 0 ;
260 }
261 x = ptr ? apr_pstrdup(pool, ptr) : 0 ;
262 free (ctype ) ;
263 return x ;
264}
265
266static int proxy_html_filter_init(ap_filter_t* f) {
267 saxctxt* fctx ;
268
ce3fd0e0
AM
269/* remove content-length filter */
270 ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ;
271 ap_filter_t* ff = f->next ;
272
273 do {
274 ap_filter_t* fnext = ff->next ;
275 if ( ff->frec == clf )
276 ap_remove_output_filter(ff) ;
277 ff = fnext ;
278 } while ( ff ) ;
279
280 fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
281 fctx->sax = setupSAX(f->r->pool) ;
282 fctx->f = f ;
283 fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
a8811d39 284 fctx->cfg = ap_get_module_config(f->r->per_dir_config,&proxy_html_module);
285
286/* Note the encoding now, before updating content-type */
287 fctx->enc = xmlParseCharEncoding
288 (ctype2encoding(f->r->pool, f->r->content_type)) ;
ce3fd0e0
AM
289
290 if ( f->r->proto_num >= 1001 ) {
291 if ( ! f->r->main && ! f->r->prev )
292 f->r->chunked = 1 ;
293 }
a8811d39 294 apr_table_unset(f->r->headers_out, "Content-Length") ;
295 apr_table_unset(f->r->headers_out, "ETag") ;
296 ap_set_content_type(f->r, "text/html;charset=utf-8") ;
297 ap_fputs(f->next, fctx->bb, fctx->cfg->doctype) ;
ce3fd0e0
AM
298 return OK ;
299}
300static saxctxt* check_filter_init (ap_filter_t* f) {
301
302 if ( f->r->proxyreq && f->r->content_type ) {
303 if ( strncasecmp(f->r->content_type, "text/html", 9) &&
304 strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
305 ap_remove_output_filter(f) ;
306 return NULL ;
307 }
308 }
309
310 if ( ! f->ctx )
311 proxy_html_filter_init(f) ;
312 return f->ctx ;
313}
314static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
315 apr_bucket* b ;
316 const char* buf = 0 ;
317 apr_size_t bytes = 0 ;
318
319 saxctxt* ctxt = check_filter_init(f) ;
320 if ( ! ctxt )
321 return ap_pass_brigade(f->next, bb) ;
322
323 for ( b = APR_BRIGADE_FIRST(bb) ;
324 b != APR_BRIGADE_SENTINEL(bb) ;
325 b = APR_BUCKET_NEXT(b) ) {
326 if ( APR_BUCKET_IS_EOS(b) ) {
a8811d39 327 if ( ctxt->parser != NULL ) {
328 htmlParseChunk(ctxt->parser, buf, 0, 1) ;
329 htmlFreeParserCtxt(ctxt->parser) ;
330 }
331 APR_BRIGADE_INSERT_TAIL(ctxt->bb,
332 apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
333 ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
ce3fd0e0
AM
334 } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
335 == APR_SUCCESS ) {
a8811d39 336 if ( ctxt->parser == NULL )
337 ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
338 buf, bytes, 0, ctxt->enc) ;
339 else
340 htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
ce3fd0e0
AM
341 } else {
342 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
343 }
344 }
a8811d39 345 //ap_fflush(ctxt->f->next, ctxt->bb) ; // uncomment for debug
ce3fd0e0
AM
346 apr_brigade_destroy(bb) ;
347 return APR_SUCCESS ;
348}
/* Doctype declarations that can be written at the start of the output. */
static const char *fpi_html =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
static const char *fpi_html_legacy =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
static const char *fpi_xhtml =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
static const char *fpi_xhtml_legacy =
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";

/* Text that closes an empty element in HTML and in XHTML output. */
static const char *html_etag = ">";
static const char *xhtml_etag = " />";

/* Defaults for a new configuration.  The merge code recognises an unset
   value by comparing against these pointers, so they must stay pointers to
   the variables above. */
#define DEFAULT_DOCTYPE fpi_html
#define DEFAULT_ETAG html_etag
ce3fd0e0
AM
361
362static void* proxy_html_config(apr_pool_t* pool, char* x) {
363 proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
364 ret->doctype = DEFAULT_DOCTYPE ;
a8811d39 365 ret->etag = DEFAULT_ETAG ;
ce3fd0e0
AM
366 return ret ;
367}
368static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
369 proxy_html_conf* base = (proxy_html_conf*) BASE ;
370 proxy_html_conf* add = (proxy_html_conf*) ADD ;
371 proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
a8811d39 372
ce3fd0e0 373 if ( add->map && base->map ) {
a8811d39 374 urlmap* a ;
375 conf->map = NULL ;
376 for ( a = base->map ; a ; a = a->next ) {
377 urlmap* save = conf->map ;
378 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
379 conf->map->next = save ;
380 }
381 for ( a = add->map ; a ; a = a->next ) {
382 urlmap* save = conf->map ;
383 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
384 conf->map->next = save ;
385 }
386 } else
387 conf->map = add->map ? add->map : base->map ;
388
ce3fd0e0
AM
389 conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
390 ? base->doctype : add->doctype ;
a8811d39 391 conf->etag = ( add->etag == DEFAULT_ETAG ) ? base->etag : add->etag ;
392 if ( add->flags & NORM_RESET )
393 conf->flags = add->flags ^ NORM_RESET ;
394 else
395 conf->flags = base->flags | add->flags ;
ce3fd0e0
AM
396 return conf ;
397}
398static const char* set_urlmap(cmd_parms* cmd, void* CFG,
399 const char* from, const char* to) {
400 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
a8811d39 401 urlmap* oldmap = cfg->map ;
ce3fd0e0
AM
402 urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ;
403 newmap->from = apr_pstrdup(cmd->pool, from) ;
404 newmap->to = apr_pstrdup(cmd->pool, to) ;
a8811d39 405 newmap->next = NULL ;
406 if ( oldmap ) {
407 while ( oldmap->next )
408 oldmap = oldmap->next ;
409 oldmap->next = newmap ;
410 } else
411 cfg->map = newmap ;
ce3fd0e0
AM
412 return NULL ;
413}
a8811d39 414static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
415 const char* l) {
ce3fd0e0 416 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
a8811d39 417 if ( !strcasecmp(t, "xhtml") ) {
418 cfg->etag = xhtml_etag ;
419 if ( l && !strcasecmp(l, "legacy") )
420 cfg->doctype = fpi_xhtml_legacy ;
421 else
422 cfg->doctype = fpi_xhtml ;
423 } else if ( !strcasecmp(t, "html") ) {
424 cfg->etag = html_etag ;
425 if ( l && !strcasecmp(l, "legacy") )
426 cfg->doctype = fpi_html_legacy ;
427 else
428 cfg->doctype = fpi_html ;
429 } else {
430 cfg->doctype = apr_pstrdup(cmd->pool, t) ;
431 if ( l && ( ( l[0] == 'x' ) || ( l[0] == 'X' ) ) )
432 cfg->etag = xhtml_etag ;
433 }
434 return NULL ;
435}
436static void set_param(proxy_html_conf* cfg, const char* arg) {
437 if ( arg && *arg )
438 if ( !strcmp(arg, "lowercase") )
439 cfg->flags |= NORM_LC ;
440 else if ( !strcmp(arg, "dospath") )
441 cfg->flags |= NORM_MSSLASH ;
442 else if ( !strcmp(arg, "reset") )
443 cfg->flags |= NORM_RESET ;
444}
445static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg1,
446 const char* arg2, const char* arg3) {
447 set_param( (proxy_html_conf*)CFG, arg1) ;
448 set_param( (proxy_html_conf*)CFG, arg2) ;
449 set_param( (proxy_html_conf*)CFG, arg3) ;
ce3fd0e0
AM
450 return NULL ;
451}
452static const command_rec proxy_html_cmds[] = {
453 AP_INIT_TAKE2("ProxyHTMLURLMap", set_urlmap, NULL, OR_ALL, "Map URL From To" ) ,
a8811d39 454 AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL, OR_ALL, "(HTML|XHTML) [Legacy]" ) ,
455 AP_INIT_TAKE123("ProxyHTMLFixups", set_flags, NULL, OR_ALL, "Options are lowercase, dospath" ) ,
ce3fd0e0
AM
456 { NULL }
457} ;
458static void proxy_html_hooks(apr_pool_t* p) {
459 ap_register_output_filter("proxy-html", proxy_html_filter,
a8811d39 460 NULL, AP_FTYPE_RESOURCE) ;
ce3fd0e0
AM
461}
462module AP_MODULE_DECLARE_DATA proxy_html_module = {
463 STANDARD20_MODULE_STUFF,
464 proxy_html_config,
465 proxy_html_merge,
466 NULL,
467 NULL,
468 proxy_html_cmds,
469 proxy_html_hooks
470} ;
This page took 0.127929 seconds and 4 git commands to generate.