]> git.pld-linux.org Git - packages/apache-mod_proxy_html.git/blame - mod_proxy_html.c
- pl, added libxml2 version (as stated on homepage)
[packages/apache-mod_proxy_html.git] / mod_proxy_html.c
CommitLineData
ce3fd0e0
AM
/*
        Copyright (c) 2003, WebThing Ltd
        Author: Nick Kew <nick@webthing.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/
20
/* C standard library */
#include <ctype.h>
#include <string.h>

/* libxml */
#include <libxml/HTMLparser.h>

/* apache */
#include <http_protocol.h>
#include <http_config.h>
#include <http_log.h>
#include <apr_strings.h>
29
30module AP_MODULE_DECLARE_DATA proxy_html_module ;
31
/*
 * One URL rewrite rule in a singly linked list.
 *
 * The struct tag is named so that `next` really points at this type.
 * With an anonymous struct, `struct urlmap*` was an unrelated incomplete
 * type and every traversal needed a cast.  Existing casts to `urlmap*` /
 * `struct urlmap*` remain valid.
 */
typedef struct urlmap {
    struct urlmap *next;   /* following rule, or NULL at the end of the list */
    const char *from;      /* URL prefix to match */
    const char *to;        /* replacement for the matched prefix */
} urlmap;
37typedef struct {
38 urlmap* map ;
39 const char* doctype ;
40} proxy_html_conf ;
41typedef struct {
42 htmlSAXHandlerPtr sax ;
43 ap_filter_t* f ;
44 urlmap* map ;
45 htmlParserCtxtPtr parser ;
46 apr_bucket_brigade* bb ;
47} saxctxt ;
48
49static void pstartDocument(void* ctxt) {
50 saxctxt* ctx = (saxctxt*) ctxt ;
51
52 proxy_html_conf* cfg = ap_get_module_config(ctx->f->r->per_dir_config,&proxy_html_module);
53 apr_table_unset(ctx->f->r->headers_out, "Content-Length") ;
54 apr_table_unset(ctx->f->r->headers_out, "ETag") ;
55 ap_set_content_type(ctx->f->r, "text/html;charset=utf-8") ;
56 ap_fputs(ctx->f->next, ctx->bb, cfg->doctype) ;
57}
58static void pendDocument(void* ctxt) {
59 saxctxt* ctx = (saxctxt*) ctxt ;
60 APR_BRIGADE_INSERT_TAIL(ctx->bb,
61 apr_bucket_eos_create(ctx->bb->bucket_alloc) ) ;
62 ap_pass_brigade(ctx->f->next, ctx->bb) ;
63}
/* Associates an element name with the NULL-terminated list of its
 * attributes whose values are treated as URLs. */
typedef struct {
    const char *name;     /* element name, e.g. "a" */
    const char **attrs;   /* NULL-terminated attribute names */
} elt_t;
68
69static void pstartElement(void* ctxt, const xmlChar* name,
70 const xmlChar** attrs ) {
71
72 saxctxt* ctx = (saxctxt*) ctxt ;
73
74 static const char* href[] = { "href", NULL } ;
75 static const char* cite[] = { "cite", NULL } ;
76 static const char* action[] = { "action", NULL } ;
77 static const char* imgattr[] = { "src", "longdesc", "usemap", NULL } ;
78 static const char* inputattr[] = { "src", "usemap", NULL } ;
79 static const char* scriptattr[] = { "src", "for", NULL } ;
80 static const char* frameattr[] = { "src", "longdesc", NULL } ;
81 static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ;
82 static const char* profile[] = { "profile", NULL } ;
83 static const char* background[] = { "background", NULL } ;
84 static const char* codebase[] = { "codebase", NULL } ;
85
86 static elt_t linked_elts[] = {
87 { "a" , href } ,
88 { "form", action } ,
89 { "base" , href } ,
90 { "area" , href } ,
91 { "link" , href } ,
92 { "img" , imgattr } ,
93 { "input" , inputattr } ,
94 { "script" , scriptattr } ,
95 { "frame", frameattr } ,
96 { "iframe", frameattr } ,
97 { "object", objattr } ,
98 { "q" , cite } ,
99 { "blockquote" , cite } ,
100 { "ins" , cite } ,
101 { "del" , cite } ,
102 { "head" , profile } ,
103 { "body" , background } ,
104 { "applet", codebase } ,
105 { NULL, NULL }
106 } ;
107
108 ap_fputc(ctx->f->next, ctx->bb, '<') ;
109 ap_fputs(ctx->f->next, ctx->bb, name) ;
110
111 if ( attrs ) {
112 const char** linkattrs = 0 ;
113 const xmlChar** a ;
114 elt_t* elt ;
115 for ( elt = linked_elts; elt->name != NULL ; ++elt )
116 if ( !strcmp(elt->name, name) ) {
117 linkattrs = elt->attrs ;
118 break ;
119 }
120 for ( a = attrs ; *a ; a += 2 ) {
121 const xmlChar* value = a[1] ;
122 if ( linkattrs && value ) {
123 int is_uri = 0 ;
124 const char** linkattr = linkattrs ;
125 do {
126 if ( !strcmp(*linkattr, *a) ) {
127 is_uri = 1 ;
128 break ;
129 }
130 } while ( *++linkattr ) ;
131 if ( is_uri ) {
132 urlmap* m ;
133 for ( m = ctx->map ; m ; m = (urlmap*)m->next ) {
134 if ( ! strncasecmp(value, m->from, strlen(m->from) ) ) {
135 value = apr_pstrcat(ctx->f->r->pool, m->to, value+strlen(m->from) , NULL) ;
136 break ;
137 }
138 }
139 }
140 }
141 if ( ! value )
142 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
143 else
144 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", value, "\"", NULL) ;
145 }
146 }
147 ap_fputc(ctx->f->next, ctx->bb, '>') ;
148}
149static void pendElement(void* ctxt, const xmlChar* name) {
150 const char** p ;
151 saxctxt* ctx = (saxctxt*) ctxt ;
152 static const char* empty_elts[] = {
153 "br" ,
154 "link" ,
155 "img" ,
156 "hr" ,
157 "input" ,
158 "meta" ,
159 "base" ,
160 "area" ,
161 "param" ,
162 "col" ,
163 "frame" ,
164 "isindex" ,
165 "basefont" ,
166 NULL
167 } ;
168 for ( p = empty_elts ; *p ; ++p )
169 if ( !strcmp( *p, name) )
170 return ;
171 ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
172}
173#define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1
174static void pcharacters(void* ctxt, const xmlChar *chars, int length) {
175 saxctxt* ctx = (saxctxt*) ctxt ;
176 int i ;
177 int begin ;
178 for ( begin=i=0; i<length; i++ ) {
179 switch (chars[i]) {
180 case '&' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&amp;") ; break ;
181 case '<' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&lt;") ; break ;
182 case '>' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&gt;") ; break ;
183 case '"' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&quot;") ; break ;
184 default : break ;
185 }
186 }
187 FLUSH ;
188}
189static void pcdata(void* ctxt, const xmlChar *chars, int length) {
190 saxctxt* ctx = (saxctxt*) ctxt ;
191 ap_fwrite(ctx->f->next, ctx->bb, chars, length) ;
192}
193static void pcomment(void* ctxt, const xmlChar *chars) {
194 saxctxt* ctx = (saxctxt*) ctxt ;
195 ap_fputstrs(ctx->f->next, ctx->bb, "<!--", chars, "-->", NULL) ;
196}
197static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) {
198 htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ;
199 sax->startDocument = pstartDocument ;
200 sax->endDocument = pendDocument ;
201 sax->startElement = pstartElement ;
202 sax->endElement = pendElement ;
203 sax->characters = pcharacters ;
204 sax->comment = pcomment ;
205 sax->cdataBlock = pcdata ;
206 return sax ;
207}
208static char* ctype2encoding(apr_pool_t* pool, const char* in) {
209 char* x ;
210 char* ptr ;
211 char* ctype ;
212 if ( ! in )
213 return 0 ;
214 ctype = strdup(in) ;
215 for ( ptr = ctype ; *ptr; ++ptr)
216 if ( isupper(*ptr) )
217 *ptr = tolower(*ptr) ;
218
219 if ( ptr = strstr(ctype, "charset=") , ptr > 0 ) {
220 ptr += 8 ; // jump over "charset=" and chop anything that follows charset
221 if ( x = strchr(ptr, ' ') , x )
222 *x = 0 ;
223 if ( x = strchr(ptr, ';') , x )
224 *x = 0 ;
225 }
226 x = ptr ? apr_pstrdup(pool, ptr) : 0 ;
227 free (ctype ) ;
228 return x ;
229}
230
231static int proxy_html_filter_init(ap_filter_t* f) {
232 saxctxt* fctx ;
233
234 xmlCharEncoding enc
235 = xmlParseCharEncoding(ctype2encoding(f->r->pool, f->r->content_type)) ;
236
237/* remove content-length filter */
238 ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ;
239 ap_filter_t* ff = f->next ;
240
241 do {
242 ap_filter_t* fnext = ff->next ;
243 if ( ff->frec == clf )
244 ap_remove_output_filter(ff) ;
245 ff = fnext ;
246 } while ( ff ) ;
247
248 fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
249 fctx->sax = setupSAX(f->r->pool) ;
250 fctx->f = f ;
251 fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
252 fctx->map = ap_get_module_config(f->r->per_dir_config,&proxy_html_module);
253
254 if ( f->r->proto_num >= 1001 ) {
255 if ( ! f->r->main && ! f->r->prev )
256 f->r->chunked = 1 ;
257 }
258 fctx->parser = htmlCreatePushParserCtxt
259 ( fctx->sax , fctx, " ", 4, 0, enc) ;
260 return OK ;
261}
262static saxctxt* check_filter_init (ap_filter_t* f) {
263
264 if ( f->r->proxyreq && f->r->content_type ) {
265 if ( strncasecmp(f->r->content_type, "text/html", 9) &&
266 strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
267 ap_remove_output_filter(f) ;
268 return NULL ;
269 }
270 }
271
272 if ( ! f->ctx )
273 proxy_html_filter_init(f) ;
274 return f->ctx ;
275}
276static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
277 apr_bucket* b ;
278 const char* buf = 0 ;
279 apr_size_t bytes = 0 ;
280
281 saxctxt* ctxt = check_filter_init(f) ;
282 if ( ! ctxt )
283 return ap_pass_brigade(f->next, bb) ;
284
285 for ( b = APR_BRIGADE_FIRST(bb) ;
286 b != APR_BRIGADE_SENTINEL(bb) ;
287 b = APR_BUCKET_NEXT(b) ) {
288 if ( APR_BUCKET_IS_EOS(b) ) {
289 htmlParseChunk(ctxt->parser, buf, 0, 1) ;
290 htmlFreeParserCtxt(ctxt->parser) ;
291 } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
292 == APR_SUCCESS ) {
293 htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
294 } else {
295 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
296 }
297 }
298 apr_brigade_destroy(bb) ;
299 return APR_SUCCESS ;
300}
/* Doctype used when none is configured.  proxy_html_merge compares
 * against this pointer to detect that the doctype was not set. */
static const char *DEFAULT_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
303
304static void* proxy_html_config(apr_pool_t* pool, char* x) {
305 proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
306 ret->doctype = DEFAULT_DOCTYPE ;
307 return ret ;
308}
309static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
310 proxy_html_conf* base = (proxy_html_conf*) BASE ;
311 proxy_html_conf* add = (proxy_html_conf*) ADD ;
312 proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
313 conf->map = add->map ? add->map : base->map ;
314 if ( add->map && base->map ) {
315 urlmap* newmap = add->map ;
316 while ( newmap->next )
317 newmap = (urlmap*)newmap->next ;
318 newmap->next = (struct urlmap*) base->map ;
319 }
320 conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
321 ? base->doctype : add->doctype ;
322 return conf ;
323}
324static const char* set_urlmap(cmd_parms* cmd, void* CFG,
325 const char* from, const char* to) {
326 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
327 urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ;
328 newmap->from = apr_pstrdup(cmd->pool, from) ;
329 newmap->to = apr_pstrdup(cmd->pool, to) ;
330 newmap->next = (struct urlmap*) cfg->map ;
331 cfg->map = newmap ;
332 return NULL ;
333}
334static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t) {
335 proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
336 cfg->doctype = apr_pstrdup(cmd->pool, t) ;
337 return NULL ;
338}
339static const command_rec proxy_html_cmds[] = {
340 AP_INIT_TAKE2("ProxyHTMLURLMap", set_urlmap, NULL, OR_ALL, "Map URL From To" ) ,
341 AP_INIT_TAKE1("ProxyHTMLDoctype", set_doctype, NULL, OR_ALL, "Set Doctype for URL mapped documents" ) ,
342 { NULL }
343} ;
344static void proxy_html_hooks(apr_pool_t* p) {
345 ap_register_output_filter("proxy-html", proxy_html_filter,
346 proxy_html_filter_init, AP_FTYPE_RESOURCE) ;
347}
348module AP_MODULE_DECLARE_DATA proxy_html_module = {
349 STANDARD20_MODULE_STUFF,
350 proxy_html_config,
351 proxy_html_merge,
352 NULL,
353 NULL,
354 proxy_html_cmds,
355 proxy_html_hooks
356} ;
This page took 0.111625 seconds and 4 git commands to generate.