]>
Commit | Line | Data |
---|---|---|
ce3fd0e0 AM |
1 | /* |
2 | Copyright (c) 2003, WebThing Ltd | |
3 | Author: Nick Kew <nick@webthing.com> | |
4 | ||
5 | This program is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 2 of the License, or | |
8 | (at your option) any later version. | |
9 | ||
10 | This program is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with this program; if not, write to the Free Software | |
17 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
18 | ||
19 | */ | |
20 | ||
21 | /* libxml */ | |
22 | #include <libxml/HTMLparser.h> | |
23 | ||
24 | /* apache */ | |
25 | #include <http_protocol.h> | |
26 | #include <http_config.h> | |
27 | #include <http_log.h> | |
28 | #include <apr_strings.h> | |
29 | ||
/* Forward declaration of this module's record so that the callbacks below
 * can pass its address to ap_get_module_config(); the record itself is
 * defined at the end of the file. */
module AP_MODULE_DECLARE_DATA proxy_html_module ;
31 | ||
/* One URL rewrite rule.  A link attribute value that starts with `from`
 * (compared case-insensitively) has that prefix replaced by `to`.
 * Rules are chained into a singly linked list through `next`.
 *
 * The struct carries the tag `urlmap` so that `next` really points at the
 * same type; casts to/from `struct urlmap*` elsewhere remain valid no-ops. */
typedef struct urlmap {
  struct urlmap* next ;
  const char* from ;
  const char* to ;
} urlmap ;
/* Per-directory configuration of the module. */
typedef struct {
  urlmap* map ;          /* head of the URL rewrite rule list (ProxyHTMLURLMap) */
  const char* doctype ;  /* text written at the start of every filtered document */
} proxy_html_conf ;
/* Per-request filter state; also the user-data pointer handed to every
 * SAX callback in this file. */
typedef struct {
  htmlSAXHandlerPtr sax ;     /* callback table built by setupSAX() */
  ap_filter_t* f ;            /* the filter instance this state belongs to */
  urlmap* map ;               /* rewrite rules walked by pstartElement() */
  htmlParserCtxtPtr parser ;  /* libxml2 push parser fed by proxy_html_filter() */
  apr_bucket_brigade* bb ;    /* output brigade the SAX callbacks write into */
} saxctxt ;
48 | ||
49 | static void pstartDocument(void* ctxt) { | |
50 | saxctxt* ctx = (saxctxt*) ctxt ; | |
51 | ||
52 | proxy_html_conf* cfg = ap_get_module_config(ctx->f->r->per_dir_config,&proxy_html_module); | |
53 | apr_table_unset(ctx->f->r->headers_out, "Content-Length") ; | |
54 | apr_table_unset(ctx->f->r->headers_out, "ETag") ; | |
55 | ap_set_content_type(ctx->f->r, "text/html;charset=utf-8") ; | |
56 | ap_fputs(ctx->f->next, ctx->bb, cfg->doctype) ; | |
57 | } | |
58 | static void pendDocument(void* ctxt) { | |
59 | saxctxt* ctx = (saxctxt*) ctxt ; | |
60 | APR_BRIGADE_INSERT_TAIL(ctx->bb, | |
61 | apr_bucket_eos_create(ctx->bb->bucket_alloc) ) ; | |
62 | ap_pass_brigade(ctx->f->next, ctx->bb) ; | |
63 | } | |
/* Associates an element name with the NULL-terminated list of its
 * attributes that are treated as URLs by pstartElement(). */
typedef struct {
  const char* name ;    /* element name, e.g. "a" */
  const char** attrs ;  /* NULL-terminated array of attribute names */
} elt_t ;
68 | ||
69 | static void pstartElement(void* ctxt, const xmlChar* name, | |
70 | const xmlChar** attrs ) { | |
71 | ||
72 | saxctxt* ctx = (saxctxt*) ctxt ; | |
73 | ||
74 | static const char* href[] = { "href", NULL } ; | |
75 | static const char* cite[] = { "cite", NULL } ; | |
76 | static const char* action[] = { "action", NULL } ; | |
77 | static const char* imgattr[] = { "src", "longdesc", "usemap", NULL } ; | |
78 | static const char* inputattr[] = { "src", "usemap", NULL } ; | |
79 | static const char* scriptattr[] = { "src", "for", NULL } ; | |
80 | static const char* frameattr[] = { "src", "longdesc", NULL } ; | |
81 | static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ; | |
82 | static const char* profile[] = { "profile", NULL } ; | |
83 | static const char* background[] = { "background", NULL } ; | |
84 | static const char* codebase[] = { "codebase", NULL } ; | |
85 | ||
86 | static elt_t linked_elts[] = { | |
87 | { "a" , href } , | |
88 | { "form", action } , | |
89 | { "base" , href } , | |
90 | { "area" , href } , | |
91 | { "link" , href } , | |
92 | { "img" , imgattr } , | |
93 | { "input" , inputattr } , | |
94 | { "script" , scriptattr } , | |
95 | { "frame", frameattr } , | |
96 | { "iframe", frameattr } , | |
97 | { "object", objattr } , | |
98 | { "q" , cite } , | |
99 | { "blockquote" , cite } , | |
100 | { "ins" , cite } , | |
101 | { "del" , cite } , | |
102 | { "head" , profile } , | |
103 | { "body" , background } , | |
104 | { "applet", codebase } , | |
105 | { NULL, NULL } | |
106 | } ; | |
107 | ||
108 | ap_fputc(ctx->f->next, ctx->bb, '<') ; | |
109 | ap_fputs(ctx->f->next, ctx->bb, name) ; | |
110 | ||
111 | if ( attrs ) { | |
112 | const char** linkattrs = 0 ; | |
113 | const xmlChar** a ; | |
114 | elt_t* elt ; | |
115 | for ( elt = linked_elts; elt->name != NULL ; ++elt ) | |
116 | if ( !strcmp(elt->name, name) ) { | |
117 | linkattrs = elt->attrs ; | |
118 | break ; | |
119 | } | |
120 | for ( a = attrs ; *a ; a += 2 ) { | |
121 | const xmlChar* value = a[1] ; | |
122 | if ( linkattrs && value ) { | |
123 | int is_uri = 0 ; | |
124 | const char** linkattr = linkattrs ; | |
125 | do { | |
126 | if ( !strcmp(*linkattr, *a) ) { | |
127 | is_uri = 1 ; | |
128 | break ; | |
129 | } | |
130 | } while ( *++linkattr ) ; | |
131 | if ( is_uri ) { | |
132 | urlmap* m ; | |
133 | for ( m = ctx->map ; m ; m = (urlmap*)m->next ) { | |
134 | if ( ! strncasecmp(value, m->from, strlen(m->from) ) ) { | |
135 | value = apr_pstrcat(ctx->f->r->pool, m->to, value+strlen(m->from) , NULL) ; | |
136 | break ; | |
137 | } | |
138 | } | |
139 | } | |
140 | } | |
141 | if ( ! value ) | |
142 | ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ; | |
143 | else | |
144 | ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", value, "\"", NULL) ; | |
145 | } | |
146 | } | |
147 | ap_fputc(ctx->f->next, ctx->bb, '>') ; | |
148 | } | |
149 | static void pendElement(void* ctxt, const xmlChar* name) { | |
150 | const char** p ; | |
151 | saxctxt* ctx = (saxctxt*) ctxt ; | |
152 | static const char* empty_elts[] = { | |
153 | "br" , | |
154 | "link" , | |
155 | "img" , | |
156 | "hr" , | |
157 | "input" , | |
158 | "meta" , | |
159 | "base" , | |
160 | "area" , | |
161 | "param" , | |
162 | "col" , | |
163 | "frame" , | |
164 | "isindex" , | |
165 | "basefont" , | |
166 | NULL | |
167 | } ; | |
168 | for ( p = empty_elts ; *p ; ++p ) | |
169 | if ( !strcmp( *p, name) ) | |
170 | return ; | |
171 | ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ; | |
172 | } | |
173 | #define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1 | |
174 | static void pcharacters(void* ctxt, const xmlChar *chars, int length) { | |
175 | saxctxt* ctx = (saxctxt*) ctxt ; | |
176 | int i ; | |
177 | int begin ; | |
178 | for ( begin=i=0; i<length; i++ ) { | |
179 | switch (chars[i]) { | |
180 | case '&' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "&") ; break ; | |
181 | case '<' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, "<") ; break ; | |
182 | case '>' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, ">") ; break ; | |
183 | case '"' : FLUSH ; ap_fputs(ctx->f->next, ctx->bb, """) ; break ; | |
184 | default : break ; | |
185 | } | |
186 | } | |
187 | FLUSH ; | |
188 | } | |
189 | static void pcdata(void* ctxt, const xmlChar *chars, int length) { | |
190 | saxctxt* ctx = (saxctxt*) ctxt ; | |
191 | ap_fwrite(ctx->f->next, ctx->bb, chars, length) ; | |
192 | } | |
193 | static void pcomment(void* ctxt, const xmlChar *chars) { | |
194 | saxctxt* ctx = (saxctxt*) ctxt ; | |
195 | ap_fputstrs(ctx->f->next, ctx->bb, "<!--", chars, "-->", NULL) ; | |
196 | } | |
197 | static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) { | |
198 | htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ; | |
199 | sax->startDocument = pstartDocument ; | |
200 | sax->endDocument = pendDocument ; | |
201 | sax->startElement = pstartElement ; | |
202 | sax->endElement = pendElement ; | |
203 | sax->characters = pcharacters ; | |
204 | sax->comment = pcomment ; | |
205 | sax->cdataBlock = pcdata ; | |
206 | return sax ; | |
207 | } | |
208 | static char* ctype2encoding(apr_pool_t* pool, const char* in) { | |
209 | char* x ; | |
210 | char* ptr ; | |
211 | char* ctype ; | |
212 | if ( ! in ) | |
213 | return 0 ; | |
214 | ctype = strdup(in) ; | |
215 | for ( ptr = ctype ; *ptr; ++ptr) | |
216 | if ( isupper(*ptr) ) | |
217 | *ptr = tolower(*ptr) ; | |
218 | ||
219 | if ( ptr = strstr(ctype, "charset=") , ptr > 0 ) { | |
220 | ptr += 8 ; // jump over "charset=" and chop anything that follows charset | |
221 | if ( x = strchr(ptr, ' ') , x ) | |
222 | *x = 0 ; | |
223 | if ( x = strchr(ptr, ';') , x ) | |
224 | *x = 0 ; | |
225 | } | |
226 | x = ptr ? apr_pstrdup(pool, ptr) : 0 ; | |
227 | free (ctype ) ; | |
228 | return x ; | |
229 | } | |
230 | ||
231 | static int proxy_html_filter_init(ap_filter_t* f) { | |
232 | saxctxt* fctx ; | |
233 | ||
234 | xmlCharEncoding enc | |
235 | = xmlParseCharEncoding(ctype2encoding(f->r->pool, f->r->content_type)) ; | |
236 | ||
237 | /* remove content-length filter */ | |
238 | ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ; | |
239 | ap_filter_t* ff = f->next ; | |
240 | ||
241 | do { | |
242 | ap_filter_t* fnext = ff->next ; | |
243 | if ( ff->frec == clf ) | |
244 | ap_remove_output_filter(ff) ; | |
245 | ff = fnext ; | |
246 | } while ( ff ) ; | |
247 | ||
248 | fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ; | |
249 | fctx->sax = setupSAX(f->r->pool) ; | |
250 | fctx->f = f ; | |
251 | fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; | |
252 | fctx->map = ap_get_module_config(f->r->per_dir_config,&proxy_html_module); | |
253 | ||
254 | if ( f->r->proto_num >= 1001 ) { | |
255 | if ( ! f->r->main && ! f->r->prev ) | |
256 | f->r->chunked = 1 ; | |
257 | } | |
258 | fctx->parser = htmlCreatePushParserCtxt | |
259 | ( fctx->sax , fctx, " ", 4, 0, enc) ; | |
260 | return OK ; | |
261 | } | |
262 | static saxctxt* check_filter_init (ap_filter_t* f) { | |
263 | ||
264 | if ( f->r->proxyreq && f->r->content_type ) { | |
265 | if ( strncasecmp(f->r->content_type, "text/html", 9) && | |
266 | strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) { | |
267 | ap_remove_output_filter(f) ; | |
268 | return NULL ; | |
269 | } | |
270 | } | |
271 | ||
272 | if ( ! f->ctx ) | |
273 | proxy_html_filter_init(f) ; | |
274 | return f->ctx ; | |
275 | } | |
276 | static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { | |
277 | apr_bucket* b ; | |
278 | const char* buf = 0 ; | |
279 | apr_size_t bytes = 0 ; | |
280 | ||
281 | saxctxt* ctxt = check_filter_init(f) ; | |
282 | if ( ! ctxt ) | |
283 | return ap_pass_brigade(f->next, bb) ; | |
284 | ||
285 | for ( b = APR_BRIGADE_FIRST(bb) ; | |
286 | b != APR_BRIGADE_SENTINEL(bb) ; | |
287 | b = APR_BUCKET_NEXT(b) ) { | |
288 | if ( APR_BUCKET_IS_EOS(b) ) { | |
289 | htmlParseChunk(ctxt->parser, buf, 0, 1) ; | |
290 | htmlFreeParserCtxt(ctxt->parser) ; | |
291 | } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) | |
292 | == APR_SUCCESS ) { | |
293 | htmlParseChunk(ctxt->parser, buf, bytes, 0) ; | |
294 | } else { | |
295 | ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ; | |
296 | } | |
297 | } | |
298 | apr_brigade_destroy(bb) ; | |
299 | return APR_SUCCESS ; | |
300 | } | |
/* Doctype used when none is configured.  proxy_html_merge() recognises
 * "not configured" by comparing against this pointer, so the pointer
 * itself is const and cannot be reassigned. */
static const char* const DEFAULT_DOCTYPE =
	"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
303 | ||
304 | static void* proxy_html_config(apr_pool_t* pool, char* x) { | |
305 | proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ; | |
306 | ret->doctype = DEFAULT_DOCTYPE ; | |
307 | return ret ; | |
308 | } | |
309 | static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { | |
310 | proxy_html_conf* base = (proxy_html_conf*) BASE ; | |
311 | proxy_html_conf* add = (proxy_html_conf*) ADD ; | |
312 | proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ; | |
313 | conf->map = add->map ? add->map : base->map ; | |
314 | if ( add->map && base->map ) { | |
315 | urlmap* newmap = add->map ; | |
316 | while ( newmap->next ) | |
317 | newmap = (urlmap*)newmap->next ; | |
318 | newmap->next = (struct urlmap*) base->map ; | |
319 | } | |
320 | conf->doctype = ( add->doctype == DEFAULT_DOCTYPE ) | |
321 | ? base->doctype : add->doctype ; | |
322 | return conf ; | |
323 | } | |
324 | static const char* set_urlmap(cmd_parms* cmd, void* CFG, | |
325 | const char* from, const char* to) { | |
326 | proxy_html_conf* cfg = (proxy_html_conf*)CFG ; | |
327 | urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ; | |
328 | newmap->from = apr_pstrdup(cmd->pool, from) ; | |
329 | newmap->to = apr_pstrdup(cmd->pool, to) ; | |
330 | newmap->next = (struct urlmap*) cfg->map ; | |
331 | cfg->map = newmap ; | |
332 | return NULL ; | |
333 | } | |
334 | static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t) { | |
335 | proxy_html_conf* cfg = (proxy_html_conf*)CFG ; | |
336 | cfg->doctype = apr_pstrdup(cmd->pool, t) ; | |
337 | return NULL ; | |
338 | } | |
/* Configuration directives provided by this module:
 *   ProxyHTMLURLMap  from to  - add a URL rewrite rule (set_urlmap)
 *   ProxyHTMLDoctype text     - set the doctype written at the start of
 *                               filtered documents (set_doctype)
 * The table is terminated by an entry with a NULL name. */
static const command_rec proxy_html_cmds[] = {
  AP_INIT_TAKE2("ProxyHTMLURLMap", set_urlmap, NULL, OR_ALL, "Map URL From To" ) ,
  AP_INIT_TAKE1("ProxyHTMLDoctype", set_doctype, NULL, OR_ALL, "Set Doctype for URL mapped documents" ) ,
  { NULL }
} ;
344 | static void proxy_html_hooks(apr_pool_t* p) { | |
345 | ap_register_output_filter("proxy-html", proxy_html_filter, | |
346 | proxy_html_filter_init, AP_FTYPE_RESOURCE) ; | |
347 | } | |
/* The module record.  NOTE(review): slot meanings below follow the usual
 * Apache 2.0 module layout - confirm against http_config.h. */
module AP_MODULE_DECLARE_DATA proxy_html_module = {
  STANDARD20_MODULE_STUFF,
  proxy_html_config,    /* create per-directory config */
  proxy_html_merge,     /* merge per-directory configs */
  NULL,                 /* no per-server config constructor */
  NULL,                 /* no per-server config merge */
  proxy_html_cmds,      /* directive table */
  proxy_html_hooks      /* hook registration */
} ;