/*
- Copyright (c) 2003, WebThing Ltd
+ Copyright (c) 2003-4, WebThing Ltd
Author: Nick Kew <nick@webthing.com>
This program is free software; you can redistribute it and/or modify
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+/* Note to Users
+
+ You are requested to register as a user, at
+ http://apache.webthing.com/registration.html
+
+ This entitles you to support from the developer
+ (see the webpage for details).
+ I'm unlikely to reply to help/support requests from
+ non-registered users, unless you're paying and/or offering
+ constructive feedback such as bug reports or sensible
+ suggestions for further development.
+
+ It also makes a small contribution to the effort
+ that's gone into developing this work.
+*/
/* libxml */
#include <libxml/HTMLparser.h>
module AP_MODULE_DECLARE_DATA proxy_html_module ;
-typedef struct {
+typedef struct urlmap {
struct urlmap* next ;
const char* from ;
const char* to ;
typedef struct {
urlmap* map ;
const char* doctype ;
+ const char* etag ;
+ unsigned int flags ;
} proxy_html_conf ;
typedef struct {
htmlSAXHandlerPtr sax ;
ap_filter_t* f ;
- urlmap* map ;
+ proxy_html_conf* cfg ;
htmlParserCtxtPtr parser ;
apr_bucket_brigade* bb ;
+ xmlCharEncoding enc ;
} saxctxt ;
-static void pstartDocument(void* ctxt) {
- saxctxt* ctx = (saxctxt*) ctxt ;
-
- proxy_html_conf* cfg = ap_get_module_config(ctx->f->r->per_dir_config,&proxy_html_module);
- apr_table_unset(ctx->f->r->headers_out, "Content-Length") ;
- apr_table_unset(ctx->f->r->headers_out, "ETag") ;
- ap_set_content_type(ctx->f->r, "text/html;charset=utf-8") ;
- ap_fputs(ctx->f->next, ctx->bb, cfg->doctype) ;
-}
-static void pendDocument(void* ctxt) {
- saxctxt* ctx = (saxctxt*) ctxt ;
- APR_BRIGADE_INSERT_TAIL(ctx->bb,
- apr_bucket_eos_create(ctx->bb->bucket_alloc) ) ;
- ap_pass_brigade(ctx->f->next, ctx->bb) ;
+static int is_empty_elt(const char* name) {
+ const char** p ;
+ static const char* empty_elts[] = {
+ "br" ,
+ "link" ,
+ "img" ,
+ "hr" ,
+ "input" ,
+ "meta" ,
+ "base" ,
+ "area" ,
+ "param" ,
+ "col" ,
+ "frame" ,
+ "isindex" ,
+ "basefont" ,
+ NULL
+ } ;
+ for ( p = empty_elts ; *p ; ++p )
+ if ( !strcmp( *p, name) )
+ return 1 ;
+ return 0 ;
}
+
typedef struct {
const char* name ;
const char** attrs ;
} elt_t ;
+#define NORM_LC 0x1
+#define NORM_MSSLASH 0x2
+#define NORM_RESET 0x4
+
+static char* normalise(unsigned int flags, char* str) {
+ xmlChar* p ;
+ if ( flags & NORM_LC )
+ for ( p = str ; *p ; ++p )
+ if ( isupper(*p) )
+ *p = tolower(*p) ;
+
+ if ( flags & NORM_MSSLASH )
+ for ( p = strchr(str, '\\') ; p ; p = strchr(p+1, '\\') )
+ *p = '/' ;
+
+ return str ;
+}
+
static void pstartElement(void* ctxt, const xmlChar* name,
- const xmlChar** attrs ) {
+ const xmlChar** attrs ) {
saxctxt* ctx = (saxctxt*) ctxt ;
} while ( *++linkattr ) ;
if ( is_uri ) {
urlmap* m ;
- for ( m = ctx->map ; m ; m = (urlmap*)m->next ) {
+ for ( m = ctx->cfg->map ; m ; m = m->next ) {
if ( ! strncasecmp(value, m->from, strlen(m->from) ) ) {
value = apr_pstrcat(ctx->f->r->pool, m->to, value+strlen(m->from) , NULL) ;
break ;
}
}
if ( ! value )
- ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
- else
- ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", value, "\"", NULL) ;
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
+ else {
+ if ( ctx->cfg->flags != 0 )
+ value = normalise(ctx->cfg->flags,
+ apr_pstrdup(ctx->f->r->pool, value ) ) ;
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"",
+ value, "\"", NULL) ;
+ }
}
}
- ap_fputc(ctx->f->next, ctx->bb, '>') ;
+ if ( is_empty_elt(name) )
+ ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ;
+ else
+ ap_fputc(ctx->f->next, ctx->bb, '>') ;
}
static void pendElement(void* ctxt, const xmlChar* name) {
- const char** p ;
saxctxt* ctx = (saxctxt*) ctxt ;
- static const char* empty_elts[] = {
- "br" ,
- "link" ,
- "img" ,
- "hr" ,
- "input" ,
- "meta" ,
- "base" ,
- "area" ,
- "param" ,
- "col" ,
- "frame" ,
- "isindex" ,
- "basefont" ,
- NULL
- } ;
- for ( p = empty_elts ; *p ; ++p )
- if ( !strcmp( *p, name) )
- return ;
- ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
+ if ( ! is_empty_elt(name) )
+ ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
}
#define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1
static void pcharacters(void* ctxt, const xmlChar *chars, int length) {
}
static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) {
htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ;
- sax->startDocument = pstartDocument ;
- sax->endDocument = pendDocument ;
+ sax->startDocument = NULL ;
+ sax->endDocument = NULL ;
sax->startElement = pstartElement ;
sax->endElement = pendElement ;
sax->characters = pcharacters ;
char* ctype ;
if ( ! in )
return 0 ;
- ctype = strdup(in) ;
+ if ( ctype = strdup(in) , ! ctype )
+ return 0 ;
for ( ptr = ctype ; *ptr; ++ptr)
if ( isupper(*ptr) )
*ptr = tolower(*ptr) ;
static int proxy_html_filter_init(ap_filter_t* f) {
saxctxt* fctx ;
- xmlCharEncoding enc
- = xmlParseCharEncoding(ctype2encoding(f->r->pool, f->r->content_type)) ;
-
/* remove content-length filter */
ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ;
ap_filter_t* ff = f->next ;
fctx->sax = setupSAX(f->r->pool) ;
fctx->f = f ;
fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
- fctx->map = ap_get_module_config(f->r->per_dir_config,&proxy_html_module);
+ fctx->cfg = ap_get_module_config(f->r->per_dir_config,&proxy_html_module);
+
+/* Note the encoding now, before updating content-type */
+ fctx->enc = xmlParseCharEncoding
+ (ctype2encoding(f->r->pool, f->r->content_type)) ;
if ( f->r->proto_num >= 1001 ) {
if ( ! f->r->main && ! f->r->prev )
f->r->chunked = 1 ;
}
- fctx->parser = htmlCreatePushParserCtxt
- ( fctx->sax , fctx, " ", 4, 0, enc) ;
+ apr_table_unset(f->r->headers_out, "Content-Length") ;
+ apr_table_unset(f->r->headers_out, "ETag") ;
+ ap_set_content_type(f->r, "text/html;charset=utf-8") ;
+ ap_fputs(f->next, fctx->bb, fctx->cfg->doctype) ;
return OK ;
}
static saxctxt* check_filter_init (ap_filter_t* f) {
b != APR_BRIGADE_SENTINEL(bb) ;
b = APR_BUCKET_NEXT(b) ) {
if ( APR_BUCKET_IS_EOS(b) ) {
- htmlParseChunk(ctxt->parser, buf, 0, 1) ;
- htmlFreeParserCtxt(ctxt->parser) ;
+ if ( ctxt->parser != NULL ) {
+ htmlParseChunk(ctxt->parser, buf, 0, 1) ;
+ htmlFreeParserCtxt(ctxt->parser) ;
+ }
+ APR_BRIGADE_INSERT_TAIL(ctxt->bb,
+ apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
+ ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
} else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
== APR_SUCCESS ) {
- htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
+ if ( ctxt->parser == NULL )
+ ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
+ buf, bytes, 0, ctxt->enc) ;
+ else
+ htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
} else {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
}
}
+ //ap_fflush(ctxt->f->next, ctxt->bb) ; // uncomment for debug
apr_brigade_destroy(bb) ;
return APR_SUCCESS ;
}
-static const char* DEFAULT_DOCTYPE =
+static const char* fpi_html =
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
+static const char* fpi_html_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
+static const char* fpi_xhtml =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
+static const char* fpi_xhtml_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
+static const char* html_etag = ">" ;
+static const char* xhtml_etag = " />" ;
+#define DEFAULT_DOCTYPE fpi_html
+#define DEFAULT_ETAG html_etag
static void* proxy_html_config(apr_pool_t* pool, char* x) {
proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
ret->doctype = DEFAULT_DOCTYPE ;
+ ret->etag = DEFAULT_ETAG ;
return ret ;
}
static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
proxy_html_conf* base = (proxy_html_conf*) BASE ;
proxy_html_conf* add = (proxy_html_conf*) ADD ;
proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
- conf->map = add->map ? add->map : base->map ;
+
if ( add->map && base->map ) {
- urlmap* newmap = add->map ;
- while ( newmap->next )
- newmap = (urlmap*)newmap->next ;
- newmap->next = (struct urlmap*) base->map ;
- }
+ urlmap* a ;
+ conf->map = NULL ;
+ for ( a = base->map ; a ; a = a->next ) {
+ urlmap* save = conf->map ;
+ conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
+ conf->map->next = save ;
+ }
+ for ( a = add->map ; a ; a = a->next ) {
+ urlmap* save = conf->map ;
+ conf->map = apr_pmemdup(pool, a, sizeof(urlmap)) ;
+ conf->map->next = save ;
+ }
+ } else
+ conf->map = add->map ? add->map : base->map ;
+
conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
? base->doctype : add->doctype ;
+ conf->etag = ( add->etag == DEFAULT_ETAG ) ? base->etag : add->etag ;
+ if ( add->flags & NORM_RESET )
+ conf->flags = add->flags ^ NORM_RESET ;
+ else
+ conf->flags = base->flags | add->flags ;
return conf ;
}
static const char* set_urlmap(cmd_parms* cmd, void* CFG,
const char* from, const char* to) {
proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
+ urlmap* oldmap = cfg->map ;
urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ;
newmap->from = apr_pstrdup(cmd->pool, from) ;
newmap->to = apr_pstrdup(cmd->pool, to) ;
- newmap->next = (struct urlmap*) cfg->map ;
- cfg->map = newmap ;
+ newmap->next = NULL ;
+ if ( oldmap ) {
+ while ( oldmap->next )
+ oldmap = oldmap->next ;
+ oldmap->next = newmap ;
+ } else
+ cfg->map = newmap ;
return NULL ;
}
-static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t) {
+static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
+ const char* l) {
proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
- cfg->doctype = apr_pstrdup(cmd->pool, t) ;
+ if ( !strcasecmp(t, "xhtml") ) {
+ cfg->etag = xhtml_etag ;
+ if ( l && !strcasecmp(l, "legacy") )
+ cfg->doctype = fpi_xhtml_legacy ;
+ else
+ cfg->doctype = fpi_xhtml ;
+ } else if ( !strcasecmp(t, "html") ) {
+ cfg->etag = html_etag ;
+ if ( l && !strcasecmp(l, "legacy") )
+ cfg->doctype = fpi_html_legacy ;
+ else
+ cfg->doctype = fpi_html ;
+ } else {
+ cfg->doctype = apr_pstrdup(cmd->pool, t) ;
+ if ( l && ( ( l[0] == 'x' ) || ( l[0] == 'X' ) ) )
+ cfg->etag = xhtml_etag ;
+ }
+ return NULL ;
+}
+static void set_param(proxy_html_conf* cfg, const char* arg) {
+ if ( arg && *arg )
+ if ( !strcmp(arg, "lowercase") )
+ cfg->flags |= NORM_LC ;
+ else if ( !strcmp(arg, "dospath") )
+ cfg->flags |= NORM_MSSLASH ;
+ else if ( !strcmp(arg, "reset") )
+ cfg->flags |= NORM_RESET ;
+}
+static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg1,
+ const char* arg2, const char* arg3) {
+ set_param( (proxy_html_conf*)CFG, arg1) ;
+ set_param( (proxy_html_conf*)CFG, arg2) ;
+ set_param( (proxy_html_conf*)CFG, arg3) ;
return NULL ;
}
static const command_rec proxy_html_cmds[] = {
AP_INIT_TAKE2("ProxyHTMLURLMap", set_urlmap, NULL, OR_ALL, "Map URL From To" ) ,
- AP_INIT_TAKE1("ProxyHTMLDoctype", set_doctype, NULL, OR_ALL, "Set Doctype for URL mapped documents" ) ,
+ AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL, OR_ALL, "(HTML|XHTML) [Legacy]" ) ,
+ AP_INIT_TAKE123("ProxyHTMLFixups", set_flags, NULL, OR_ALL, "Options are lowercase, dospath" ) ,
{ NULL }
} ;
static void proxy_html_hooks(apr_pool_t* p) {
ap_register_output_filter("proxy-html", proxy_html_filter,
- proxy_html_filter_init, AP_FTYPE_RESOURCE) ;
+ NULL, AP_FTYPE_RESOURCE) ;
}
module AP_MODULE_DECLARE_DATA proxy_html_module = {
STANDARD20_MODULE_STUFF,