--- httpd-2.2.14-p/server/mpm/prefork/prefork.c	2009-02-01 07:54:55.000000000 +1100
+++ httpd-2.2.14/server/mpm/prefork/prefork.c	2009-11-02 12:09:50.511530535 +1100
@@ -48,6 +48,7 @@
 #include "ap_listen.h"
 #include "ap_mmn.h"
 #include "apr_poll.h"
+#include "apr_md5.h"
 
 #ifdef HAVE_BSTRING_H
 #include <bstring.h>            /* for IRIX, FD_SET calls bzero() */
@@ -336,6 +337,33 @@
     die_now = 1;
 }
 
+static int volatile client_socket = -1;
+
+/* SIGINT handler for a prefork child.  With sigaction available, a SIGINT
+ * sent by the parent shuts down the client socket currently being served
+ * (if any) and a SIGINT from any other sender exits the child.  Without
+ * sigaction the sender cannot be checked and every SIGINT shuts it down.
+ */
+#ifndef NO_USE_SIGACTION
+static void shutdown_socket(int sig, siginfo_t *info, void *context)
+#else
+static void shutdown_socket(int sig)
+#endif
+{
+#ifndef NO_USE_SIGACTION
+    if (info->si_pid == getppid()) {
+#endif
+        if (client_socket != -1) {
+            shutdown(client_socket, SHUT_RDWR);
+        }
+#ifndef NO_USE_SIGACTION
+    }
+    else {
+        clean_child_exit(0);
+    }
+#endif
+}
+
 /* volatile just in case */
 static int volatile shutdown_pending;
 static int volatile restart_pending;
@@ -659,8 +687,12 @@
 
         current_conn = ap_run_create_connection(ptrans, ap_server_conf, csd, my_child_num, sbh, bucket_alloc);
         if (current_conn) {
+            apr_os_sock_get((apr_os_sock_t *)&client_socket, csd);
+
             ap_process_connection(current_conn, csd);
             ap_lingering_close(current_conn);
+
+            client_socket = -1;
         }
 
         /* Check the pod and the generation number after processing a
@@ -733,6 +765,10 @@
     }
 
     if (!pid) {
+#ifndef NO_USE_SIGACTION
+        struct sigaction act;
+#endif
+
 #ifdef HAVE_BINDPROCESSOR
         /* by default AIX binds to a single processor
          * this bit unbinds children which will then bind to another cpu
@@ -755,6 +791,19 @@
          * The pod is used for signalling the graceful restart.
          */
         apr_signal(AP_SIG_GRACEFUL, stop_listening);
+
+        /* If the parent sends SIGINT to the child, we shutdown the
+         * client socket, as we suspect that we are under a DoS attack.
+         */
+#ifndef NO_USE_SIGACTION
+        memset(&act, 0, sizeof(act));
+        act.sa_flags = SA_SIGINFO;
+        act.sa_sigaction = shutdown_socket;
+        sigaction(SIGINT, &act, NULL);
+#else
+        apr_signal(SIGINT, shutdown_socket);
+#endif
+
         child_main(slot);
     }
 
@@ -803,6 +852,8 @@
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    int status;
+    static apr_time_t maxed_out = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -813,8 +864,6 @@
     total_non_dead = 0;
 
     for (i = 0; i < ap_daemons_limit; ++i) {
-        int status;
-
         if (i >= ap_max_daemons_limit && free_length == idle_spawn_rate)
             break;
         ws = &ap_scoreboard_image->servers[i][0];
@@ -856,12 +905,17 @@
          */
         ap_mpm_pod_signal(pod);
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
     else if (idle_count < ap_daemons_min_free) {
         /* terminate the free list */
         if (free_length == 0) {
             /* only report this condition once */
             static int reported = 0;
+            static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+            apr_time_t now = apr_time_now();
+            apr_md5_ctx_t ctx;
+            pid_t pid;
 
             if (!reported) {
                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
@@ -870,6 +924,120 @@
                 reported = 1;
             }
             idle_spawn_rate = 1;
+
+            /* If after one maintenance interval we still see the same
+             * situation on the scoreboard, shutdown all client sockets
+             * in read state and at least 10% of all client sockets.
+             * Crude, but seems to clear things out.
+             */
+            if (maxed_out) {
+                apr_time_t diff = now - maxed_out;
+
+                if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+                    unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+                    /* Current digest of the scoreboard.
+                     */
+                    apr_md5_init(&ctx);
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        status = ap_scoreboard_image->servers[i][0].status;
+                        apr_md5_update(&ctx, &status, sizeof(status));
+
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        apr_md5_update(&ctx, &pid, sizeof(pid));
+                    }
+                    apr_md5_final(cur_digest, &ctx);
+
+                    /* If we haven't had a change for one maintenance
+                     * interval, we need to make room.
+                     */
+                    if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+                        maxed_out = 0;
+                    }
+                    else {
+                        int rdrs = 0, cull = ap_daemons_limit / 10;
+
+                        /* Disconnect all readers (includes keep alive).
+                         */
+                        for (i = 0; i < ap_daemons_limit; ++i) {
+                            pid = ap_scoreboard_image->parent[i].pid;
+                            status = ap_scoreboard_image->servers[i][0].status;
+
+                            if (status == SERVER_BUSY_READ ||
+                                status == SERVER_BUSY_KEEPALIVE) {
+
+                                ap_mpm_safe_kill(pid, SIGINT);
+                                rdrs++;
+                            }
+                        }
+
+                        /* Make up to 10% of all sockets, if required.
+                         */
+                        for (i = 0; i < ap_daemons_limit && cull > rdrs; ++i) {
+                            status = ap_scoreboard_image->servers[i][0].status;
+
+                            if (status != SERVER_BUSY_READ &&
+                                status != SERVER_BUSY_KEEPALIVE) {
+
+                                pid = ap_scoreboard_image->parent[i].pid;
+                                ap_mpm_safe_kill(pid, SIGINT);
+                                cull--;
+                            }
+                        }
+                    }
+                }
+            }
+            else {
+                int rdrs = 0;
+
+                /* Create digest of the scoreboard, see if things
+                 * change next time around.
+                 */
+                apr_md5_init(&ctx);
+                for (i = 0; i < ap_daemons_limit; ++i) {
+                    status = ap_scoreboard_image->servers[i][0].status;
+
+                    /* These are the conditions we are concerned with.
+                     */
+                    switch (status) {
+                    case SERVER_BUSY_READ:
+                    case SERVER_BUSY_KEEPALIVE:
+                        rdrs++; /* fall through */
+                    case SERVER_BUSY_WRITE:
+                    case SERVER_DEAD:
+                    case SERVER_GRACEFUL:
+                        break;
+                    default:
+                        return;
+                    }
+
+                    apr_md5_update(&ctx, &status, sizeof(status));
+
+                    pid = ap_scoreboard_image->parent[i].pid;
+                    apr_md5_update(&ctx, &pid, sizeof(pid));
+                }
+                apr_md5_final(sb_digest, &ctx);
+
+                /* Over 95% in read state (includes keep alive), clear now.
+                 */
+                if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) {
+                    /* Disconnect all readers (includes keep alive).
+                     */
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        status = ap_scoreboard_image->servers[i][0].status;
+
+                        if (status == SERVER_BUSY_READ ||
+                            status == SERVER_BUSY_KEEPALIVE) {
+                            ap_mpm_safe_kill(pid, SIGINT);
+                            rdrs++;
+                        }
+                    }
+                }
+                else {
+                    maxed_out = now;
+                }
+            }
         }
         else {
             if (idle_spawn_rate >= 8) {
@@ -902,10 +1070,13 @@
             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
                 idle_spawn_rate *= 2;
             }
+
+            maxed_out = 0;
         }
     }
     else {
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
 }
 
--- httpd-2.2.14-p/server/mpm/worker/worker.c	2009-11-02 09:40:23.129750043 +1100
+++ httpd-2.2.14/server/mpm/worker/worker.c	2009-11-02 12:37:53.987529627 +1100
@@ -33,6 +33,7 @@
 #define APR_WANT_STRFUNC
 #include "apr_want.h"
 #include "apr_atomic.h"
+#include "apr_md5.h"
 
 #if APR_HAVE_UNISTD_H
 #include <unistd.h>
@@ -422,6 +423,107 @@
     clean_child_exit(0);
 }
 
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+/* SIGINT handler for a worker child.  Signals not sent by the parent are
+ * ignored.  Otherwise the worker threads are signalled to suspend, client
+ * sockets of this child's threads in read or keep-alive state are shut
+ * down, further sockets are shut down when readers make up less than 10%
+ * of all threads, and the worker threads are signalled again to resume.
+ */
+static void shutdown_sockets(int sig, siginfo_t *info, void *context)
+{
+    int csd, i, j, slot = 0, status, total_rdrs = 0, rdrs = 0,
+        cull = ap_daemons_limit * ap_threads_per_child / 10;
+
+    /* not from parent, ignore */
+    if (info->si_pid != getppid()) {
+        return;
+    }
+
+    suspend_workers = 1;
+    apr_atomic_set32(&suspended_workers, 0);
+
+    /* suspend worker threads */
+    for (i = 0; i < ap_threads_per_child; i++) {
+        if (worker_os_threads[i]) {
+            pthread_kill(*worker_os_threads[i], WORKER_SIGNAL);
+        }
+    }
+
+    /* wait for threads to suspend, but press ahead after a while anyway */
+    for (i = 0;
+         apr_atomic_read32(&suspended_workers) < ap_threads_per_child && i < 25;
+         i++) {
+        apr_sleep(apr_time_from_sec(1) / 5);
+    }
+
+    /* Determine total number of readers (includes keep alive), our
+     * slot and the number of our own readers.
+     */
+    for (i = 0; i < ap_daemons_limit; ++i) {
+        if (ap_scoreboard_image->parent[i].pid == ap_my_pid) {
+            slot = i;
+        }
+
+        for (j = 0; j < ap_threads_per_child; j++) {
+            status = ap_scoreboard_image->servers[i][j].status;
+
+            if (status == SERVER_BUSY_READ ||
+                status == SERVER_BUSY_KEEPALIVE) {
+
+                total_rdrs++;
+
+                if (ap_scoreboard_image->parent[i].pid == ap_my_pid) {
+                    rdrs++;
+                }
+            }
+        }
+    }
+
+    /* Disconnect all readers (includes keep alive).
+     */
+    for (j = 0; j < ap_threads_per_child; j++) {
+        status = ap_scoreboard_image->servers[slot][j].status;
+
+        if (worker_sockets[j] &&
+            (status == SERVER_BUSY_READ ||
+             status == SERVER_BUSY_KEEPALIVE)) {
+
+            apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+            shutdown(csd, SHUT_RDWR);
+        }
+    }
+
+    /* Make up to 10% of all sockets, if required.
+     */
+    if (total_rdrs < cull) {
+        cull = ((ap_threads_per_child - rdrs) * (cull - total_rdrs)) / cull;
+
+        for (j = 0; j < ap_threads_per_child && cull > 0; j++) {
+            status = ap_scoreboard_image->servers[slot][j].status;
+
+            if (worker_sockets[j] &&
+                status != SERVER_BUSY_READ &&
+                status != SERVER_BUSY_KEEPALIVE) {
+
+                apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+                shutdown(csd, SHUT_RDWR);
+                cull--;
+            }
+        }
+    }
+
+    suspend_workers = 0;
+
+    /* resume worker threads */
+    for (i = 0; i < ap_threads_per_child; i++) {
+        if (worker_os_threads[i]) {
+            pthread_kill(*worker_os_threads[i], WORKER_SIGNAL);
+        }
+    }
+}
+#endif
+
 /*****************************************************************
  * Connection structures and accounting...
  */
@@ -1319,12 +1421,28 @@
         join_workers(ts->listener, threads);
     }
     else { /* !one_process */
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+        struct sigaction act;
+#endif
+
         /* remove SIGTERM from the set of blocked signals...  if one of
          * the other threads in the process needs to take us down
          * (e.g., for MaxRequestsPerChild) it will send us SIGTERM
          */
         unblock_signal(SIGTERM);
         apr_signal(SIGTERM, dummy_signal_handler);
+
+        /* If the parent sends SIGINT to the child, we shutdown the
+         * client socket, as we suspect that we are under a DoS attack.
+         */
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+        unblock_signal(SIGINT);
+        memset(&act, 0, sizeof(act));
+        act.sa_flags = SA_SIGINFO;
+        act.sa_sigaction = shutdown_sockets;
+        sigaction(SIGINT, &act, NULL);
+#endif
+
         /* Watch for any messages from the parent over the POD */
         while (1) {
             rv = ap_mpm_pod_check(pod);
@@ -1476,6 +1594,8 @@
     int last_non_dead;
     int total_non_dead;
     int active_thread_count = 0;
+    int status = SERVER_DEAD;
+    static apr_time_t maxed_out = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -1487,7 +1607,6 @@
     for (i = 0; i < ap_daemons_limit; ++i) {
         /* Initialization to satisfy the compiler. It doesn't know
          * that ap_threads_per_child is always > 0 */
-        int status = SERVER_DEAD;
         int any_dying_threads = 0;
         int any_dead_threads = 0;
         int all_dead_threads = 1;
@@ -1581,12 +1700,17 @@
         /* Kill off one child */
         ap_mpm_pod_signal(pod, TRUE);
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
     else if (idle_thread_count < min_spare_threads) {
         /* terminate the free list */
         if (free_length == 0) {
             /* only report this condition once */
             static int reported = 0;
+            static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+            apr_time_t now = apr_time_now();
+            apr_md5_ctx_t ctx;
+            pid_t pid;
 
             if (!reported) {
                 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
@@ -1596,6 +1720,95 @@
                 reported = 1;
             }
             idle_spawn_rate = 1;
+
+            /* If after one maintenance interval we still see the same
+             * situation on the scoreboard, shutdown all client sockets
+             * in read state and at least 10% of all client sockets.
+             * Crude, but seems to clear things out.
+             */
+            if (maxed_out) {
+                apr_time_t diff = now - maxed_out;
+
+                if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+                    unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+                    /* Current digest of the scoreboard.
+                     */
+                    apr_md5_init(&ctx);
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        for (j = 0; j < ap_threads_per_child; j++) {
+                            status = ap_scoreboard_image->servers[i][j].status;
+                            apr_md5_update(&ctx, &status, sizeof(status));
+                        }
+
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        apr_md5_update(&ctx, &pid, sizeof(pid));
+                    }
+                    apr_md5_final(cur_digest, &ctx);
+
+                    /* If we haven't had a change for one maintenance
+                     * interval, we need to make room.
+                     */
+                    if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+                        maxed_out = 0;
+                    }
+                    else {
+                        /* Signal child processes to shutdown client sockets.
+                         */
+                        for (i = 0; i < ap_daemons_limit; ++i) {
+                            pid = ap_scoreboard_image->parent[i].pid;
+                            ap_mpm_safe_kill(pid, SIGINT);
+                        }
+                    }
+                }
+            }
+            else {
+                int rdrs = 0, total = ap_daemons_limit * ap_threads_per_child;
+
+                /* Create digest of the scoreboard, see if things
+                 * change next time around.
+                 */
+                apr_md5_init(&ctx);
+                for (i = 0; i < ap_daemons_limit; ++i) {
+                    for (j = 0; j < ap_threads_per_child; j++) {
+                        status = ap_scoreboard_image->servers[i][j].status;
+
+                        /* These are conditions we are concerned with.
+                         */
+                        switch (status) {
+                        case SERVER_BUSY_READ:
+                        case SERVER_BUSY_KEEPALIVE:
+                            rdrs++; /* fall through */
+                        case SERVER_BUSY_WRITE:
+                        case SERVER_DEAD:
+                        case SERVER_GRACEFUL:
+                            break;
+                        default:
+                            return;
+                        }
+
+                        apr_md5_update(&ctx, &status, sizeof(status));
+                    }
+
+                    pid = ap_scoreboard_image->parent[i].pid;
+                    apr_md5_update(&ctx, &pid, sizeof(pid));
+                }
+                apr_md5_final(sb_digest, &ctx);
+
+                /* Over 95% of all threads in read state (includes keep alive), clear now.
+                 */
+                if (total - rdrs < total / 20) {
+                    /* Signal child processes to shutdown client sockets.
+                     */
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        ap_mpm_safe_kill(pid, SIGINT);
+                    }
+                }
+                else {
+                    maxed_out = now;
+                }
+            }
         }
         else {
             if (free_length > idle_spawn_rate) {
@@ -1623,10 +1836,13 @@
             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
                 idle_spawn_rate *= 2;
             }
+
+            maxed_out = 0;
         }
     }
     else {
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
 }
 