]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-ipvs-1.0.7-2.2.19.patch
run depmod before geninitrd and rc-boot
[packages/kernel.git] / kernel-ipvs-1.0.7-2.2.19.patch
1 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/Documentation/Configure.help linux-2.2.19-vs-1.0.7/Documentation/Configure.help
2 --- linux-2.2.19/Documentation/Configure.help   Tue Mar 27 09:33:35 2001
3 +++ linux-2.2.19-vs-1.0.7/Documentation/Configure.help  Tue Mar 27 09:32:02 2001
4 @@ -2807,6 +2807,118 @@
5    The module will be called ip_masq_markfw.o. If you want to compile
6    it as a module, say M here and read Documentation/modules.txt.
7  
8 +IP: masquerading virtual server support
9 +CONFIG_IP_MASQUERADE_VS
10 +  IP Virtual Server support will let you build a virtual server
11 +  based on cluster of two or more real servers. This option must
12 +  be enabled for at least one of the clustered computers that will
13 +  take care of intercepting incoming connections to a single IP
14 +  address and scheduling them to real servers.
15 +
16 +  Three request dispatching techniques are implemented, they are
17 +  virtual server via NAT, virtual server via tunneling and virtual
18 +  server via direct routing. The round-robin scheduling, the weighted
19 +  round-robin scheduling, the weighted least-connection scheduling,
20 +  the locality-based least-connection scheduling, or the
21 +  locality-based least-connection with replication scheduling
22 +  algorithm can be used to choose which server the connection is
23 +  directed to, thus load balancing can be achieved among the servers.
24 +  For more information and its administration program, please visit
25 +  the following URL:
26 +
27 +       http://www.linuxvirtualserver.org/
28 +  If you want this, say Y.
29 +
30 +IP virtual server debugging
31 +CONFIG_IP_VS_DEBUG
32 +  Say Y here if you want to get additional messages useful in
33 +  debugging the IP virtual server code. You can change the debug
34 +  level in /proc/sys/net/ipv4/vs/debug_level
35 +
36 +IP masquerading VS table size (the Nth power of 2)
37 +CONFIG_IP_MASQUERADE_VS_TAB_BITS
38 +  Using a big ipvs hash table for virtual server will greatly reduce
39 +  conflicts in the ipvs hash table when there are hundreds of thousands
40 +  of active connections.
41 +
42 +  Note the table size must be power of 2. The table size will be the
43 +  value of 2 raised to the power of your input. For example, the default
44 +  number is 12, so the table size is 4096. Don't input the number too
45 +  small, otherwise you will lose performance on it. You can adapt the
46 +  table size yourself, according to your virtual server application. It
47 +  is good to set the table size not far less than the number of
48 +  connections per second multiplying average lasting time of connection
49 +  in the table.  For example, your virtual server gets 200 connections
50 +  per second, the connection lasts for 200 seconds in average in the
51 +  masquerading table, the table size should be not far less than
52 +  200x200, it is good to set the table size 32768 (2**15).
53 +
54 +  Note also that each connection occupies 128 bytes effectively and
55 +  each hash entry uses 8 bytes, so you can estimate how much memory is
56 +  needed for your box.
57 +
58 +IPVS: round-robin scheduling
59 +CONFIG_IP_MASQUERADE_VS_RR
60 +  The round-robin scheduling algorithm simply directs network
61 +  connections to different real servers in a round-robin manner.
62 +  If you want to compile it in kernel, say Y. If you want to compile
63 +  it as a module, say M here and read Documentation/modules.txt.
64 +
65 +IPVS: weighted round-robin scheduling
66 +CONFIG_IP_MASQUERADE_VS_WRR
67 +  The weighted round-robin scheduling algorithm directs network
68 +  connections to different real servers based on server weights
69 +  in a round-robin manner. Servers with higher weights receive
70 +  new connections before those with lower weights, and servers
71 +  with higher weights get more connections than those with lower
72 +  weights, and servers with equal weights get equal connections.
73 +  If you want to compile it in kernel, say Y. If you want to compile
74 +  it as a module, say M here and read Documentation/modules.txt.
75 +
76 +IPVS: least-connection scheduling
77 +CONFIG_IP_MASQUERADE_VS_LC
78 +  The least-connection scheduling algorithm directs network
79 +  connections to the server with the least number of active 
80 +  connections.
81 +  If you want to compile it in kernel, say Y. If you want to compile
82 +  it as a module, say M here and read Documentation/modules.txt.
83 +
84 +IPVS: weighted least-connection scheduling
85 +CONFIG_IP_MASQUERADE_VS_WLC
86 +  The weighted least-connection scheduling algorithm directs network
87 +  connections to the server with the least active connections
88 +  normalized by the server weight.
89 +  If you want to compile it in kernel, say Y. If you want to compile
90 +  it as a module, say M here and read Documentation/modules.txt.
91 +
92 +IPVS: locality-based least-connection scheduling
93 +CONFIG_IP_MASQUERADE_VS_LBLC
94 +  The locality-based least-connection scheduling algorithm is for
95 +  destination IP load balancing. It is usually used in cache cluster.
96 +  This algorithm usually directs packet destined for an IP address to
97 +  its server if the server is alive and under load. If the server is
98 +  overloaded (its active connection numbers is larger than its weight)
99 +  and there is a server in its half load, then allocate the weighted
100 +  least-connection server to this IP address.
101 +  If you want to compile it in kernel, say Y. If you want to compile
102 +  it as a module, say M here and read Documentation/modules.txt.
103 +
104 +IPVS: locality-based least-connection with replication scheduling
105 +CONFIG_IP_MASQUERADE_VS_LBLCR
106 +  The locality-based least-connection with replication scheduling
107 +  algorithm is also for destination IP load balancing. It is 
108 +  usually used in cache cluster. It differs from the LBLC scheduling
109 +  as follows: the load balancer maintains mappings from a target
110 +  to a set of server nodes that can serve the target. Requests for
111 +  a target are assigned to the least-connection node in the target's
112 +  server set. If all the nodes in the server set are overloaded,
113 +  it picks up a least-connection node in the cluster and adds it
114 +  in the server set for the target. If the server set has not been
115 +  modified for the specified time, the most loaded node is removed
116 +  from the server set, in order to avoid high degree of replication.
117 +  If you want to compile it in kernel, say Y. If you want to compile
118 +  it as a module, say M here and read Documentation/modules.txt.
119 +
120  IP: aliasing support
121  CONFIG_IP_ALIAS
122    Sometimes it is useful to give several IP addresses to a single
123 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/include/linux/ip_masq.h linux-2.2.19-vs-1.0.7/include/linux/ip_masq.h
124 --- linux-2.2.19/include/linux/ip_masq.h        Sat Oct 23 17:02:32 1999
125 +++ linux-2.2.19-vs-1.0.7/include/linux/ip_masq.h       Tue Dec 12 19:17:27 2000
126 @@ -103,6 +103,27 @@
127  
128  #define IP_MASQ_MFW_SCHED      0x01
129  
130 +/* 
131 + *     Virtual server stuff 
132 + */
133 +struct ip_vs_user {
134 +       /* virtual service options */
135 +       u_int16_t       protocol;
136 +       u_int32_t       vaddr;          /* virtual address */
137 +       u_int16_t       vport;
138 +        u_int32_t       vfwmark;        /* firewall mark of virtual */
139 +       unsigned        vs_flags;       /* virtual service flags */
140 +       unsigned        timeout;        /* persistent timeout in ticks */
141 +       u_int32_t       netmask;        /* persistent netmask */
142 +        
143 +       /* destination specific options */
144 +       u_int32_t       daddr;          /* real destination address */
145 +       u_int16_t       dport;
146 +       unsigned        masq_flags;     /* destination flags */
147 +       int             weight;         /* destination weight */
148 +};
149 +
150 +
151  #define IP_FW_MASQCTL_MAX 256
152  #define IP_MASQ_TNAME_MAX  32
153  
154 @@ -115,6 +136,7 @@
155                 struct ip_autofw_user autofw_user;
156                 struct ip_mfw_user mfw_user;
157                 struct ip_masq_user user;
158 +               struct ip_vs_user vs_user;
159                 unsigned char m_raw[IP_FW_MASQCTL_MAX];
160         } u;
161  };
162 @@ -124,7 +146,9 @@
163  #define IP_MASQ_TARGET_CORE    1
164  #define IP_MASQ_TARGET_MOD     2       /* masq_mod is selected by "name" */
165  #define IP_MASQ_TARGET_USER    3       
166 -#define IP_MASQ_TARGET_LAST    4
167 +#define IP_MASQ_TARGET_VS      4
168 +#define IP_MASQ_TARGET_LAST    5
169 +
170  
171  #define IP_MASQ_CMD_NONE       0       /* just peek */
172  #define IP_MASQ_CMD_INSERT     1
173 @@ -136,5 +160,9 @@
174  #define IP_MASQ_CMD_LIST       7       /* actually fake: done via /proc */
175  #define IP_MASQ_CMD_ENABLE     8
176  #define IP_MASQ_CMD_DISABLE    9
177 +#define IP_MASQ_CMD_ADD_DEST   10      /* for adding dest in IPVS */
178 +#define IP_MASQ_CMD_DEL_DEST   11      /* for deleting dest in IPVS */
179 +#define IP_MASQ_CMD_SET_DEST   12      /* for setting dest in IPVS */
180  
181  #endif /* _LINUX_IP_MASQ_H */
182 +
183 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/include/linux/sysctl.h linux-2.2.19-vs-1.0.7/include/linux/sysctl.h
184 --- linux-2.2.19/include/linux/sysctl.h Tue Mar 27 09:33:48 2001
185 +++ linux-2.2.19-vs-1.0.7/include/linux/sysctl.h        Tue Mar 27 09:32:20 2001
186 @@ -196,6 +196,7 @@
187         NET_IPV4_NEIGH=17,
188         NET_IPV4_ROUTE=18,
189         NET_IPV4_FIB_HASH=19,
190 +       NET_IPV4_VS=20,
191  
192         NET_IPV4_TCP_TIMESTAMPS=33,
193         NET_IPV4_TCP_WINDOW_SCALING=34,
194 @@ -275,6 +276,32 @@
195         NET_IPV4_CONF_LOG_MARTIANS=11,
196         NET_IPV4_CONF_HIDDEN=12,
197         NET_IPV4_CONF_ARPFILTER=13
198 +};
199 +
200 +/* /proc/sys/net/ipv4/vs */
201 +
202 +enum
203 +{
204 +       NET_IPV4_VS_AMEMTHRESH=1,
205 +       NET_IPV4_VS_AMDROPRATE=2,
206 +       NET_IPV4_VS_DROP_ENTRY=3,
207 +       NET_IPV4_VS_DROP_PACKET=4,
208 +       NET_IPV4_VS_SECURE_TCP=5,
209 +       NET_IPV4_VS_TO_ES=6,
210 +       NET_IPV4_VS_TO_SS=7,
211 +       NET_IPV4_VS_TO_SR=8,
212 +       NET_IPV4_VS_TO_FW=9,
213 +       NET_IPV4_VS_TO_TW=10,
214 +       NET_IPV4_VS_TO_CL=11,
215 +       NET_IPV4_VS_TO_CW=12,
216 +       NET_IPV4_VS_TO_LA=13,
217 +       NET_IPV4_VS_TO_LI=14,
218 +       NET_IPV4_VS_TO_SA=15,
219 +       NET_IPV4_VS_TO_UDP=16,
220 +       NET_IPV4_VS_TO_ICMP=17,
221 +       NET_IPV4_VS_DEBUG_LEVEL=18,
222 +        NET_IPV4_VS_LBLC_EXPIRE=19,
223 +        NET_IPV4_VS_LBLCR_EXPIRE=20,
224  };
225  
226  /* /proc/sys/net/ipv6 */
227 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/include/net/ip.h linux-2.2.19-vs-1.0.7/include/net/ip.h
228 --- linux-2.2.19/include/net/ip.h       Tue Mar 27 09:33:48 2001
229 +++ linux-2.2.19-vs-1.0.7/include/net/ip.h      Tue Mar 27 17:48:23 2001
230 @@ -47,6 +47,9 @@
231  #define IPSKB_MASQUERADED      1
232  #define IPSKB_TRANSLATED       2
233  #define IPSKB_FORWARDED                4
234 +#ifdef CONFIG_IP_MASQUERADE_VS
235 +#define IPSKB_REDIRECTED        8
236 +#endif
237  };
238  
239  struct ipcm_cookie
240 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/include/net/ip_masq.h linux-2.2.19-vs-1.0.7/include/net/ip_masq.h
241 --- linux-2.2.19/include/net/ip_masq.h  Tue Mar 27 09:33:48 2001
242 +++ linux-2.2.19-vs-1.0.7/include/net/ip_masq.h Wed Apr 18 16:17:59 2001
243 @@ -12,8 +12,15 @@
244  #include <linux/ip.h>
245  #include <linux/skbuff.h>
246  #include <linux/list.h>
247 +#ifdef CONFIG_SYSCTL
248 +#include <linux/sysctl.h>
249 +#endif
250  #endif /* __KERNEL__ */
251  
252 +#ifdef CONFIG_IP_MASQUERADE_VS
253 +struct ip_vs_dest;
254 +#endif
255 +
256  /*
257   * This define affects the number of ports that can be handled
258   * by each of the protocol helper modules.
259 @@ -66,10 +73,6 @@
260  #define IP_MASQ_MOD_CTL                        0x00
261  #define IP_MASQ_USER_CTL               0x01
262  
263 -#ifdef __KERNEL__
264 -
265 -#define IP_MASQ_TAB_SIZE       256
266 -
267  #define IP_MASQ_F_NO_DADDR           0x0001    /* no daddr yet */
268  #define IP_MASQ_F_NO_DPORT                   0x0002    /* no dport set yet */
269  #define IP_MASQ_F_NO_SADDR           0x0004    /* no sport set yet */
270 @@ -86,6 +89,22 @@
271  #define IP_MASQ_F_USER               0x2000    /* from uspace */
272  #define IP_MASQ_F_SIMPLE_HASH        0x8000    /* prevent s+d and m+d hashing */
273  
274 +#ifdef CONFIG_IP_MASQUERADE_VS
275 +#define IP_MASQ_F_VS             0x00010000    /* virtual server related */
276 +#define IP_MASQ_F_VS_NO_OUTPUT    0x00020000   /* output packets avoid masq */
277 +#define IP_MASQ_F_VS_INACTIVE     0x00040000    /* not established */
278 +#define IP_MASQ_F_VS_FWD_MASK    0x00700000    /* mask for the fwd method */
279 +#define IP_MASQ_F_VS_LOCALNODE   0x00100000    /* local node destination */
280 +#define IP_MASQ_F_VS_TUNNEL      0x00200000    /* packets will be tunneled */
281 +#define IP_MASQ_F_VS_DROUTE      0x00400000    /* direct routing */
282 +                                                /* masquerading otherwise */
283 +#define IP_MASQ_VS_FWD(ms) (ms->flags & IP_MASQ_F_VS_FWD_MASK)
284 +#endif /* CONFIG_IP_MASQUERADE_VS */
285 +
286 +#ifdef __KERNEL__
287 +
288 +#define IP_MASQ_TAB_SIZE       256
289 +
290  /*
291   *     Delta seq. info structure
292   *     Each MASQ struct has 2 (output AND input seq. changes).
293 @@ -114,9 +133,13 @@
294         struct ip_masq  *control;       /* Master control connection */
295         atomic_t        n_control;      /* Number of "controlled" masqs */
296         unsigned        flags;          /* status flags */
297 -       unsigned        timeout;        /* timeout */
298 +       unsigned long   timeout;        /* timeout */
299         unsigned        state;          /* state info */
300         struct ip_masq_timeout_table *timeout_table;
301 +#ifdef CONFIG_IP_MASQUERADE_VS
302 +       struct ip_vs_dest *dest;        /* real server */
303 +        atomic_t in_pkts;               /* incoming packet counter */
304 +#endif /* CONFIG_IP_MASQUERADE_VS */
305  };
306  
307  /*
308 @@ -179,7 +202,7 @@
309  extern struct list_head ip_masq_d_table[IP_MASQ_TAB_SIZE];
310  extern const char * ip_masq_state_name(int state);
311  extern struct ip_masq_hook *ip_masq_user_hook;
312 -extern u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope);
313 +extern int ip_masq_select_addr(struct sk_buff *skb,__u32 *maddr);
314  /*
315   *     
316   *     IP_MASQ_APP: IP application masquerading definitions 
317 @@ -354,6 +377,10 @@
318         static const char *strProt[] = {"UDP","TCP","ICMP"};
319         int msproto = masq_proto_num(proto);
320  
321 +#ifdef CONFIG_IP_MASQUERADE_VS
322 +        if (proto == IPPROTO_IP)
323 +                return "IP ";
324 +#endif /* CONFIG_IP_MASQUERADE_VS */
325         if (msproto<0||msproto>2)  {
326                 sprintf(buf, "IP_%d", proto);
327                 return buf;
328 @@ -372,6 +399,9 @@
329         IP_MASQ_S_CLOSE_WAIT,
330         IP_MASQ_S_LAST_ACK,
331         IP_MASQ_S_LISTEN,
332 +#ifdef CONFIG_IP_MASQUERADE_VS
333 +       IP_MASQ_S_SYNACK,
334 +#endif
335         IP_MASQ_S_UDP,
336         IP_MASQ_S_ICMP,
337         IP_MASQ_S_LAST
338 @@ -395,8 +425,33 @@
339  
340         if (!mstim)
341                 return;
342 +       ms->timeout_table = NULL;
343         atomic_dec(&mstim->refcnt);
344  }
345 +
346 +#ifdef CONFIG_IP_MASQUERADE_VS
347 +
348 +extern struct ip_masq_timeout_table masq_timeout_table_dos;
349 +extern void ip_masq_secure_tcp_set(int on);
350 +
351 +/*
352 + *     This is a simple mechanism to ignore packets when
353 + *     we are loaded. Just set ip_masq_drop_rate to 'n' and
354 + *     we start to drop 1/n of the packets
355 + */
356 +
357 +extern int ip_masq_drop_rate;
358 +extern int ip_masq_drop_counter;
359 +
360 +static __inline__ int ip_masq_todrop(void)
361 +{
362 +       if (!ip_masq_drop_rate) return 0;
363 +       if (--ip_masq_drop_counter > 0) return 0;
364 +       ip_masq_drop_counter = ip_masq_drop_rate;
365 +       return 1;
366 +}
367 +
368 +#endif /* CONFIG_IP_MASQUERADE_VS */
369  
370  #endif /* __KERNEL__ */
371  
372 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/include/net/ip_vs.h linux-2.2.19-vs-1.0.7/include/net/ip_vs.h
373 --- linux-2.2.19/include/net/ip_vs.h    Thu Jan  1 08:00:00 1970
374 +++ linux-2.2.19-vs-1.0.7/include/net/ip_vs.h   Thu Apr 19 22:33:09 2001
375 @@ -0,0 +1,392 @@
376 +/*
377 + *      IP virtual server
378 + *      data structure and functionality definitions
379 + */
380 +
381 +#include <linux/config.h>
382 +
383 +#ifndef _IP_VS_H
384 +#define _IP_VS_H
385 +
386 +#define IP_VS_VERSION_CODE            0x010007
387 +#define NVERSION(version)                       \
388 +       (version >> 16) & 0xFF,                 \
389 +       (version >> 8) & 0xFF,                  \
390 +       version & 0xFF
391 +
392 +/*
393 + *     Virtual Service Flags
394 + */
395 +#define IP_VS_SVC_F_PERSISTENT        0x0001    /* persistent port */
396 +#define IP_VS_SVC_F_HASHED            0x0002    /* hashed entry */
397 +
398 +/*
399 + *     Destination Server Flags
400 + */
401 +#define IP_VS_DEST_F_AVAILABLE        0x0001    /* Available tag */
402 +
403 +/*
404 + * The default IP_VS_TEMPLATE_TIMEOUT is a little larger than average
405 + * connection time plus MASQUERADE_EXPIRE_TCP_FIN(2*60*HZ). Because the
406 + * template won't be released until its controlled masq entries are
407 + * expired.
408 + * If IP_VS_TEMPLATE_TIMEOUT is too small, the template will soon expire
409 + * and will be put in expire again and again, which requires additional
410 + * overhead. If it is too large, the same client will always visit the same
411 + * server, which will make dynamic load imbalance worse.
412 + */
413 +#define IP_VS_TEMPLATE_TIMEOUT  6*60*HZ
414 +
415 +#ifdef __KERNEL__
416 +
417 +extern int ip_vs_forwarding_related_icmp(struct sk_buff *skb);
418 +
419 +#ifdef CONFIG_IP_VS_DEBUG
420 +extern int ip_vs_get_debug_level(void);
421 +#define IP_VS_DBG(level, msg...)                        \
422 +    do {                                                \
423 +           if (level <= ip_vs_get_debug_level())       \
424 +                   printk(KERN_DEBUG "IPVS: " ## msg); \
425 +    } while (0)
426 +#else  /* NO DEBUGGING at ALL */
427 +#define IP_VS_DBG(level, msg...)  do {} while (0)
428 +#endif
429 +
430 +#define IP_VS_ERR(msg...) printk(KERN_ERR "IPVS: " ## msg )
431 +#define IP_VS_INFO(msg...) printk(KERN_INFO "IPVS: " ## msg )
432 +#define IP_VS_WARNING(msg...) \
433 +       printk(KERN_WARNING "IPVS: " ## msg)
434 +
435 +#ifdef CONFIG_IP_VS_DEBUG
436 +#define EnterFunction(level)                                            \
437 +    do {                                                                \
438 +           if (level <= ip_vs_get_debug_level())                       \
439 +                   printk(KERN_DEBUG "Enter: %s, %s line %i\n",        \
440 +                          __FUNCTION__, __FILE__, __LINE__);           \
441 +    } while (0)
442 +#define LeaveFunction(level)                                            \
443 +    do {                                                                \
444 +           if (level <= ip_vs_get_debug_level())                       \
445 +                       printk(KERN_DEBUG "Leave: %s, %s line %i\n",    \
446 +                              __FUNCTION__, __FILE__, __LINE__);       \
447 +    } while (0)
448 +#else
449 +#define EnterFunction(level)   do {} while (0)
450 +#define LeaveFunction(level)   do {} while (0)
451 +#endif
452 +
453 +
454 +/*
455 + *     IPVS statistics object
456 + */
457 +struct ip_vs_stats
458 +{
459 +       spinlock_t              lock;           /* spin lock */
460 +       __u32                   conns;          /* connections scheduled */
461 +       __u32                   inpkts;         /* incoming packets */
462 +       __u32                   outpkts;        /* outgoing packets */
463 +       __u64                   inbytes;        /* incoming bytes */
464 +       __u64                   outbytes;       /* outgoing bytes */
465 +};
466 +
467 +
468 +/*
469 + *     The real server destination forwarding entry
470 + *     with ip address, port
471 + */
472 +struct ip_vs_dest {
473 +       struct list_head        n_list;   /* for the dests in the service */
474 +       struct list_head        d_list;   /* for table with all the dests */
475 +
476 +       __u32                   addr;     /* IP address of real server */
477 +       __u16                   port;     /* port number of the service */
478 +       unsigned                flags;    /* dest status flags */
479 +       unsigned                masq_flags;     /* flags to copy to masq */
480 +       atomic_t                activeconns;    /* active connections */
481 +       atomic_t                inactconns;     /* inactive connections */
482 +       atomic_t                refcnt;         /* reference counter */
483 +       int                     weight;         /* server weight */
484 +       struct ip_vs_stats      stats;          /* statistics */
485 +
486 +       /* for virtual service */
487 +       struct ip_vs_service    *svc;     /* service that it belongs to */
488 +       __u16                   protocol; /* which protocol (TCP/UDP) */
489 +       __u32                   vaddr;    /* IP address for virtual service */
490 +       __u16                   vport;    /* port number for the service */
491 +       __u32                   vfwmark;  /* firewall mark of the service */
492 +};
493 +
494 +
495 +/*
496 + *     The scheduler object
497 + */
498 +struct ip_vs_scheduler {
499 +       struct list_head        n_list;   /* d-linked list head */
500 +       char                    *name;    /* scheduler name */
501 +       atomic_t                refcnt;   /* reference counter */
502 +
503 +       /* scheduler initializing service */
504 +       int (*init_service)(struct ip_vs_service *svc);
505 +       /* scheduling service finish */
506 +       int (*done_service)(struct ip_vs_service *svc);
507 +       /* scheduler updating service */
508 +       int (*update_service)(struct ip_vs_service *svc);
509 +
510 +       /* selecting a server from the given service */
511 +       struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
512 +                                      struct iphdr *iph);
513 +};
514 +
515 +
516 +/*
517 + *     The information about the virtual service offered to the net
518 + *     and the forwarding entries
519 + */
520 +struct ip_vs_service {
521 +       struct list_head        s_list;   /* hashed d-linked list head */
522 +       struct list_head        f_list;   /* hashed d-linked list head */
523 +       __u16                   protocol; /* which protocol (TCP/UDP) */
524 +       __u32                   addr;     /* IP address for virtual service */
525 +       __u16                   port;     /* port number for the service */
526 +       __u32                   fwmark;   /* firewall mark of the service */
527 +       unsigned                flags;    /* service status flags */
528 +       unsigned                timeout;  /* persistent timeout in ticks */
529 +       __u32                   netmask;  /* grouping granularity */
530 +       struct list_head        destinations;  /* real server d-linked list */
531 +       struct ip_vs_scheduler  *scheduler;    /* bound scheduler object */
532 +       void                    *sched_data;   /* scheduler application data */
533 +       struct ip_vs_stats      stats;         /* statistics for the service */
534 +};
535 +
536 +
537 +/*
538 + *     IP Virtual Server masq entry hash table
539 + */
540 +#define IP_VS_TAB_BITS CONFIG_IP_MASQUERADE_VS_TAB_BITS
541 +#define IP_VS_TAB_SIZE  (1 << IP_VS_TAB_BITS)
542 +#define IP_VS_TAB_MASK  (IP_VS_TAB_SIZE - 1)
543 +extern struct list_head *ip_vs_table;
544 +
545 +/*
546 + *     Hash and unhash functions
547 + */
548 +extern int ip_vs_hash(struct ip_masq *ms);
549 +extern int ip_vs_unhash(struct ip_masq *ms);
550 +
551 +/*
552 + *      Registering/unregistering scheduler functions
553 + */
554 +extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
555 +extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
556 +
557 +/*
558 + *      Lookup functions for the hash table (caller must lock table)
559 + */
560 +extern struct ip_masq * __ip_vs_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
561 +extern struct ip_masq * __ip_vs_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port);
562 +
563 +/*
564 + *      Creating a masquerading entry for IPVS
565 + */
566 +extern struct ip_masq * ip_masq_new_vs(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned flags);
567 +
568 +/*
569 + *      IPVS data and functions
570 + */
571 +extern rwlock_t __ip_vs_lock;
572 +
573 +extern void ip_vs_set_state(struct ip_masq *ms, int new_state);
574 +extern void ip_vs_bind_masq(struct ip_masq *ms, struct ip_vs_dest *dest);
575 +extern void ip_vs_unbind_masq(struct ip_masq *ms);
576 +
577 +extern int ip_vs_ctl(int optname, struct ip_masq_ctl *mctl, int optlen);
578 +extern struct ip_vs_service *
579 +ip_vs_lookup_service(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport);
580 +extern struct ip_vs_service * ip_vs_lookup_svc_fwm(__u32 fwmark);
581 +extern struct ip_vs_dest *
582 +__ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport);
583 +extern struct ip_vs_dest *ip_vs_lookup_dest(struct ip_vs_service *svc,
584 +                                           __u32 daddr, __u16 dport);
585 +extern struct ip_masq * ip_vs_schedule(struct ip_vs_service *svc,
586 +                                      struct iphdr *iph);
587 +extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb);
588 +extern int ip_vs_tunnel_xmit(struct sk_buff *skb, __u32 daddr);
589 +extern int ip_vs_dr_xmit(struct sk_buff *skb, __u32 daddr);
590 +
591 +/*
592 + *      init function
593 + */
594 +extern int ip_vs_init(void);
595 +
596 +/*
597 + *     init function prototypes for scheduling modules
598 + *      these function will be called when they are built in kernel
599 + */
600 +extern int ip_vs_rr_init(void);
601 +extern int ip_vs_wrr_init(void);
602 +extern int ip_vs_lc_init(void);
603 +extern int ip_vs_wlc_init(void);
604 +extern int ip_vs_lblc_init(void);
605 +extern int ip_vs_lblcr_init(void);
606 +
607 +
608 +/*
609 + *      Slow timer functions for IPVS
610 + */
611 +extern void add_sltimer(struct timer_list * timer);
612 +extern int  del_sltimer(struct timer_list * timer);
613 +extern void mod_sltimer(struct timer_list *timer, unsigned long expires);
614 +
615 +
616 +/*
617 + *     IP Virtual Server statistics
618 + */
619 +extern struct ip_vs_stats ip_vs_stats;
620 +
621 +extern __inline__ void
622 +ip_vs_in_stats(struct ip_masq *ms, struct sk_buff *skb)
623 +{
624 +       struct ip_vs_dest *dest = ms->dest;
625 +       read_lock(&__ip_vs_lock);
626 +       if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
627 +               spin_lock(&dest->stats.lock);
628 +               dest->stats.inpkts++;
629 +               dest->stats.inbytes += skb->len;
630 +               spin_unlock(&dest->stats.lock);
631 +
632 +               spin_lock(&dest->svc->stats.lock);
633 +               dest->svc->stats.inpkts++;
634 +               dest->svc->stats.inbytes += skb->len;
635 +               spin_unlock(&dest->svc->stats.lock);
636 +
637 +               spin_lock(&ip_vs_stats.lock);
638 +               ip_vs_stats.inpkts++;
639 +               ip_vs_stats.inbytes += skb->len;
640 +               spin_unlock(&ip_vs_stats.lock);
641 +       }
642 +       read_unlock(&__ip_vs_lock);
643 +}
644 +
645 +
646 +extern __inline__ void
647 +ip_vs_out_stats(struct ip_masq *ms, struct sk_buff *skb)
648 +{
649 +       struct ip_vs_dest *dest = ms->dest;
650 +       read_lock(&__ip_vs_lock);
651 +       if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
652 +               spin_lock(&dest->stats.lock);
653 +               dest->stats.outpkts++;
654 +               dest->stats.outbytes += skb->len;
655 +               spin_unlock(&dest->stats.lock);
656 +
657 +               spin_lock(&dest->svc->stats.lock);
658 +               dest->svc->stats.outpkts++;
659 +               dest->svc->stats.outbytes += skb->len;
660 +               spin_unlock(&dest->svc->stats.lock);
661 +
662 +               spin_lock(&ip_vs_stats.lock);
663 +               ip_vs_stats.outpkts++;
664 +               ip_vs_stats.outbytes += skb->len;
665 +               spin_unlock(&ip_vs_stats.lock);
666 +       }
667 +       read_unlock(&__ip_vs_lock);
668 +}
669 +
670 +
671 +extern __inline__ void
672 +ip_vs_conn_stats(struct ip_masq *ms, struct ip_vs_service *svc)
673 +{
674 +       spin_lock(&ms->dest->stats.lock);
675 +       ms->dest->stats.conns++;
676 +       spin_unlock(&ms->dest->stats.lock);
677 +
678 +       spin_lock(&svc->stats.lock);
679 +       svc->stats.conns++;
680 +       spin_unlock(&svc->stats.lock);
681 +
682 +       spin_lock(&ip_vs_stats.lock);
683 +       ip_vs_stats.conns++;
684 +       spin_unlock(&ip_vs_stats.lock);
685 +}
686 +
687 +
688 +/*
689 + *      ip_vs_fwd_tag returns the forwarding tag of the masq
690 + */
691 +extern __inline__ char ip_vs_fwd_tag(struct ip_masq *ms)
692 +{
693 +       char fwd = 'M';
694 +
695 +       switch (IP_MASQ_VS_FWD(ms)) {
696 +       case IP_MASQ_F_VS_LOCALNODE: fwd = 'L'; break;
697 +       case IP_MASQ_F_VS_TUNNEL: fwd = 'T'; break;
698 +       case IP_MASQ_F_VS_DROUTE: fwd = 'R'; break;
699 +       }
700 +       return fwd;
701 +}
702 +
703 +
704 +extern __inline__ char * ip_vs_fwd_name(unsigned masq_flags)
705 +{
706 +       char *fwd;
707 +
708 +       switch (masq_flags & IP_MASQ_F_VS_FWD_MASK) {
709 +       case IP_MASQ_F_VS_LOCALNODE:
710 +               fwd = "Local";
711 +               break;
712 +       case IP_MASQ_F_VS_TUNNEL:
713 +               fwd = "Tunnel";
714 +               break;
715 +       case IP_MASQ_F_VS_DROUTE:
716 +               fwd = "Route";
717 +               break;
718 +       default:
719 +               fwd = "Masq";
720 +       }
721 +       return fwd;
722 +}
723 +
724 +
725 +/*
726 + *      ip_vs_forward forwards the packet through tunneling, direct
727 + *      routing or local node (passing to the upper layer).
728 + *      Return values mean:
729 + *          0    skb must be passed to the upper layer
730 + *         -1    skb must be released
731 + *         -2    skb has been released
732 + */
733 +extern __inline__ int ip_vs_forward(struct sk_buff *skb, struct ip_masq *ms)
734 +{
735 +       int ret = -1;
736 +
737 +       atomic_inc(&ms->in_pkts);
738 +
739 +       switch (IP_MASQ_VS_FWD(ms)) {
740 +       case IP_MASQ_F_VS_TUNNEL:
741 +               if (ip_vs_tunnel_xmit(skb, ms->saddr) == 0) {
742 +                       IP_VS_DBG(10, "tunneling failed.\n");
743 +               } else {
744 +                       IP_VS_DBG(10, "tunneling succeeded.\n");
745 +               }
746 +               ret = -2;
747 +               break;
748 +
749 +       case IP_MASQ_F_VS_DROUTE:
750 +               if (ip_vs_dr_xmit(skb, ms->saddr) == 0) {
751 +                       IP_VS_DBG(10, "direct routing failed.\n");
752 +               } else {
753 +                       IP_VS_DBG(10, "direct routing succeeded.\n");
754 +               }
755 +               ret = -2;
756 +               break;
757 +
758 +       case IP_MASQ_F_VS_LOCALNODE:
759 +               ret = 0;
760 +       }
761 +
762 +       return ret;
763 +}
764 +
765 +#endif /* __KERNEL__ */
766 +
767 +#endif /* _IP_VS_H */
768 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/Config.in linux-2.2.19-vs-1.0.7/net/ipv4/Config.in
769 --- linux-2.2.19/net/ipv4/Config.in     Sat Dec 16 23:10:12 2000
770 +++ linux-2.2.19-vs-1.0.7/net/ipv4/Config.in    Tue Dec 12 18:35:06 2000
771 @@ -51,6 +51,17 @@
772            tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW
773            tristate 'IP: ip fwmark masq-forwarding support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_MFW
774         fi
775 +       bool 'IP: masquerading virtual server support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_VS
776 +       if [ "$CONFIG_IP_MASQUERADE_VS" = "y" ]; then
777 +         bool '  IP virtual server debugging' CONFIG_IP_VS_DEBUG
778 +          int '  IP masquerading VS table size (the Nth power of 2)' CONFIG_IP_MASQUERADE_VS_TAB_BITS 12
779 +          tristate '  IPVS: round-robin scheduling' CONFIG_IP_MASQUERADE_VS_RR
780 +          tristate '  IPVS: weighted round-robin scheduling' CONFIG_IP_MASQUERADE_VS_WRR
781 +          tristate '  IPVS: least-connection scheduling' CONFIG_IP_MASQUERADE_VS_LC
782 +          tristate '  IPVS: weighted least-connection scheduling' CONFIG_IP_MASQUERADE_VS_WLC
783 +          tristate '  IPVS: locality-based least-connection scheduling' CONFIG_IP_MASQUERADE_VS_LBLC
784 +          tristate '  IPVS: locality-based least-connection with replication scheduling' CONFIG_IP_MASQUERADE_VS_LBLCR
785 +       fi
786        fi
787      fi
788  fi
789 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/Makefile linux-2.2.19-vs-1.0.7/net/ipv4/Makefile
790 --- linux-2.2.19/net/ipv4/Makefile      Tue Jan  5 07:31:34 1999
791 +++ linux-2.2.19-vs-1.0.7/net/ipv4/Makefile     Sat Dec  2 22:32:10 2000
792 @@ -91,6 +91,58 @@
793  
794  endif
795  
796 +ifeq ($(CONFIG_IP_MASQUERADE_VS),y)
797 +  IPV4X_OBJS += ip_vs.o
798 +  
799 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_RR),y)
800 +  IPV4_OBJS += ip_vs_rr.o
801 +  else
802 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_RR),m)
803 +    M_OBJS += ip_vs_rr.o
804 +    endif
805 +  endif
806 +  
807 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_WRR),y)
808 +  IPV4_OBJS += ip_vs_wrr.o
809 +  else
810 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_WRR),m)
811 +    M_OBJS += ip_vs_wrr.o
812 +    endif
813 +  endif
814 +  
815 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_LC),y)
816 +  IPV4_OBJS += ip_vs_lc.o
817 +  else
818 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_LC),m)
819 +    M_OBJS += ip_vs_lc.o
820 +    endif
821 +  endif
822 +  
823 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_WLC),y)
824 +  IPV4_OBJS += ip_vs_wlc.o
825 +  else
826 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_WLC),m)
827 +    M_OBJS += ip_vs_wlc.o
828 +    endif
829 +  endif
830 +  
831 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_LBLC),y)
832 +  IPV4_OBJS += ip_vs_lblc.o
833 +  else
834 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_LBLC),m)
835 +    M_OBJS += ip_vs_lblc.o
836 +    endif
837 +  endif
838 +  
839 +  ifeq ($(CONFIG_IP_MASQUERADE_VS_LBLCR),y)
840 +  IPV4_OBJS += ip_vs_lblcr.o
841 +  else
842 +    ifeq ($(CONFIG_IP_MASQUERADE_VS_LBLCR),m)
843 +    M_OBJS += ip_vs_lblcr.o
844 +    endif
845 +  endif
846 +endif
847 +
848  M_OBJS += ip_masq_user.o
849  M_OBJS += ip_masq_ftp.o ip_masq_irc.o ip_masq_raudio.o ip_masq_quake.o
850  M_OBJS += ip_masq_vdolive.o ip_masq_cuseeme.o
851 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_forward.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_forward.c
852 --- linux-2.2.19/net/ipv4/ip_forward.c  Fri Jan  7 09:45:02 2000
853 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_forward.c Fri Feb  2 15:38:28 2001
854 @@ -41,6 +41,9 @@
855  #include <linux/ip_fw.h>
856  #ifdef CONFIG_IP_MASQUERADE
857  #include <net/ip_masq.h>
858 +#ifdef CONFIG_IP_MASQUERADE_VS
859 +#include <net/ip_vs.h>
860 +#endif
861  #endif
862  #include <net/checksum.h>
863  #include <linux/route.h>
864 @@ -103,6 +106,14 @@
865         }
866  #endif
867  
868 +#ifdef CONFIG_IP_MASQUERADE_VS
869 +       if (iph->protocol == IPPROTO_ICMP &&
870 +               !(IPCB(skb)->flags&IPSKB_MASQUERADED)) {
871 +               /* Related ICMP packet for IPVS ? */
872 +               fw_res = ip_vs_forwarding_related_icmp(skb);
873 +               if (fw_res > 0) return ip_local_deliver(skb);
874 +       }
875 +#endif
876  
877  #ifdef CONFIG_IP_TRANSPARENT_PROXY
878         if (ip_chksock(skb))
879 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_input.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_input.c
880 --- linux-2.2.19/net/ipv4/ip_input.c    Tue Mar 27 09:33:49 2001
881 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_input.c   Tue Mar 27 09:32:21 2001
882 @@ -250,6 +250,15 @@
883          */
884          {
885                 int ret;
886 +
887 +#ifdef CONFIG_IP_MASQUERADE_VS
888 +               if((IPCB(skb)->flags&IPSKB_REDIRECTED)) {
889 +                       printk(KERN_DEBUG "ip_input(): ipvs recursion detected. Check ipvs configuration\n");
890 +                       kfree_skb(skb);
891 +                       return 0;
892 +               }
893 +#endif
894 +
895                 /*
896                  *      Some masq modules can re-inject packets if
897                  *      bad configured.
898 @@ -262,6 +271,12 @@
899                 }
900  
901                 ret = ip_fw_demasquerade(&skb);
902 +#ifdef CONFIG_IP_MASQUERADE_VS
903 +               if (ret == -2) {
904 +                       /* skb has already been released */
905 +                       return 0;
906 +               }
907 +#endif
908                 if (ret < 0) {
909                         kfree_skb(skb);
910                         return 0;
911 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_masq.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_masq.c
912 --- linux-2.2.19/net/ipv4/ip_masq.c     Tue Mar 27 09:33:49 2001
913 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_masq.c    Wed Apr 18 19:58:48 2001
914 @@ -50,7 +50,12 @@
915   *     Kai Bankett             :       do not toss other IP protos in proto_doff()
916   *     Dan Kegel               :       pointed correct NAT behavior for UDP streams
917   *     Julian Anastasov        :       use daddr and dport as hash keys
918 - *     
919 + *     Wensong Zhang           :       Added virtual server support
920 + *     Peter Kese              :       added masq TCP state handling for input-only
921 + *     Julian Anastasov        :       step to mSR after SYN in INPUT_ONLY table
922 + *     Julian Anastasov        :       fixed huge expire bug for IPVS after bad checksum
923 + *     Wensong Zhang           :       added server status checking for IPVS
924 + *
925   */
926  
927  #include <linux/config.h>
928 @@ -85,6 +90,10 @@
929  #include <linux/ip_fw.h>
930  #include <linux/ip_masq.h>
931  
932 +#ifdef CONFIG_IP_MASQUERADE_VS
933 +#include <net/ip_vs.h>
934 +#endif /* CONFIG_IP_MASQUERADE_VS */
935 +
936  int sysctl_ip_masq_debug = 0;
937  int sysctl_ip_masq_udp_dloose = 0;
938  
939 @@ -98,6 +107,21 @@
940  
941  struct ip_masq_hook *ip_masq_user_hook = NULL;
942  
943 +#ifdef CONFIG_IP_MASQUERADE_VS
944 +/*
945 + *     Use different state/timeout tables
946 + */
947 +#ifndef IP_MASQ_MANY_STATE_TABLES
948 +#define IP_MASQ_MANY_STATE_TABLES
949 +#endif
950 +
951 +int ip_masq_drop_rate = 0;
952 +int ip_masq_drop_counter = 0;
953 +
954 +#endif
955 +
956 +#ifndef CONFIG_IP_MASQUERADE_VS
957 +
958  /*
959   *     Timeout table[state]
960   */
961 @@ -106,38 +130,104 @@
962         ATOMIC_INIT(0), /* refcnt */
963         0,              /* scale  */
964         {
965 -               30*60*HZ,       /*      IP_MASQ_S_NONE, */
966 -               15*60*HZ,       /*      IP_MASQ_S_ESTABLISHED,  */
967 -               2*60*HZ,        /*      IP_MASQ_S_SYN_SENT,     */
968 -               1*60*HZ,        /*      IP_MASQ_S_SYN_RECV,     */
969 -               2*60*HZ,        /*      IP_MASQ_S_FIN_WAIT,     */
970 -               2*60*HZ,        /*      IP_MASQ_S_TIME_WAIT,    */
971 -               10*HZ,          /*      IP_MASQ_S_CLOSE,        */
972 -               60*HZ,          /*      IP_MASQ_S_CLOSE_WAIT,   */
973 -               30*HZ,          /*      IP_MASQ_S_LAST_ACK,     */
974 -               2*60*HZ,        /*      IP_MASQ_S_LISTEN,       */
975 -               5*60*HZ,        /*      IP_MASQ_S_UDP,  */
976 -               1*60*HZ,        /*      IP_MASQ_S_ICMP, */
977 -               2*HZ,/* IP_MASQ_S_LAST  */
978 +               [IP_MASQ_S_NONE]        =       30*60*HZ,
979 +               [IP_MASQ_S_ESTABLISHED] =       15*60*HZ,
980 +               [IP_MASQ_S_SYN_SENT]    =       2*60*HZ,
981 +               [IP_MASQ_S_SYN_RECV]    =       1*60*HZ,
982 +               [IP_MASQ_S_FIN_WAIT]    =       2*60*HZ,
983 +               [IP_MASQ_S_TIME_WAIT]   =       2*60*HZ,
984 +               [IP_MASQ_S_CLOSE]       =       10*HZ,
985 +               [IP_MASQ_S_CLOSE_WAIT]  =       60*HZ,
986 +               [IP_MASQ_S_LAST_ACK]    =       30*HZ,
987 +               [IP_MASQ_S_LISTEN]      =       2*60*HZ,
988 +               [IP_MASQ_S_UDP]         =       5*60*HZ,
989 +               [IP_MASQ_S_ICMP]        =       1*60*HZ,
990 +               [IP_MASQ_S_LAST]        =       2*HZ,
991         },      /* timeout */
992  };
993  
994 +#else /* CONFIG_IP_MASQUERADE_VS */
995 +
996 +/*
997 + *     Timeout table[state]
998 + */
999 +/* static int masq_timeout_table[IP_MASQ_S_LAST+1] = { */
1000 +static struct ip_masq_timeout_table masq_timeout_table = {
1001 +       ATOMIC_INIT(0), /* refcnt */
1002 +       0,              /* scale  */
1003 +       {
1004 +               [IP_MASQ_S_NONE]        =       30*60*HZ,
1005 +               [IP_MASQ_S_ESTABLISHED] =       15*60*HZ,
1006 +               [IP_MASQ_S_SYN_SENT]    =       2*60*HZ,
1007 +               [IP_MASQ_S_SYN_RECV]    =       1*60*HZ,
1008 +               [IP_MASQ_S_FIN_WAIT]    =       2*60*HZ,
1009 +               [IP_MASQ_S_TIME_WAIT]   =       2*60*HZ,
1010 +               [IP_MASQ_S_CLOSE]       =       10*HZ,
1011 +               [IP_MASQ_S_CLOSE_WAIT]  =       60*HZ,
1012 +               [IP_MASQ_S_LAST_ACK]    =       30*HZ,
1013 +               [IP_MASQ_S_LISTEN]      =       2*60*HZ,
1014 +               [IP_MASQ_S_SYNACK]      =       120*HZ,
1015 +               [IP_MASQ_S_UDP]         =       5*60*HZ,
1016 +               [IP_MASQ_S_ICMP]        =       1*60*HZ,
1017 +               [IP_MASQ_S_LAST]        =       2*HZ,
1018 +       },      /* timeout */
1019 +};
1020 +
1021 +
1022 +struct ip_masq_timeout_table masq_timeout_table_dos = {
1023 +       ATOMIC_INIT(0), /* refcnt */
1024 +       0,              /* scale  */
1025 +       {
1026 +               [IP_MASQ_S_NONE]        =       15*60*HZ,
1027 +               [IP_MASQ_S_ESTABLISHED] =       8*60*HZ,
1028 +               [IP_MASQ_S_SYN_SENT]    =       60*HZ,
1029 +               [IP_MASQ_S_SYN_RECV]    =       10*HZ,
1030 +               [IP_MASQ_S_FIN_WAIT]    =       60*HZ,
1031 +               [IP_MASQ_S_TIME_WAIT]   =       60*HZ,
1032 +               [IP_MASQ_S_CLOSE]       =       10*HZ,
1033 +               [IP_MASQ_S_CLOSE_WAIT]  =       60*HZ,
1034 +               [IP_MASQ_S_LAST_ACK]    =       30*HZ,
1035 +               [IP_MASQ_S_LISTEN]      =       2*60*HZ,
1036 +               [IP_MASQ_S_SYNACK]      =       100*HZ,
1037 +               [IP_MASQ_S_UDP]         =       3*60*HZ,
1038 +               [IP_MASQ_S_ICMP]        =       1*60*HZ,
1039 +               [IP_MASQ_S_LAST]        =       2*HZ,
1040 +       },      /* timeout */
1041 +};
1042 +
1043 +/*
1044 + *     Timeout table to use for the VS entries
1045 + *     If NULL we use the default table (masq_timeout_table).
1046 + *     Under flood attack we switch to masq_timeout_table_dos
1047 + */
1048 +
1049 +struct ip_masq_timeout_table *ip_vs_timeout_table = &masq_timeout_table;
1050 +
1051 +#endif /* CONFIG_IP_MASQUERADE_VS */
1052 +
1053 +#ifdef CONFIG_IP_MASQUERADE_VS
1054 +#define MASQUERADE_EXPIRE_RETRY(ms)    (ms->timeout_table? ms->timeout_table->timeout[IP_MASQ_S_TIME_WAIT] : masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT])
1055 +#else
1056  #define MASQUERADE_EXPIRE_RETRY      masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT]
1057 +#endif
1058  
1059  static const char * state_name_table[IP_MASQ_S_LAST+1] = {
1060 -       "NONE",         /*      IP_MASQ_S_NONE, */
1061 -       "ESTABLISHED",  /*      IP_MASQ_S_ESTABLISHED,  */
1062 -       "SYN_SENT",     /*      IP_MASQ_S_SYN_SENT,     */
1063 -       "SYN_RECV",     /*      IP_MASQ_S_SYN_RECV,     */
1064 -       "FIN_WAIT",     /*      IP_MASQ_S_FIN_WAIT,     */
1065 -       "TIME_WAIT",    /*      IP_MASQ_S_TIME_WAIT,    */
1066 -       "CLOSE",        /*      IP_MASQ_S_CLOSE,        */
1067 -       "CLOSE_WAIT",   /*      IP_MASQ_S_CLOSE_WAIT,   */
1068 -       "LAST_ACK",     /*      IP_MASQ_S_LAST_ACK,     */
1069 -       "LISTEN",       /*      IP_MASQ_S_LISTEN,       */
1070 -       "UDP",          /*      IP_MASQ_S_UDP,  */
1071 -       "ICMP",         /*      IP_MASQ_S_ICMP, */
1072 -       "BUG!",         /*      IP_MASQ_S_LAST  */
1073 +       [IP_MASQ_S_NONE]        =       "NONE",
1074 +       [IP_MASQ_S_ESTABLISHED] =       "ESTABLISHED",
1075 +       [IP_MASQ_S_SYN_SENT]    =       "SYN_SENT",
1076 +       [IP_MASQ_S_SYN_RECV]    =       "SYN_RECV",
1077 +       [IP_MASQ_S_FIN_WAIT]    =       "FIN_WAIT",
1078 +       [IP_MASQ_S_TIME_WAIT]   =       "TIME_WAIT",
1079 +       [IP_MASQ_S_CLOSE]       =       "CLOSE",
1080 +       [IP_MASQ_S_CLOSE_WAIT]  =       "CLOSE_WAIT",
1081 +       [IP_MASQ_S_LAST_ACK]    =       "LAST_ACK",
1082 +       [IP_MASQ_S_LISTEN]      =       "LISTEN",
1083 +#ifdef CONFIG_IP_MASQUERADE_VS
1084 +       [IP_MASQ_S_SYNACK]      =       "SYNACK",
1085 +#endif
1086 +       [IP_MASQ_S_UDP]         =       "UDP",
1087 +       [IP_MASQ_S_ICMP]        =       "ICMP",
1088 +       [IP_MASQ_S_LAST]        =       "BUG!",
1089  };
1090  
1091  #define mNO IP_MASQ_S_NONE
1092 @@ -150,6 +240,9 @@
1093  #define mCW IP_MASQ_S_CLOSE_WAIT
1094  #define mLA IP_MASQ_S_LAST_ACK
1095  #define mLI IP_MASQ_S_LISTEN
1096 +#ifdef CONFIG_IP_MASQUERADE_VS
1097 +#define mSA IP_MASQ_S_SYNACK
1098 +#endif
1099  
1100  struct masq_tcp_states_t {
1101         int next_state[IP_MASQ_S_LAST]; /* should be _LAST_TCP */
1102 @@ -159,46 +252,111 @@
1103  {
1104         if (state >= IP_MASQ_S_LAST)
1105                 return "ERR!";
1106 -       return state_name_table[state];
1107 +       return state_name_table[state] ? state_name_table[state] : "?";
1108  }
1109  
1110 +#ifndef CONFIG_IP_MASQUERADE_VS
1111 +
1112  struct masq_tcp_states_t masq_tcp_states [] = {
1113  /*     INPUT */
1114  /*       mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI      */
1115  /*syn*/        {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR }},
1116  /*fin*/        {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI }},
1117 -/*ack*/        {{mCL, mES, mSS, mSR, mFW, mTW, mCL, mCW, mCL, mLI }},
1118 +/*ack*/        {{mCL, mES, mSS, mES, mFW, mTW, mCL, mCW, mCL, mLI }},
1119  /*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI }},
1120  
1121  /*     OUTPUT */
1122  /*       mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI      */
1123 -/*syn*/        {{mSS, mES, mSS, mES, mSS, mSS, mSS, mSS, mSS, mLI }},
1124 +/*syn*/        {{mSS, mES, mSS, mSR, mSS, mSS, mSS, mSS, mSS, mLI }},
1125  /*fin*/        {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI }},
1126 -/*ack*/        {{mES, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mES }},
1127 +/*ack*/        {{mES, mES, mSS, mES, mFW, mTW, mCL, mCW, mLA, mES }},
1128  /*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL }},
1129  };
1130  
1131 -static __inline__ int masq_tcp_state_idx(struct tcphdr *th, int output) 
1132 +#else /* CONFIG_IP_MASQUERADE_VS */
1133 +
1134 +struct masq_tcp_states_t masq_tcp_states [] = {
1135 +/*     INPUT */
1136 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1137 +/*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR, mSR }},
1138 +/*fin*/ {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI, mTW }},
1139 +/*ack*/ {{mCL, mES, mSS, mES, mFW, mTW, mCL, mCW, mCL, mLI, mES }},
1140 +/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI, mSR }},
1141 +
1142 +/*     OUTPUT */
1143 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1144 +/*syn*/ {{mSS, mES, mSS, mSR, mSS, mSS, mSS, mSS, mSS, mLI, mSR }},
1145 +/*fin*/ {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI, mTW }},
1146 +/*ack*/ {{mES, mES, mSS, mES, mFW, mTW, mCL, mCW, mLA, mES, mES }},
1147 +/*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL, mCL }},
1148 +
1149 +/*     INPUT-ONLY */
1150 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1151 +/*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR, mSR }},
1152 +/*fin*/ {{mCL, mFW, mSS, mTW, mFW, mTW, mCL, mCW, mLA, mLI, mTW }},
1153 +/*ack*/ {{mCL, mES, mSS, mES, mFW, mTW, mCL, mCW, mCL, mLI, mES }},
1154 +/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI, mCL }},
1155 +};
1156 +
1157 +struct masq_tcp_states_t masq_tcp_states_dos [] = {
1158 +/*     INPUT */
1159 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1160 +/*syn*/ {{mSR, mES, mES, mSR, mSR, mSR, mSR, mSR, mSR, mSR, mSA }},
1161 +/*fin*/ {{mCL, mCW, mSS, mTW, mTW, mTW, mCL, mCW, mLA, mLI, mSA }},
1162 +/*ack*/ {{mCL, mES, mSS, mSR, mFW, mTW, mCL, mCW, mCL, mLI, mSA }},
1163 +/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI, mCL }},
1164 +
1165 +/*     OUTPUT */
1166 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1167 +/*syn*/ {{mSS, mES, mSS, mSA, mSS, mSS, mSS, mSS, mSS, mLI, mSA }},
1168 +/*fin*/ {{mTW, mFW, mSS, mTW, mFW, mTW, mCL, mTW, mLA, mLI, mTW }},
1169 +/*ack*/ {{mES, mES, mSS, mES, mFW, mTW, mCL, mCW, mLA, mES, mES }},
1170 +/*rst*/ {{mCL, mCL, mSS, mCL, mCL, mTW, mCL, mCL, mCL, mCL, mCL }},
1171 +
1172 +/*     INPUT-ONLY */
1173 +/*        mNO, mES, mSS, mSR, mFW, mTW, mCL, mCW, mLA, mLI, mSA        */
1174 +/*syn*/ {{mSA, mES, mES, mSR, mSA, mSA, mSA, mSA, mSA, mSA, mSA }},
1175 +/*fin*/ {{mCL, mFW, mSS, mTW, mFW, mTW, mCL, mCW, mLA, mLI, mTW }},
1176 +/*ack*/ {{mCL, mES, mSS, mES, mFW, mTW, mCL, mCW, mCL, mLI, mES }},
1177 +/*rst*/ {{mCL, mCL, mCL, mSR, mCL, mCL, mCL, mCL, mLA, mLI, mCL }},
1178 +};
1179 +
1180 +struct masq_tcp_states_t *ip_vs_state_table = masq_tcp_states;
1181 +
1182 +void ip_masq_secure_tcp_set(int on)
1183 +{
1184 +       if (on) {
1185 +               ip_vs_state_table = masq_tcp_states_dos;
1186 +               ip_vs_timeout_table = &masq_timeout_table_dos;
1187 +       } else {
1188 +               ip_vs_state_table = masq_tcp_states;
1189 +               ip_vs_timeout_table = &masq_timeout_table;
1190 +       }
1191 +}
1192 +
1193 +#endif /* CONFIG_IP_MASQUERADE_VS */
1194 +
1195 +#define MASQ_STATE_INPUT       0
1196 +#define MASQ_STATE_OUTPUT      4
1197 +#define MASQ_STATE_INPUT_ONLY  8
1198 +
1199 +static __inline__ int masq_tcp_state_idx(struct tcphdr *th, int state_off) 
1200  {
1201         /*
1202 -        *      [0-3]: input states, [4-7]: output.
1203 +        *      [0-3]: input states, [4-7]: output, [8-11] input only states.
1204          */
1205 -       if (output) 
1206 -               output=4;
1207 -
1208         if (th->rst)
1209 -               return output+3;
1210 +               return state_off+3;
1211         if (th->syn)
1212 -               return output+0;
1213 +               return state_off+0;
1214         if (th->fin)
1215 -               return output+1;
1216 +               return state_off+1;
1217         if (th->ack)
1218 -               return output+2;
1219 +               return state_off+2;
1220         return -1;
1221  }
1222  
1223  
1224 -
1225  static int masq_set_state_timeout(struct ip_masq *ms, int state)
1226  {
1227         struct ip_masq_timeout_table *mstim = ms->timeout_table;
1228 @@ -221,18 +379,34 @@
1229         return state;
1230  }
1231  
1232 -static int masq_tcp_state(struct ip_masq *ms, int output, struct tcphdr *th)
1233 +static int masq_tcp_state(struct ip_masq *ms, int state_off, struct tcphdr *th)
1234  {
1235         int state_idx;
1236         int new_state = IP_MASQ_S_CLOSE;
1237  
1238 -       if ((state_idx = masq_tcp_state_idx(th, output)) < 0) {
1239 +#ifdef CONFIG_IP_MASQUERADE_VS
1240 +       /*
1241 +         *    Update state offset to INPUT_ONLY if necessary
1242 +         *    or delete NO_OUTPUT flag if output packet detected
1243 +         */
1244 +       if (ms->flags & IP_MASQ_F_VS_NO_OUTPUT) {
1245 +               if (state_off == MASQ_STATE_OUTPUT)
1246 +                       ms->flags &= ~IP_MASQ_F_VS_NO_OUTPUT;
1247 +               else state_off = MASQ_STATE_INPUT_ONLY;
1248 +       } 
1249 +#endif
1250 +
1251 +       if ((state_idx = masq_tcp_state_idx(th, state_off)) < 0) {
1252                 IP_MASQ_DEBUG(1, "masq_state_idx(%d)=%d!!!\n", 
1253 -                       output, state_idx);
1254 +                       state_off, state_idx);
1255                 goto tcp_state_out;
1256         }
1257  
1258 +#ifdef CONFIG_IP_MASQUERADE_VS
1259 +       new_state = ip_vs_state_table[state_idx].next_state[ms->state];
1260 +#else
1261         new_state = masq_tcp_states[state_idx].next_state[ms->state];
1262 +#endif
1263         
1264  tcp_state_out:
1265         if (new_state!=ms->state)
1266 @@ -247,6 +421,15 @@
1267                                 ntohl(ms->daddr), ntohs(ms->dport),
1268                                 ip_masq_state_name(ms->state),
1269                                 ip_masq_state_name(new_state));
1270 +
1271 +#ifdef CONFIG_IP_MASQUERADE_VS
1272 +        /*
1273 +         *    Increase/Decrease the active connection counter and
1274 +         *    set ms->flags according to ms->state and new_state.
1275 +         */
1276 +        ip_vs_set_state(ms, new_state);
1277 +#endif /* CONFIG_IP_MASQUERADE_VS */
1278 +
1279         return masq_set_state_timeout(ms, new_state);
1280  }
1281  
1282 @@ -254,7 +437,7 @@
1283  /*
1284   *     Handle state transitions
1285   */
1286 -static int masq_set_state(struct ip_masq *ms, int output, struct iphdr *iph, void *tp)
1287 +static int masq_set_state(struct ip_masq *ms, int state_off, struct iphdr *iph, void *tp)
1288  {
1289         switch (iph->protocol) {
1290                 case IPPROTO_ICMP:
1291 @@ -262,7 +445,7 @@
1292                 case IPPROTO_UDP:
1293                         return masq_set_state_timeout(ms, IP_MASQ_S_UDP);
1294                 case IPPROTO_TCP:
1295 -                       return masq_tcp_state(ms, output, tp);
1296 +                       return masq_tcp_state(ms, state_off, tp);
1297         }
1298         return -1;
1299  }
1300 @@ -361,6 +544,9 @@
1301  
1302  EXPORT_SYMBOL(ip_masq_get_debug_level);
1303  EXPORT_SYMBOL(ip_masq_new);
1304 +#ifdef CONFIG_IP_MASQUERADE_VS
1305 +EXPORT_SYMBOL(ip_masq_new_vs);
1306 +#endif /* CONFIG_IP_MASQUERADE_VS */
1307  EXPORT_SYMBOL(ip_masq_listen);
1308  EXPORT_SYMBOL(ip_masq_free_ports);
1309  EXPORT_SYMBOL(ip_masq_out_get);
1310 @@ -423,9 +609,17 @@
1311  {
1312          if (tout) {
1313                  ms->timer.expires = jiffies+tout;
1314 +#ifdef CONFIG_IP_MASQUERADE_VS
1315 +                add_sltimer(&ms->timer);
1316 +#else
1317                  add_timer(&ms->timer);
1318 +#endif
1319          } else {
1320 +#ifdef CONFIG_IP_MASQUERADE_VS
1321 +                del_sltimer(&ms->timer);
1322 +#else
1323                  del_timer(&ms->timer);
1324 +#endif
1325          }
1326  }
1327  
1328 @@ -741,6 +935,10 @@
1329         struct ip_masq *ms;
1330  
1331         read_lock(&__ip_masq_lock);
1332 +#ifdef CONFIG_IP_MASQUERADE_VS
1333 +        ms = __ip_vs_out_get(protocol, s_addr, s_port, d_addr, d_port);
1334 +        if (ms == NULL)
1335 +#endif /* CONFIG_IP_MASQUERADE_VS */
1336         ms = __ip_masq_out_get(protocol, s_addr, s_port, d_addr, d_port);
1337         read_unlock(&__ip_masq_lock);
1338  
1339 @@ -754,7 +952,11 @@
1340         struct ip_masq *ms;
1341  
1342         read_lock(&__ip_masq_lock);
1343 -       ms =  __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port);
1344 +#ifdef CONFIG_IP_MASQUERADE_VS
1345 +        ms = __ip_vs_in_get(protocol, s_addr, s_port, d_addr, d_port);
1346 +        if (ms == NULL)
1347 +#endif /* CONFIG_IP_MASQUERADE_VS */
1348 +        ms =  __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port);
1349         read_unlock(&__ip_masq_lock);
1350  
1351         if (ms)
1352 @@ -791,7 +993,11 @@
1353  static void masq_expire(unsigned long data)
1354  {
1355         struct ip_masq *ms = (struct ip_masq *)data;
1356 +#ifdef CONFIG_IP_MASQUERADE_VS
1357 +       ms->timeout = MASQUERADE_EXPIRE_RETRY(ms);
1358 +#else
1359         ms->timeout = MASQUERADE_EXPIRE_RETRY;
1360 +#endif
1361  
1362         /*
1363          *      hey, I'm using it
1364 @@ -826,6 +1032,15 @@
1365         if (ms->control) 
1366                 ip_masq_control_del(ms);
1367  
1368 +#ifdef CONFIG_IP_MASQUERADE_VS
1369 +        if (ms->flags & IP_MASQ_F_VS) {
1370 +                if (ip_vs_unhash(ms)) {
1371 +                        ip_vs_unbind_masq(ms);
1372 +                        ip_masq_unbind_app(ms);
1373 +                }
1374 +        }
1375 +        else
1376 +#endif /* CONFIG_IP_MASQUERADE_VS */
1377          if (ip_masq_unhash(ms)) {
1378                 if (ms->flags&IP_MASQ_F_MPORT) {
1379                         atomic_dec(&mport_count);
1380 @@ -839,6 +1054,9 @@
1381          *      refcnt==1 implies I'm the only one referrer
1382          */
1383         if (atomic_read(&ms->refcnt) == 1) {
1384 +#ifdef IP_MASQ_MANY_STATE_TABLES
1385 +               ip_masq_timeout_detach(ms);
1386 +#endif
1387                 kfree_s(ms,sizeof(*ms));
1388                 sysctl_ip_always_defrag--;
1389                 MOD_DEC_USE_COUNT;
1390 @@ -1077,6 +1295,83 @@
1391          return NULL;
1392  }
1393  
1394 +
1395 +#ifdef CONFIG_IP_MASQUERADE_VS
1396 +/*
1397 + *  Create a new masquerade entry for IPVS, all parameters {maddr,
1398 + *  mport, saddr, sport, daddr, dport, mflags} are known. No need
1399 + *  to allocate a free mport. And, hash it into the ip_vs_table.
1400 + *
1401 + *  Be careful, it can be called from u-space
1402 + */
1403 +
1404 +struct ip_masq * ip_masq_new_vs(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags)
1405 +{
1406 +        struct ip_masq *ms;
1407 +        static int n_fails = 0;
1408 +       int prio;
1409 +
1410 +       prio = (mflags&IP_MASQ_F_USER) ? GFP_KERNEL : GFP_ATOMIC;
1411 +
1412 +        ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), prio);
1413 +        if (ms == NULL) {
1414 +                if (++n_fails < 5)
1415 +                        IP_VS_ERR("ip_masq_new_vs(proto=%s): no memory available.\n",
1416 +                                  masq_proto_name(proto));
1417 +                return NULL;
1418 +        }
1419 +       MOD_INC_USE_COUNT;
1420 +        
1421 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,14)
1422 +       sysctl_ip_always_defrag++;
1423 +#endif
1424 +        memset(ms, 0, sizeof(*ms));
1425 +       INIT_LIST_HEAD(&ms->s_list);
1426 +       INIT_LIST_HEAD(&ms->m_list);
1427 +       INIT_LIST_HEAD(&ms->d_list);
1428 +       init_timer(&ms->timer);
1429 +       ms->timer.data     = (unsigned long)ms;
1430 +       ms->timer.function = masq_expire;
1431 +       ip_masq_timeout_attach(ms,ip_vs_timeout_table);
1432 +        ms->protocol      = proto;
1433 +        ms->saddr         = saddr;
1434 +        ms->sport         = sport;
1435 +        ms->daddr         = daddr;
1436 +        ms->dport         = dport;
1437 +        ms->maddr          = maddr;
1438 +        ms->mport          = mport;
1439 +        ms->flags         = mflags;
1440 +        ms->app_data      = NULL;
1441 +        ms->control       = NULL;
1442 +       
1443 +       atomic_set(&ms->n_control,0);
1444 +       atomic_set(&ms->refcnt,0);
1445 +       atomic_set(&ms->in_pkts,0);
1446 +
1447 +        if (mflags & IP_MASQ_F_USER)   
1448 +                write_lock_bh(&__ip_masq_lock);
1449 +        else 
1450 +                write_lock(&__ip_masq_lock);
1451 +
1452 +        /*
1453 +         *  Hash it in the ip_vs_table
1454 +         */
1455 +        ip_vs_hash(ms);
1456 +
1457 +        if (mflags & IP_MASQ_F_USER)   
1458 +                write_unlock_bh(&__ip_masq_lock);
1459 +        else 
1460 +                write_unlock(&__ip_masq_lock);
1461 +
1462 +        ip_masq_bind_app(ms);
1463 +        n_fails = 0;
1464 +        atomic_inc(&ms->refcnt);
1465 +        masq_set_state_timeout(ms, IP_MASQ_S_NONE);
1466 +        return ms;
1467 +}
1468 +#endif /* CONFIG_IP_MASQUERADE_VS */
1469 +
1470 +
1471  /*
1472   *     Get transport protocol data offset, check against size
1473   *     return:
1474 @@ -1153,25 +1448,20 @@
1475                 return -1;
1476         }
1477  
1478 +#ifndef CONFIG_IP_MASQUERADE_VS
1479         /* Lets determine our maddr now, shall we? */
1480 -       if (maddr == 0) {
1481 -               struct rtable *rt;
1482 -               struct rtable *skb_rt = (struct rtable*)skb->dst;
1483 -               struct device *skb_dev = skb_rt->u.dst.dev;
1484 -
1485 -               if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos)|RTO_CONN, skb_dev?skb_dev->ifindex:0)) {
1486 -                       /* Fallback on old method */
1487 -                       /* This really shouldn't happen... */
1488 -                       maddr = inet_select_addr(skb_dev, skb_rt->rt_gateway, RT_SCOPE_UNIVERSE);
1489 -               } else {
1490 -                       /* Route lookup succeeded */
1491 -                       maddr = rt->rt_src;
1492 -                       ip_rt_put(rt);
1493 -               }
1494 +       if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) {
1495 +               return -1;
1496         }
1497 +#endif
1498  
1499         switch (iph->protocol) {
1500         case IPPROTO_ICMP:
1501 +#ifdef CONFIG_IP_MASQUERADE_VS
1502 +               if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) {
1503 +                       return -1;
1504 +               }
1505 +#endif
1506                 return(ip_fw_masq_icmp(skb_p, maddr));
1507         case IPPROTO_UDP:
1508                 if (h.uh->check == 0)
1509 @@ -1230,6 +1520,17 @@
1510  
1511          ms = ip_masq_out_get_iph(iph);
1512          if (ms!=NULL) {
1513 +#ifdef CONFIG_IP_MASQUERADE_VS
1514 +               if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) {
1515 +                       /*
1516 +                        *      Drop this packet but don't
1517 +                        *      start the timer from the beginning
1518 +                        */
1519 +                       __ip_masq_put(ms);
1520 +                       add_sltimer(&ms->timer);
1521 +                       return -1;
1522 +               }
1523 +#endif
1524  
1525                  /*
1526                   *     If sysctl !=0 and no pkt has been received yet
1527 @@ -1280,6 +1581,33 @@
1528                         ms->daddr = iph->daddr;
1529                 }
1530          } else {
1531 +#ifdef CONFIG_IP_MASQUERADE_VS
1532 +               struct ip_vs_dest *dest;
1533 +
1534 +               /*
1535 +                *      Check if the packet is from our real service
1536 +                */
1537 +               read_lock(&__ip_vs_lock);
1538 +               dest = __ip_vs_lookup_real_service(iph->protocol,
1539 +                                                   iph->saddr, h.portp[0]);
1540 +               read_unlock(&__ip_vs_lock);
1541 +               if (dest) {
1542 +                       /*
1543 +                        *      Notify the real server: there is
1544 +                        *      no existing entry if it is not RST packet
1545 +                         *      or not TCP packet.
1546 +                        */
1547 +                        if (!h.th->rst || iph->protocol != IPPROTO_TCP)
1548 +                                icmp_send(skb, ICMP_DEST_UNREACH,
1549 +                                          ICMP_PORT_UNREACH, 0);
1550 +                       return -1;
1551 +               }
1552 +
1553 +               if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) {
1554 +                       return -1;
1555 +               }
1556 +#endif
1557 +
1558                 /*
1559                  *      Nope, not found, create a new entry for it
1560                  */
1561 @@ -1392,11 +1720,17 @@
1562         IP_MASQ_DEBUG(2, "O-routed from %08X:%04X with masq.addr %08X\n",
1563                 ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr));
1564  
1565 -       masq_set_state(ms, 1, iph, h.portp);
1566 +#ifdef CONFIG_IP_MASQUERADE_VS
1567 +       /* do the IPVS statistics */
1568 +        if (ms->flags & IP_MASQ_F_VS)
1569 +                ip_vs_out_stats(ms, skb);
1570 +#endif
1571 +
1572 +       masq_set_state(ms, MASQ_STATE_OUTPUT, iph, h.portp);
1573         ip_masq_put(ms);
1574  
1575         return 0;
1576 - }
1577 +}
1578  
1579  /*
1580   *     Restore original addresses and ports in the original IP
1581 @@ -1438,6 +1772,12 @@
1582         ms = __ip_masq_out_get(iph->protocol,
1583                         iph->daddr, portp[1],
1584                         iph->saddr, portp[0]);
1585 +#ifdef CONFIG_IP_MASQUERADE_VS
1586 +        if (ms == NULL)
1587 +                ms = __ip_vs_out_get(iph->protocol,
1588 +                                     iph->daddr, portp[1],
1589 +                                     iph->saddr, portp[0]);
1590 +#endif /* CONFIG_IP_MASQUERADE_VS */
1591         read_unlock(&__ip_masq_lock);
1592         if (ms) {
1593                 IP_MASQ_DEBUG(1, "Incoming frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
1594 @@ -1459,6 +1799,12 @@
1595         ms = __ip_masq_in_get(iph->protocol,
1596                         iph->daddr, portp[1],
1597                         iph->saddr, portp[0]);
1598 +#ifdef CONFIG_IP_MASQUERADE_VS
1599 +        if (ms == NULL)
1600 +                ms = __ip_vs_in_get(iph->protocol,
1601 +                                    iph->daddr, portp[1],
1602 +                                    iph->saddr, portp[0]);
1603 +#endif /* CONFIG_IP_MASQUERADE_VS */
1604         read_unlock(&__ip_masq_lock);
1605         if (ms) {
1606                 IP_MASQ_DEBUG(1, "Outgoing frag_need rewrited from %d.%d.%d.%d to %d.%d.%d.%d\n",
1607 @@ -1469,8 +1815,8 @@
1608                 return 1;
1609         }
1610         return 0;
1611 -
1612  }
1613 +
1614  /*
1615   *     Handle ICMP messages in forward direction.
1616   *     Find any that might be relevant, check against existing connections,
1617 @@ -1556,7 +1902,7 @@
1618                        ntohs(icmp_id(icmph)),
1619                        icmph->type);
1620  
1621 -               masq_set_state(ms, 1, iph, icmph);
1622 +               masq_set_state(ms, MASQ_STATE_OUTPUT, iph, icmph);
1623                 ip_masq_put(ms);
1624  
1625                 return 1;
1626 @@ -1684,11 +2030,28 @@
1627                                pptr[1],
1628                                ciph->saddr,
1629                                pptr[0]);
1630 +#ifdef CONFIG_IP_MASQUERADE_VS
1631 +        if (ms == NULL) {
1632 +                ms = __ip_vs_out_get(ciph->protocol,
1633 +                                   ciph->daddr, pptr[1],
1634 +                                   ciph->saddr, pptr[0]);
1635 +        }
1636 +#endif /* CONFIG_IP_MASQUERADE_VS */
1637         read_unlock(&__ip_masq_lock);
1638  
1639         if (ms == NULL)
1640                 return 0;
1641  
1642 +#ifdef CONFIG_IP_MASQUERADE_VS
1643 +        if (IP_MASQ_VS_FWD(ms) != 0) {
1644 +                IP_VS_INFO("shouldn't get here, because tun/dr is on the half connection\n");
1645 +        }
1646 +
1647 +        /* do the IPVS statistics */
1648 +        if (ms->flags & IP_MASQ_F_VS)
1649 +                ip_vs_out_stats(ms, skb);
1650 +#endif /* CONFIG_IP_MASQUERADE_VS */
1651 +
1652         /* Now we do real damage to this packet...! */
1653         /* First change the source IP address, and recalc checksum */
1654         iph->saddr = ms->maddr;
1655 @@ -1739,6 +2102,87 @@
1656         return skb;
1657  }
1658  
1659 +#ifdef CONFIG_IP_MASQUERADE_VS
1660 +
1661 +/*
1662 + * Check whether this ICMP packet in the FORWARD path is for
1663 + * related IPVS connection and needs to be delivered locally
1664 + */
1665 +
1666 +int ip_vs_forwarding_related_icmp(struct sk_buff *skb)
1667 +{
1668 +       struct iphdr    *iph = skb->nh.iph;
1669 +       struct icmphdr  *icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
1670 +       unsigned short size = ntohs(iph->tot_len) - (iph->ihl * 4);
1671 +       struct iphdr    *ciph;  /* The ip header contained within the ICMP */
1672 +       __u16           *pptr;  /* port numbers from TCP/UDP contained header */
1673 +       struct ip_masq  *ms;
1674 +       union ip_masq_tphdr h;
1675 +       int doff;
1676 +        
1677 +       /*
1678 +        *      PACKET_HOST only, see ip_forward
1679 +        */
1680 +
1681 +       h.raw = (char*) iph + iph->ihl * 4;
1682 +
1683 +       doff = proto_doff(iph->protocol, h.raw, size);
1684 +
1685 +       if (doff <= 0) return 0;
1686 +
1687 +       IP_VS_DBG(10, "icmp fwd/rev (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n",
1688 +               icmph->type, ntohs(icmp_id(icmph)),
1689 +               NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
1690 +
1691 +       if ((icmph->type != ICMP_DEST_UNREACH) &&
1692 +           (icmph->type != ICMP_SOURCE_QUENCH) &&
1693 +           (icmph->type != ICMP_TIME_EXCEEDED))
1694 +               return 0;
1695 +
1696 +       /*
1697 +        * If we get here we have an ICMP error of one of the above 3 types
1698 +        * Now find the contained IP header
1699 +        */
1700 +
1701 +       ciph = (struct iphdr *) (icmph + 1);
1702 +       size -= sizeof(struct icmphdr);
1703 +       if (size < sizeof(struct iphdr)) return 0;
1704 +
1705 +       /* We are only interested in ICMPs generated from TCP or UDP packets */
1706 +       if (ciph->protocol == IPPROTO_TCP) {
1707 +               if (size < sizeof(struct tcphdr)) return 0;
1708 +       }
1709 +       else
1710 +       if (ciph->protocol == IPPROTO_UDP) {
1711 +               if (size < sizeof(struct udphdr)) return 0;
1712 +       }
1713 +       else return 0;
1714 +
1715 +       /* We don't ensure for now the checksum is correct */
1716 +
1717 +       /* This is pretty much what __ip_masq_in_get_iph() does,
1718 +           except params are wrong way round */
1719 +        pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
1720 +        
1721 +       read_lock(&__ip_masq_lock);
1722 +        ms = __ip_vs_in_get(ciph->protocol,
1723 +                            ciph->daddr,
1724 +                            pptr[1],
1725 +                            ciph->saddr,
1726 +                            pptr[0]);
1727 +       read_unlock(&__ip_masq_lock);
1728 +
1729 +       if (!ms) return 0;
1730 +       IP_VS_DBG(10, "Delivering locally ICMP for %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u to %u.%u.%u.%u\n",
1731 +               NIPQUAD(ciph->daddr), ntohs(pptr[1]),
1732 +               NIPQUAD(ciph->saddr), ntohs(pptr[0]),
1733 +               NIPQUAD(ms->saddr));
1734 +       __ip_masq_put(ms);
1735 +
1736 +       return 1;
1737 +}
1738 +#endif /* CONFIG_IP_MASQUERADE_VS */
1739 +
1740  /*
1741   *     Handle ICMP messages in reverse (demasquerade) direction.
1742   *     Find any that might be relevant, check against existing connections,
1743 @@ -1812,7 +2256,7 @@
1744                        ntohs(icmp_id(icmph)),
1745                        icmph->type);
1746  
1747 -               masq_set_state(ms, 0, iph, icmph);
1748 +               masq_set_state(ms, MASQ_STATE_INPUT, iph, icmph);
1749                 ip_masq_put(ms);
1750  
1751                 return 1;
1752 @@ -1914,9 +2358,11 @@
1753          * *outgoing* so the ports are reversed (and addresses)
1754          */
1755         pptr = (__u16 *)&(((char *)ciph)[csize]);
1756 +#ifndef CONFIG_IP_MASQUERADE_VS
1757         if (ntohs(pptr[0]) < PORT_MASQ_BEGIN ||
1758             ntohs(pptr[0]) > PORT_MASQ_END)
1759                 return 0;
1760 +#endif
1761  
1762         /* Ensure the checksum is correct */
1763         if (ip_compute_csum((unsigned char *) icmph, len))
1764 @@ -1927,7 +2373,6 @@
1765                 return(-1);
1766         }
1767  
1768 -
1769         IP_MASQ_DEBUG(2, "Handling reverse ICMP for %08X:%04X -> %08X:%04X\n",
1770                ntohl(ciph->saddr), ntohs(pptr[0]),
1771                ntohl(ciph->daddr), ntohs(pptr[1]));
1772 @@ -1935,6 +2380,14 @@
1773  
1774         /* This is pretty much what __ip_masq_in_get_iph() does, except params are wrong way round */
1775         read_lock(&__ip_masq_lock);
1776 +#ifdef CONFIG_IP_MASQUERADE_VS
1777 +        ms = __ip_vs_in_get(ciph->protocol,
1778 +                            ciph->daddr,
1779 +                            pptr[1],
1780 +                            ciph->saddr,
1781 +                            pptr[0]);
1782 +        if (ms == NULL)
1783 +#endif /* CONFIG_IP_MASQUERADE_VS */
1784         ms = __ip_masq_in_get(ciph->protocol,
1785                               ciph->daddr,
1786                               pptr[1],
1787 @@ -1945,10 +2398,23 @@
1788         if (ms == NULL)
1789                 return 0;
1790  
1791 +#ifdef CONFIG_IP_MASQUERADE_VS
1792 +        /* do the IPVS statistics */
1793 +        if (ms->flags & IP_MASQ_F_VS)
1794 +                ip_vs_in_stats(ms, skb);
1795 +
1796 +        if (IP_MASQ_VS_FWD(ms) != 0) {
1797 +                int ret = ip_vs_forward(skb, ms);
1798 +                __ip_masq_put(ms);
1799 +                return ret;
1800 +        }
1801 +#endif /* CONFIG_IP_MASQUERADE_VS */
1802 +
1803         if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
1804                 __ip_masq_put(ms);
1805                 return -1;
1806         }
1807 +
1808         ciph = (struct iphdr *) (icmph + 1);
1809         pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
1810  
1811 @@ -1998,7 +2464,10 @@
1812         int csum = 0;
1813         int csum_ok = 0;
1814         __u32 maddr;
1815 -
1816 +#ifdef CONFIG_IP_MASQUERADE_VS
1817 +        struct ip_vs_service *svc = NULL;
1818 +#endif
1819 +        
1820         /*
1821          *      Big tappo: only PACKET_HOST (nor loopback neither mcasts)
1822          *      ... don't know why 1st test DOES NOT include 2nd (?)
1823 @@ -2039,13 +2508,21 @@
1824                 return(ip_fw_demasq_icmp(skb_p));
1825         case IPPROTO_TCP:
1826         case IPPROTO_UDP:
1827 -               /* 
1828 +               /*
1829                  *      Make sure packet is in the masq range 
1830                  *      ... or some mod-ule relaxes input range
1831                  *      ... or there is still some `special' mport opened
1832                  */
1833 +#ifdef CONFIG_IP_MASQUERADE_VS
1834 +                svc = ip_vs_lookup_service(skb->fwmark,
1835 +                                           iph->protocol, maddr, h.portp[1]);
1836 +                if (!svc &&
1837 +                    (ntohs(h.portp[1]) < PORT_MASQ_BEGIN
1838 +                               || ntohs(h.portp[1]) > PORT_MASQ_END)
1839 +#else
1840                 if ((ntohs(h.portp[1]) < PORT_MASQ_BEGIN
1841                                 || ntohs(h.portp[1]) > PORT_MASQ_END)
1842 +#endif /* CONFIG_IP_MASQUERADE_VS */
1843  #ifdef CONFIG_IP_MASQUERADE_MOD
1844                                 && (ip_masq_mod_in_rule(skb, iph) != 1) 
1845  #endif
1846 @@ -2100,6 +2577,21 @@
1847  
1848          ms = ip_masq_in_get_iph(iph);
1849  
1850 +#ifdef CONFIG_IP_MASQUERADE_VS
1851 +       /*
1852 +        * Checking the server status
1853 +        */
1854 +       if (ms && ms->dest && !(ms->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1855 +               /*
1856 +                * If the dest is not available, don't restart the timer
1857 +                * of the packet, but silently drop it.
1858 +                */
1859 +               add_sltimer(&ms->timer);
1860 +               __ip_masq_put(ms);
1861 +               return -1;
1862 +       }
1863 +#endif
1864 +
1865         /*
1866          *      Give additional modules a chance to create an entry
1867          */
1868 @@ -2116,6 +2608,27 @@
1869         ip_masq_mod_in_update(skb, iph, ms);
1870  #endif
1871  
1872 +#ifdef CONFIG_IP_MASQUERADE_VS
1873 +       if (!ms &&
1874 +            (h.th->syn || (iph->protocol!=IPPROTO_TCP)) && svc) {
1875 +               if (ip_masq_todrop()) {
1876 +                       /*
1877 +                        * It seems that we are very loaded.
1878 +                        * We have to drop this packet :(
1879 +                        */
1880 +                       return -1;
1881 +               }
1882 +               /* 
1883 +                * Let the virtual server select a real server
1884 +                * for the incoming connection, and create a
1885 +                * masquerading entry.
1886 +                */ 
1887 +               ms = ip_vs_schedule(svc, iph);
1888 +                if (!ms)
1889 +                        return ip_vs_leave(svc, skb);
1890 +                ip_vs_conn_stats(ms, svc);
1891 +       }
1892 +#endif /* CONFIG_IP_MASQUERADE_VS */
1893  
1894          if (ms != NULL)
1895          {
1896 @@ -2168,13 +2681,43 @@
1897  
1898                  }
1899                 }
1900 +
1901 +#ifdef CONFIG_IP_MASQUERADE_VS
1902 +                /* do the IPVS statistics */
1903 +                if (ms->flags & IP_MASQ_F_VS)
1904 +                        ip_vs_in_stats(ms, skb);
1905 +                
1906 +               if (IP_MASQ_VS_FWD(ms) != 0) {
1907 +                        int ret;
1908 +
1909 +                        /*
1910 +                         *  Sorry for setting state of masq entry so early
1911 +                         *  no matter whether the packet is forwarded
1912 +                         *  successfully or not, because ip_vs_forward may
1913 +                         *  have already released the skb. Although it
1914 +                         *  breaks the original semantics, it won't lead to
1915 +                         *  serious errors. We look forward to fixing it
1916 +                         *  under the Rusty's netfilter framework both for
1917 +                         *  correctness and modularization.
1918 +                         */
1919 +                        masq_set_state(ms, MASQ_STATE_INPUT, iph, h.portp);
1920 +
1921 +                        ret = ip_vs_forward(skb, ms);
1922 +                        ip_masq_put(ms);
1923 +                        return ret;
1924 +               }
1925
1926 +                IP_VS_DBG(10, "masquerading packet...\n");
1927 +#endif /* CONFIG_IP_MASQUERADE_VS */
1928 +                
1929                 if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) {
1930                         ip_masq_put(ms);
1931                         return -1;
1932                 }
1933 +
1934                  iph->daddr = ms->saddr;
1935                  h.portp[1] = ms->sport;
1936 -
1937 +                
1938                 /*
1939                  *      Invalidate csum saving if tunnel has masq helper
1940                  */
1941 @@ -2231,15 +2774,28 @@
1942                                         h.uh->check = 0xFFFF;
1943                                 break;
1944                 }
1945 -                ip_send_check(iph);
1946 +               ip_send_check(iph);
1947  
1948                  IP_MASQ_DEBUG(2, "I-routed to %08X:%04X\n",ntohl(iph->daddr),ntohs(h.portp[1]));
1949  
1950 -               masq_set_state (ms, 0, iph, h.portp);
1951 +               masq_set_state(ms, MASQ_STATE_INPUT, iph, h.portp);
1952                 ip_masq_put(ms);
1953  
1954                  return 1;
1955         }
1956 +#ifdef CONFIG_IP_MASQUERADE_VS
1957 +       if (svc) {
1958 +                /*
1959 +                 * Drop packet if it belongs to virtual service but no entry
1960 +                 * is found or created. Furthermore, send DEST_UNREACH icmp
1961 +                 * packet to clients if it is not RST or it is not TCP.
1962 +                 */
1963 +               if (!h.th->rst || iph->protocol != IPPROTO_TCP) {
1964 +                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1965 +               }
1966 +               return -1;
1967 +       }
1968 +#endif
1969  
1970         /* sorry, all this trouble for a no-hit :) */
1971         return 0;
1972 @@ -2350,7 +2906,6 @@
1973                 len += sprintf(buffer+len, "%-127s\n", temp);
1974  
1975                 if(len >= length) {
1976 -
1977                         read_unlock_bh(&__ip_masq_lock);
1978                         goto done;
1979                 }
1980 @@ -2358,9 +2913,52 @@
1981         read_unlock_bh(&__ip_masq_lock);
1982  
1983         }
1984 -done:
1985  
1986 +#ifdef CONFIG_IP_MASQUERADE_VS
1987 +        for(idx = 0; idx < IP_VS_TAB_SIZE; idx++) 
1988 +       {
1989 +       /*
1990 +        *      Lock is actually only needed in the next loop;
1991 +        *      we are called from uspace: must stop bh.
1992 +        */
1993 +       read_lock_bh(&__ip_masq_lock);
1994  
1995 +       l = &ip_vs_table[idx];
1996 +       for (e=l->next; e!=l; e=e->next) {
1997 +               ms = list_entry(e, struct ip_masq, m_list);
1998 +               pos += 128;
1999 +               if (pos <= offset) {
2000 +                       len = 0;
2001 +                       continue;
2002 +               }
2003 +
2004 +               /*
2005 +                *      We have locked the tables, no need to del/add timers
2006 +                *      nor cli()  8)
2007 +                */
2008 +
2009 +               sprintf(temp,"%s %08X:%04X %08X:%04X %04X %08X %6d %6d %7lu",
2010 +                       masq_proto_name(ms->protocol),
2011 +                       ntohl(ms->saddr), ntohs(ms->sport),
2012 +                       ntohl(ms->daddr), ntohs(ms->dport),
2013 +                       ntohs(ms->mport),
2014 +                       ms->out_seq.init_seq,
2015 +                       ms->out_seq.delta,
2016 +                       ms->out_seq.previous_delta,
2017 +                       ms->timer.expires-jiffies);
2018 +               len += sprintf(buffer+len, "%-127s\n", temp);
2019 +
2020 +               if(len >= length) {
2021 +                       read_unlock_bh(&__ip_masq_lock);
2022 +                       goto done;
2023 +               }
2024 +        }
2025 +       read_unlock_bh(&__ip_masq_lock);
2026 +
2027 +       }
2028 +#endif /* CONFIG_IP_MASQUERADE_VS */
2029 +
2030 +done:
2031         begin = len - (pos - offset);
2032         *start = buffer + begin;
2033         len -= begin;
2034 @@ -2386,17 +2984,29 @@
2035                                 len, sizeof(struct ip_fw_masq));
2036         } else {
2037                 masq = (struct ip_fw_masq *)m;
2038 -               if (masq->tcp_timeout)
2039 +               if (masq->tcp_timeout) {
2040                         masq_timeout_table.timeout[IP_MASQ_S_ESTABLISHED]
2041 +#ifdef CONFIG_IP_MASQUERADE_VS
2042 +                               = masq_timeout_table_dos.timeout[IP_MASQ_S_ESTABLISHED]
2043 +#endif
2044                                 = masq->tcp_timeout;
2045 +               }
2046  
2047 -               if (masq->tcp_fin_timeout)
2048 +               if (masq->tcp_fin_timeout) {
2049                         masq_timeout_table.timeout[IP_MASQ_S_FIN_WAIT]
2050 +#ifdef CONFIG_IP_MASQUERADE_VS
2051 +                               = masq_timeout_table_dos.timeout[IP_MASQ_S_FIN_WAIT]
2052 +#endif
2053                                 = masq->tcp_fin_timeout;
2054 +               }
2055  
2056 -               if (masq->udp_timeout)
2057 +               if (masq->udp_timeout) {
2058                         masq_timeout_table.timeout[IP_MASQ_S_UDP]
2059 +#ifdef CONFIG_IP_MASQUERADE_VS
2060 +                               = masq_timeout_table_dos.timeout[IP_MASQ_S_UDP]
2061 +#endif
2062                                 = masq->udp_timeout;
2063 +               }
2064                 ret = 0;
2065         }
2066         return ret;
2067 @@ -2468,6 +3078,11 @@
2068                         ret = ip_masq_mod_ctl(optname, &masq_ctl, optlen);
2069                         break;
2070  #endif
2071 +#ifdef CONFIG_IP_MASQUERADE_VS
2072 +               case IP_MASQ_TARGET_VS:
2073 +                       ret = ip_vs_ctl(optname, &masq_ctl, optlen);
2074 +                       break;
2075 +#endif
2076         }
2077  
2078         /*      
2079 @@ -2529,12 +3144,25 @@
2080         }
2081  }
2082  #endif /* CONFIG_PROC_FS */
2083 +
2084  /*
2085 - *     Wrapper over inet_select_addr()
2086 + *     Determine maddr from skb
2087   */
2088 -u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
2089 +int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr)
2090  {
2091 -       return inet_select_addr(dev, dst, scope);
2092 +        struct rtable *rt;
2093 +        struct rtable *skb_rt = (struct rtable*)skb->dst;
2094 +        struct device *skb_dev = skb_rt->u.dst.dev;
2095 +        struct iphdr *iph = skb->nh.iph;
2096 +
2097 +       if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos)|RTO_CONN, skb_dev?skb_dev->ifindex:0)) {
2098 +               return -1;
2099 +       } else {
2100 +               /* Route lookup succeeded */
2101 +               *maddr = rt->rt_src;
2102 +               ip_rt_put(rt);
2103 +               return 0;
2104 +       }
2105  }
2106  
2107  /*
2108 @@ -2587,7 +3215,7 @@
2109                 (char *) IPPROTO_ICMP,
2110                 ip_masq_user_info
2111         });
2112 -#endif 
2113 +#endif /* CONFIG_PROC_FS */
2114  #ifdef CONFIG_IP_MASQUERADE_IPAUTOFW
2115         ip_autofw_init();
2116  #endif
2117 @@ -2596,6 +3224,9 @@
2118  #endif
2119  #ifdef CONFIG_IP_MASQUERADE_MFW
2120         ip_mfw_init();
2121 +#endif
2122 +#ifdef CONFIG_IP_MASQUERADE_VS
2123 +        ip_vs_init();
2124  #endif
2125          ip_masq_app_init();
2126  
2127 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs.c
2128 --- linux-2.2.19/net/ipv4/ip_vs.c       Thu Jan  1 08:00:00 1970
2129 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs.c      Thu Apr 19 22:37:31 2001
2130 @@ -0,0 +1,2963 @@
2131 +/*
2132 + * IPVS         An implementation of the IP virtual server support for the
2133 + *              LINUX operating system.  IPVS is now implemented as a part
2134 + *              of IP masquerading code. IPVS can be used to build a
2135 + *              high-performance and highly available server based on a
2136 + *              cluster of servers.
2137 + *
2138 + * Version:     $Id$
2139 + *
2140 + * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
2141 + *              Peter Kese <peter.kese@ijs.si>
2142 + *
2143 + *              This program is free software; you can redistribute it and/or
2144 + *              modify it under the terms of the GNU General Public License
2145 + *              as published by the Free Software Foundation; either version
2146 + *              2 of the License, or (at your option) any later version.
2147 + *
2148 + * Changes:
2149 + *     Wensong Zhang           :    fixed the overflow bug in ip_vs_procinfo
2150 + *     Wensong Zhang           :    added editing dest and service functions
2151 + *     Wensong Zhang           :    changed the names of some functions
2152 + *     Wensong Zhang           :    fixed the unlocking bug in ip_vs_del_dest
2153 + *     Wensong Zhang           :    added a separate hash table for IPVS
2154 + *     Wensong Zhang           :    added slow timer for IPVS masq entries
2155 + *     Julian Anastasov        :    fixed the number of active connections
2156 + *     Wensong Zhang           :    added persistent port
2157 + *     Wensong Zhang           :    fixed the incorrect lookup in hash table
2158 + *     Wensong Zhang           :    added server status checking
2159 + *     Wensong Zhang           :    fixed the incorrect slow timer vector layout
2160 + *     Wensong Zhang           :    fixed the sltimer added twice bug of mst
2161 + *     Julian Anastasov        :    fixed the IP_MASQ_F_VS_INACTIVE cleared bug after editing dest
2162 + *     Wensong Zhang           :    added the inactive connection counter
2163 + *     Wensong Zhang           :    changed the body of ip_vs_schedule
2164 + *     Julian Anastasov        :    fixed the unlocking bug in ip_vs_schedule
2165 + *     Julian Anastasov        :    fixed the uncounting bug in creating masqs by template
2166 + *     Wensong Zhang           :    changed some condition orders for a bit of performance
2167 + *     Julian Anastasov        :    don't touch counters in ip_vs_unbind_masq for templates
2168 + *     Wensong Zhang           :    added the hash table for virtual services
2169 + *     Wensong Zhang           :    changed destination lists to d-linked lists
2170 + *     Wensong Zhang           :    changed the scheduler list to the d-linked list
2171 + *     Wensong Zhang           :    added new persistent service handling
2172 + *     Julian Anastasov        :    fixed the counting bug in ip_vs_unbind_masq again
2173 + *                                    (don't touch counters for templates)
2174 + *     Wensong Zhang           :    changed some IP_VS_ERR to IP_VS_DBG in the ip_vs_tunnel_xmit
2175 + *     Wensong Zhang           :    added different timeout support for persistent svc
2176 + *     Wensong Zhang           :    fixed the bug that persistent svc cannot be edited
2177 + *     Julian Anastasov        :    removed extra read_unlock in __ip_vs_lookup_service
2178 + *     Julian Anastasov        :    changed not to restart template timers if dest is unavailable
2179 + *     Julian Anastasov        :    added the destination trash
2180 + *     Wensong Zhang           :    added the update_service call in ip_vs_del_dest
2181 + *     Wensong Zhang           :    added the ip_vs_leave function
2182 + *     Lars Marowsky-Bree      :    added persistence granularity support
2183 + *     Julian Anastasov        :    changed some cosmetic things for debugging
2184 + *     Wensong Zhang           :    use vmalloc to allocate big ipvs hash table
2185 + *     Wensong Zhang           :    changed the tunneling/direct routing methods a little
2186 + *     Julian Anastasov        :    fixed the return bug of ip_vs_leave(-2 instead of -3)
2187 + *     Roberto Nibali          :    fixed the undefined variable bug in the IP_VS_DBG of ip_vs_dr_xmit
2188 + *     Julian Anastasov        :    changed ICMP_PROT_UNREACH to ICMP_PORT_UNREACH in ip_vs_leave
2189 + *     Wensong Zhang           :    added port zero support for persistent services
2190 + *     Wensong Zhang           :    fixed the bug that virtual ftp service blocks other services not listed in ipvs table
2191 + *     Wensong Zhang           :    invalidate a persistent template when its dest is unavailable
2192 + *     Julian Anastasov        :    changed two IP_VS_ERR calls to IP_VS_DBG
2193 + *     Wensong Zhang           :    added random drop of syn entries
2194 + *     Wensong Zhang           :    added random drop of UDP entries
2195 + *     Julian Anastasov        :    added droprate defense against DoS attack
2196 + *     Julian Anastasov        :    added secure_tcp defense against DoS attack
2197 + *     Wensong Zhang           :    revisited dropentry defense against DoS attack
2198 + *     Horms                   :    added the fwmark service feature
2199 + *     Wensong Zhang           :    changed to two service hash tables
2200 + *     Julian Anastasov        :    corrected trash_dest lookup for both
2201 + *                                  normal service and fwmark service
2202 + *
2203 + */
2204 +
2205 +#include <linux/config.h>
2206 +#include <linux/module.h>
2207 +#include <linux/types.h>
2208 +#include <linux/kernel.h>
2209 +#include <linux/errno.h>
2210 +#include <linux/vmalloc.h>
2211 +#include <linux/swap.h>
2212 +#include <net/ip_masq.h>
2213 +
2214 +#include <linux/sysctl.h>
2215 +#include <linux/ip_fw.h>
2216 +#include <linux/ip_masq.h>
2217 +#include <linux/proc_fs.h>
2218 +
2219 +#include <linux/inetdevice.h>
2220 +#include <linux/ip.h>
2221 +#include <net/icmp.h>
2222 +#include <net/ip.h>
2223 +#include <net/route.h>
2224 +#include <net/ip_vs.h>
2225 +
2226 +#ifdef CONFIG_KMOD
2227 +#include <linux/kmod.h>
2228 +#endif
2229 +
2230 +EXPORT_SYMBOL(register_ip_vs_scheduler);
2231 +EXPORT_SYMBOL(unregister_ip_vs_scheduler);
2232 +EXPORT_SYMBOL(ip_vs_bind_masq);
2233 +EXPORT_SYMBOL(ip_vs_unbind_masq);
2234 +EXPORT_SYMBOL(ip_vs_lookup_dest);
2235 +#ifdef CONFIG_IP_VS_DEBUG
2236 +EXPORT_SYMBOL(ip_vs_get_debug_level);
2237 +#endif
2238 +
2239 +int sysctl_ip_vs_drop_entry = 0;
2240 +int sysctl_ip_vs_drop_packet = 0;
2241 +int sysctl_ip_vs_secure_tcp = 0;
2242 +int sysctl_ip_vs_amemthresh = 1024;
2243 +int sysctl_ip_vs_am_droprate = 10;
2244 +
2245 +#ifdef CONFIG_IP_VS_DEBUG
2246 +static int sysctl_ip_vs_debug_level = 0;
2247 +
2248 +int ip_vs_get_debug_level(void)
2249 +{
2250 +       return sysctl_ip_vs_debug_level;
2251 +}
2252 +#endif
2253 +
2254 +
2255 +int ip_vs_dropentry = 0;
2256 +
2257 +static inline void update_defense_level(void)
2258 +{
2259 +       int ip_vs_amem = nr_free_pages+page_cache_size+(buffermem>>PAGE_SHIFT);
2260 +       int nomem = (ip_vs_amem < sysctl_ip_vs_amemthresh);
2261 +
2262 +       /* drop_entry */
2263 +       switch (sysctl_ip_vs_drop_entry) {
2264 +       case 0:
2265 +               ip_vs_dropentry = 0;
2266 +               break;
2267 +       case 1:
2268 +               if (nomem) {
2269 +                       ip_vs_dropentry = 1;
2270 +                       sysctl_ip_vs_drop_entry = 2;
2271 +               } else {
2272 +                       ip_vs_dropentry = 0;
2273 +               }
2274 +               break;
2275 +       case 2:
2276 +               if (nomem) {
2277 +                       ip_vs_dropentry = 1;
2278 +               } else {
2279 +                       ip_vs_dropentry = 0;
2280 +                       sysctl_ip_vs_drop_entry = 1;
2281 +               };
2282 +               break;
2283 +       case 3:
2284 +               ip_vs_dropentry = 1;
2285 +               break;
2286 +       }
2287 +
2288 +       /* drop_packet */
2289 +       switch (sysctl_ip_vs_drop_packet) {
2290 +       case 0:
2291 +               ip_masq_drop_rate = 0;
2292 +               break;
2293 +       case 1:
2294 +               if (nomem) {
2295 +                       ip_masq_drop_rate = ip_masq_drop_counter
2296 +                               = sysctl_ip_vs_amemthresh /
2297 +                               (sysctl_ip_vs_amemthresh-ip_vs_amem);
2298 +                       sysctl_ip_vs_drop_packet = 2;
2299 +               } else {
2300 +                       ip_masq_drop_rate = 0;
2301 +               }
2302 +               break;
2303 +       case 2:
2304 +               if (nomem) {
2305 +                       ip_masq_drop_rate = ip_masq_drop_counter
2306 +                               = sysctl_ip_vs_amemthresh /
2307 +                               (sysctl_ip_vs_amemthresh-ip_vs_amem);
2308 +               } else {
2309 +                       ip_masq_drop_rate = 0;
2310 +                       sysctl_ip_vs_drop_packet = 1;
2311 +               }
2312 +               break;
2313 +       case 3:
2314 +               ip_masq_drop_rate = sysctl_ip_vs_am_droprate;
2315 +               break;
2316 +       }
2317 +
2318 +       /* secure_tcp */
2319 +       switch (sysctl_ip_vs_secure_tcp) {
2320 +       case 0:
2321 +               ip_masq_secure_tcp_set(0);
2322 +               break;
2323 +       case 1:
2324 +               if (nomem) {
2325 +                       ip_masq_secure_tcp_set(1);
2326 +                       sysctl_ip_vs_secure_tcp = 2;
2327 +               } else {
2328 +                       ip_masq_secure_tcp_set(0);
2329 +               }
2330 +               break;
2331 +       case 2:
2332 +               if (nomem) {
2333 +                       ip_masq_secure_tcp_set(1);
2334 +               } else {
2335 +                       ip_masq_secure_tcp_set(0);
2336 +                       sysctl_ip_vs_secure_tcp = 1;
2337 +               }
2338 +               break;
2339 +       case 3:
2340 +               ip_masq_secure_tcp_set(1);
2341 +               break;
2342 +       }
2343 +}
2344 +
2345 +
2346 +static inline int todrop_entry(struct ip_masq *ms)
2347 +{
2348 +       /*
2349 +        * The drop rate array needs tuning for real environments.
2350 +        */
2351 +       static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
2352 +       static char todrop_counter[9] = {0};
2353 +       int i;
2354 +
2355 +       if (ms->timeout+jiffies-ms->timer.expires < 60*HZ)
2356 +               return 0;
2357 +
2358 +       i = atomic_read(&ms->in_pkts);
2359 +       if (i > 8) return 0;
2360 +
2361 +       if (!todrop_rate[i]) return 0;
2362 +       if (--todrop_counter[i] > 0) return 0;
2363 +
2364 +       todrop_counter[i] = todrop_rate[i];
2365 +       return 1;
2366 +}
2367 +
2368 +static inline void ip_vs_random_dropentry(void)
2369 +{
2370 +       int i;
2371 +       struct ip_masq *ms;
2372 +       struct list_head *l,*e;
2373 +       struct ip_masq *mst;
2374 +       void (*fn)(unsigned long);
2375 +
2376 +       /*
2377 +        * Randomly scan 1/32 of the whole table every second
2378 +        */
2379 +       for (i=0; i < (IP_VS_TAB_SIZE>>5); i++) {
2380 +               /*
2381 +                *  Lock is actually needed in this loop.
2382 +                */
2383 +               write_lock(&__ip_masq_lock);
2384 +
2385 +               l = &ip_vs_table[net_random()&IP_VS_TAB_MASK];
2386 +               for (e=l->next; e!=l; e=e->next) {
2387 +                       ms = list_entry(e, struct ip_masq, m_list);
2388 +                       if (ms->dport == 0)
2389 +                               /* masq template */
2390 +                               continue;
2391 +                       switch(ms->state) {
2392 +                       case IP_MASQ_S_SYN_RECV:
2393 +                       case IP_MASQ_S_SYNACK:
2394 +                               break;
2395 +
2396 +                       case IP_MASQ_S_ESTABLISHED:
2397 +                       case IP_MASQ_S_UDP:
2398 +                               if (todrop_entry(ms))
2399 +                                       break;
2400 +                               continue;
2401 +
2402 +                       default:
2403 +                               continue;
2404 +                       }
2405 +
2406 +                       /*
2407 +                        * Drop the entry, and drop its mst if not referenced
2408 +                        */
2409 +                       write_unlock(&__ip_masq_lock);
2410 +                       IP_VS_DBG(4, "Drop masq\n");
2411 +                       mst = ms->control;
2412 +                       fn = (ms->timer).function;
2413 +                       del_sltimer(&ms->timer);
2414 +                       fn((unsigned long)ms);
2415 +                       if (mst && !atomic_read(&mst->n_control)) {
2416 +                               IP_VS_DBG(4, "Drop masq template\n");
2417 +                               del_sltimer(&mst->timer);
2418 +                               fn((unsigned long)mst);
2419 +                       }
2420 +                       write_lock(&__ip_masq_lock);
2421 +               }
2422 +               write_unlock(&__ip_masq_lock);
2423 +       }
2424 +}
2425 +
2426 +
2427 +/*
2428 + * The following block implements slow timers for IPVS, most code is stolen
2429 + * from linux/kernel/sched.c
2430 + * Slow timer is used to avoid the overhead of cascading timers, when lots
2431 + * of masq entries (>50,000) are cluttered in the system.
2432 + */
2433 +#define SHIFT_BITS 6
2434 +#define TVN_BITS 8
2435 +#define TVR_BITS 10
2436 +#define TVN_SIZE (1 << TVN_BITS)
2437 +#define TVR_SIZE (1 << TVR_BITS)
2438 +#define TVN_MASK (TVN_SIZE - 1)
2439 +#define TVR_MASK (TVR_SIZE - 1)
2440 +
2441 +struct sltimer_vec {
2442 +       int index;
2443 +       struct timer_list *vec[TVN_SIZE];
2444 +};
2445 +
2446 +struct sltimer_vec_root {
2447 +       int index;
2448 +       struct timer_list *vec[TVR_SIZE];
2449 +};
2450 +
2451 +static struct sltimer_vec sltv3 = { 0 };
2452 +static struct sltimer_vec sltv2 = { 0 };
2453 +static struct sltimer_vec_root sltv1 = { 0 };
2454 +
2455 +static struct sltimer_vec * const sltvecs[] = {
2456 +       (struct sltimer_vec *)&sltv1, &sltv2, &sltv3
2457 +};
2458 +
2459 +#define NOOF_SLTVECS (sizeof(sltvecs) / sizeof(sltvecs[0]))
2460 +
2461 +static unsigned long sltimer_jiffies = 0;
2462 +
2463 +static inline void insert_sltimer(struct timer_list *timer,
2464 +                               struct timer_list **vec, int idx)
2465 +{
2466 +       if ((timer->next = vec[idx]))
2467 +               vec[idx]->prev = timer;
2468 +       vec[idx] = timer;
2469 +       timer->prev = (struct timer_list *)&vec[idx];
2470 +}
2471 +
2472 +static inline void internal_add_sltimer(struct timer_list *timer)
2473 +{
2474 +       /*
2475 +        * interrupts must be disabled (cli-ed) when calling this
2476 +        */
2477 +       unsigned long expires = timer->expires;
2478 +       unsigned long idx = (expires - sltimer_jiffies) >> SHIFT_BITS;
2479 +
2480 +       if (idx < TVR_SIZE) {
2481 +               int i = (expires >> SHIFT_BITS) & TVR_MASK;
2482 +               insert_sltimer(timer, sltv1.vec, i);
2483 +       } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
2484 +               int i = (expires >> (SHIFT_BITS+TVR_BITS)) & TVN_MASK;
2485 +               insert_sltimer(timer, sltv2.vec, i);
2486 +       } else if ((signed long) idx < 0) {
2487 +               /*
2488 +                * can happen if you add a timer with expires == jiffies,
2489 +                * or you set a timer to go off in the past
2490 +                */
2491 +               insert_sltimer(timer, sltv1.vec, sltv1.index);
2492 +       } else if (idx <= 0xffffffffUL) {
2493 +               int i = (expires >> (SHIFT_BITS+TVR_BITS+TVN_BITS)) & TVN_MASK;
2494 +               insert_sltimer(timer, sltv3.vec, i);
2495 +       } else {
2496 +               /* Can only get here on architectures with 64-bit jiffies */
2497 +               timer->next = timer->prev = timer;
2498 +       }
2499 +}
2500 +
2501 +rwlock_t  sltimerlist_lock = RW_LOCK_UNLOCKED;
2502 +
2503 +void add_sltimer(struct timer_list *timer)
2504 +{
2505 +       write_lock(&sltimerlist_lock);
2506 +       if (timer->prev)
2507 +               goto bug;
2508 +       internal_add_sltimer(timer);
2509 +out:
2510 +       write_unlock(&sltimerlist_lock);
2511 +       return;
2512 +
2513 +bug:
2514 +       printk("bug: kernel sltimer added twice at %p.\n",
2515 +              __builtin_return_address(0));
2516 +       goto out;
2517 +}
2518 +
2519 +static inline int detach_sltimer(struct timer_list *timer)
2520 +{
2521 +       struct timer_list *prev = timer->prev;
2522 +       if (prev) {
2523 +               struct timer_list *next = timer->next;
2524 +               prev->next = next;
2525 +               if (next)
2526 +                       next->prev = prev;
2527 +               return 1;
2528 +       }
2529 +       return 0;
2530 +}
2531 +
2532 +void mod_sltimer(struct timer_list *timer, unsigned long expires)
2533 +{
2534 +       write_lock(&sltimerlist_lock);
2535 +       timer->expires = expires;
2536 +       detach_sltimer(timer);
2537 +       internal_add_sltimer(timer);
2538 +       write_unlock(&sltimerlist_lock);
2539 +}
2540 +
2541 +int del_sltimer(struct timer_list * timer)
2542 +{
2543 +       int ret;
2544 +
2545 +       write_lock(&sltimerlist_lock);
2546 +       ret = detach_sltimer(timer);
2547 +       timer->next = timer->prev = 0;
2548 +       write_unlock(&sltimerlist_lock);
2549 +       return ret;
2550 +}
2551 +
2552 +
2553 +static inline void cascade_sltimers(struct sltimer_vec *tv)
2554 +{
2555 +       /*
2556 +        * cascade all the timers from tv up one level
2557 +        */
2558 +       struct timer_list *timer;
2559 +       timer = tv->vec[tv->index];
2560 +       /*
2561 +        * We are removing _all_ timers from the list, so we don't have to
2562 +        * detach them individually, just clear the list afterwards.
2563 +        */
2564 +       while (timer) {
2565 +               struct timer_list *tmp = timer;
2566 +               timer = timer->next;
2567 +               internal_add_sltimer(tmp);
2568 +       }
2569 +       tv->vec[tv->index] = NULL;
2570 +       tv->index = (tv->index + 1) & TVN_MASK;
2571 +}
2572 +
2573 +static inline void run_sltimer_list(void)
2574 +{
2575 +       write_lock(&sltimerlist_lock);
2576 +       while ((long)(jiffies - sltimer_jiffies) >= 0) {
2577 +               struct timer_list *timer;
2578 +               if (!sltv1.index) {
2579 +                       int n = 1;
2580 +                       do {
2581 +                               cascade_sltimers(sltvecs[n]);
2582 +                       } while (sltvecs[n]->index == 1 && ++n < NOOF_SLTVECS);
2583 +               }
2584 +               while ((timer = sltv1.vec[sltv1.index])) {
2585 +                       void (*fn)(unsigned long) = timer->function;
2586 +                       unsigned long data = timer->data;
2587 +                       detach_sltimer(timer);
2588 +                       timer->next = timer->prev = NULL;
2589 +                       write_unlock(&sltimerlist_lock);
2590 +                       fn(data);
2591 +                       write_lock(&sltimerlist_lock);
2592 +               }
2593 +               sltimer_jiffies += 1<<SHIFT_BITS;
2594 +               sltv1.index = (sltv1.index + 1) & TVR_MASK;
2595 +       }
2596 +       write_unlock(&sltimerlist_lock);
2597 +}
2598 +
2599 +static void sltimer_handler(unsigned long data);
2600 +
2601 +struct timer_list       slow_timer = {
2602 +       NULL, NULL,
2603 +       0, 0,
2604 +       sltimer_handler,
2605 +};
2606 +
2607 +/*
2608 + *  Slow timer handler is activated every second
2609 + */
2610 +#define SLTIMER_PERIOD       1*HZ
2611 +
2612 +void sltimer_handler(unsigned long data)
2613 +{
2614 +       run_sltimer_list();
2615 +
2616 +       update_defense_level();
2617 +       if (ip_vs_dropentry)
2618 +               ip_vs_random_dropentry();
2619 +
2620 +       mod_timer(&slow_timer, (jiffies + SLTIMER_PERIOD));
2621 +}
2622 +
2623 +
2624 +/*
2625 + *  The port number of FTP service (in network order).
2626 + */
2627 +#define FTPPORT  __constant_htons(21)
2628 +#define FTPDATA  __constant_htons(20)
2629 +
2630 +/*
2631 + *  Lock for IPVS
2632 + */
2633 +rwlock_t __ip_vs_lock = RW_LOCK_UNLOCKED;
2634 +
2635 +/*
2636 + *  Hash table: for input and output packets lookups of IPVS
2637 + */
2638 +#define IP_MASQ_NTABLES 3
2639 +
2640 +struct list_head *ip_vs_table;
2641 +
2642 +/*
2643 + *  Hash table: for virtual service lookups
2644 + */
2645 +#define IP_VS_SVC_TAB_BITS 8
2646 +#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
2647 +#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
2648 +
2649 +/* the service table hashed by <protocol, addr, port> */
2650 +struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
2651 +/* the service table hashed by fwmark */
2652 +struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
2653 +
2654 +/*
2655 + *  Hash table: for real service lookups
2656 + */
2657 +#define IP_VS_RTAB_BITS 4
2658 +#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
2659 +#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
2660 +
2661 +struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
2662 +
2663 +/*
2664 + * IPVS scheduler list
2665 + */
2666 +struct list_head ip_vs_schedulers;
2667 +
2668 +/*
2669 + * Trash for destinations
2670 + */
2671 +struct list_head ip_vs_dest_trash;
2672 +
2673 +/*
2674 + * FTP & NULL virtual service counters
2675 + */
2676 +atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
2677 +atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
2678 +
2679 +/*
2680 + *  Register a scheduler in the scheduler list
2681 + */
2682 +int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
2683 +{
2684 +       if (!scheduler) {
2685 +               IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
2686 +               return -EINVAL;
2687 +       }
2688 +
2689 +       if (!scheduler->name) {
2690 +               IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
2691 +               return -EINVAL;
2692 +       }
2693 +
2694 +       if (scheduler->n_list.next != &scheduler->n_list) {
2695 +               IP_VS_ERR("register_ip_vs_scheduler(): scheduler already linked\n");
2696 +               return -EINVAL;
2697 +       }
2698 +
2699 +       /*
2700 +        *      Add it to the doubly-linked scheduler list
2701 +        */
2702 +       list_add(&scheduler->n_list, &ip_vs_schedulers);
2703 +
2704 +       return 0;
2705 +}
2706 +
2707 +
2708 +/*
2709 + *  Unregister a scheduler in the scheduler list
2710 + */
2711 +int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
2712 +{
2713 +       if (!scheduler) {
2714 +               IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
2715 +               return -EINVAL;
2716 +       }
2717 +
2718 +       /*
2719 +        *      Only allow unregistration if it is not referenced
2720 +        */
2721 +       if (atomic_read(&scheduler->refcnt))  {
2722 +               IP_VS_ERR("unregister_ip_vs_scheduler(): is in use by %d guys. failed\n",
2723 +                         atomic_read(&scheduler->refcnt));
2724 +               return -EINVAL;
2725 +       }
2726 +
2727 +       if (scheduler->n_list.next == &scheduler->n_list) {
2728 +               IP_VS_ERR("unregister_ip_vs_scheduler(): scheduler is not in the list. failed\n");
2729 +               return -EINVAL;
2730 +       }
2731 +
2732 +       /*
2733 +        *      Remove it from the doubly-linked scheduler list
2734 +        */
2735 +       list_del(&scheduler->n_list);
2736 +
2737 +       return 0;
2738 +}
2739 +
2740 +
2741 +/*
2742 + *  Bind a service with a scheduler
2743 + *  Must be called with the __ip_vs_lock lock held; returns 0 or -EINVAL.
2744 + */
2745 +int ip_vs_bind_scheduler(struct ip_vs_service *svc,
2746 +                        struct ip_vs_scheduler *scheduler)
2747 +{
2748 +       if (svc == NULL) {
2749 +               IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
2750 +               return -EINVAL;
2751 +       }
2752 +       if (scheduler == NULL) {
2753 +               IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
2754 +               return -EINVAL;
2755 +       }
2756 +
2757 +       svc->scheduler = scheduler;
2758 +       atomic_inc(&scheduler->refcnt);
2759 +
2760 +       if(scheduler->init_service)
2761 +               if(scheduler->init_service(svc) != 0) {
2762 +                       IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
2763 +                       return -EINVAL;
2764 +               }
2765 +
2766 +       return 0;
2767 +}
2768 +
2769 +
2770 +/*
2771 + *  Unbind a service with its scheduler
2772 + *  Must be called with the __ip_vs_lock lock held; returns 0 or -EINVAL.
2773 + */
2774 +int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
2775 +{
2776 +       struct ip_vs_scheduler *sched;
2777 +
2778 +       if (svc == NULL) {
2779 +               IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
2780 +               return -EINVAL;
2781 +       }
2782 +
2783 +       sched = svc->scheduler;
2784 +       if (sched == NULL) {
2785 +               IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
2786 +               return -EINVAL;
2787 +       }
2788 +
2789 +       if(sched->done_service)
2790 +               if(sched->done_service(svc) != 0) {
2791 +                       IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
2792 +                       return -EINVAL;
2793 +               }
2794 +
2795 +       atomic_dec(&sched->refcnt);
2796 +       svc->scheduler = NULL;
2797 +
2798 +       return 0;
2799 +}
2800 +
2801 +
2802 +/*
2803 + *    Get scheduler in the scheduler list by name
2804 + */
2805 +struct ip_vs_scheduler * ip_vs_sched_getbyname(const char *sched_name)
2806 +{
2807 +       struct ip_vs_scheduler *sched;
2808 +       struct list_head *l, *e;
2809 +
2810 +       IP_VS_DBG(6, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
2811 +                 sched_name);
2812 +
2813 +       read_lock_bh(&__ip_vs_lock);
2814 +
2815 +       l = &ip_vs_schedulers;
2816 +       for (e=l->next; e!=l; e=e->next) {
2817 +               sched = list_entry(e, struct ip_vs_scheduler, n_list);
2818 +               if (strcmp(sched_name, sched->name)==0) {
2819 +                       /* HIT */
2820 +                       read_unlock_bh(&__ip_vs_lock);
2821 +                       return sched;
2822 +               }
2823 +       }
2824 +
2825 +       read_unlock_bh(&__ip_vs_lock);
2826 +       return NULL;
2827 +}
2828 +
2829 +
2830 +/*
2831 + *  Lookup scheduler and try to load it if it doesn't exist
2832 + */
2833 +struct ip_vs_scheduler * ip_vs_lookup_scheduler(const char *sched_name)
2834 +{
2835 +       struct ip_vs_scheduler *sched;
2836 +
2837 +       /*
2838 +        *  Search for the scheduler by sched_name
2839 +        */
2840 +       sched = ip_vs_sched_getbyname(sched_name);
2841 +
2842 +       /*
2843 +        *  If scheduler not found, load the module and search again
2844 +        */
2845 +       if (sched == NULL) {
2846 +               char module_name[IP_MASQ_TNAME_MAX+8];
2847 +               sprintf(module_name,"ip_vs_%s",sched_name);
2848 +#ifdef CONFIG_KMOD
2849 +               request_module(module_name);
2850 +#endif /* CONFIG_KMOD */
2851 +               sched = ip_vs_sched_getbyname(sched_name);
2852 +       }
2853 +
2854 +       return sched;
2855 +}
2856 +
2857 +
2858 +/*
2859 + *     Returns hash value for IPVS masq entry
2860 + */
2861 +
2862 +static __inline__ unsigned
2863 +ip_vs_hash_key(unsigned proto, __u32 addr, __u16 port)
2864 +{
2865 +       unsigned addrh = ntohl(addr);
2866 +
2867 +       return (proto^addrh^(addrh>>IP_VS_TAB_BITS)^ntohs(port))
2868 +               & IP_VS_TAB_MASK;
2869 +}
2870 +
2871 +
2872 +/*
2873 + *     Hashes ip_masq in ip_vs_table by proto,addr,port.
2874 + *     should be called with locked tables.
2875 + *     returns bool success.
2876 + */
2877 +int ip_vs_hash(struct ip_masq *ms)
2878 +{
2879 +       unsigned hash;
2880 +
2881 +       if (ms->flags & IP_MASQ_F_HASHED) {
2882 +               IP_VS_ERR("ip_vs_hash(): request for already hashed, "
2883 +                         "called from %p\n", __builtin_return_address(0));
2884 +               return 0;
2885 +       }
2886 +
2887 +       /*
2888 +        * Note: ip_masq_put sets the masq expire timer only when its
2889 +        *       refcnt==IP_MASQ_NTABLES; otherwise the masq entry
2890 +        *       would never expire.
2891 +        */
2892 +       atomic_add(IP_MASQ_NTABLES, &ms->refcnt);
2893 +
2894 +       /*
2895 +        *      Hash by proto,d{addr,port},
2896 +        *      which are client address and port in IPVS.
2897 +        */
2898 +       hash = ip_vs_hash_key(ms->protocol, ms->daddr, ms->dport);
2899 +       list_add(&ms->m_list, &ip_vs_table[hash]);
2900 +
2901 +       ms->flags |= IP_MASQ_F_HASHED;
2902 +       return 1;
2903 +}
2904 +
2905 +
2906 +/*
2907 + *     Unhashes ip_masq from ip_vs_table.
2908 + *     should be called with locked tables.
2909 + *     returns bool success.
2910 + */
2911 +int ip_vs_unhash(struct ip_masq *ms)
2912 +{
2913 +       if (!(ms->flags & IP_MASQ_F_HASHED)) {
2914 +               IP_VS_ERR("ip_vs_unhash(): request for unhash flagged, "
2915 +                         "called from %p\n", __builtin_return_address(0));
2916 +               return 0;
2917 +       }
2918 +
2919 +       /*
2920 +        * Remove it from the list and decrease its reference counter.
2921 +        */
2922 +       list_del(&ms->m_list);
2923 +       atomic_sub(IP_MASQ_NTABLES, &ms->refcnt);
2924 +
2925 +       ms->flags &= ~IP_MASQ_F_HASHED;
2926 +       return 1;
2927 +}
2928 +
2929 +
2930 +/*
2931 + *  Gets ip_masq associated with supplied parameters in the ip_vs_table.
2932 + *  Called for pkts coming from OUTside-to-INside.
2933 + *     s_addr, s_port: pkt source address (foreign host)
2934 + *     d_addr, d_port: pkt dest address (load balancer)
2935 + *  Caller must lock tables
2936 + */
2937 +struct ip_masq * __ip_vs_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
2938 +{
2939 +       unsigned hash;
2940 +       struct ip_masq *ms;
2941 +       struct list_head *l,*e;
2942 +
2943 +       hash = ip_vs_hash_key(protocol, s_addr, s_port);
2944 +
2945 +       l = &ip_vs_table[hash];
2946 +       for (e=l->next; e!=l; e=e->next) {
2947 +               ms = list_entry(e, struct ip_masq, m_list);
2948 +               if (s_addr==ms->daddr && s_port==ms->dport &&
2949 +                   d_port==ms->mport && d_addr==ms->maddr &&
2950 +                   protocol==ms->protocol) {
2951 +                       /* HIT */
2952 +                       atomic_inc(&ms->refcnt);
2953 +                       goto out;
2954 +               }
2955 +       }
2956 +       ms = NULL;
2957 +
2958 +  out:
2959 +       IP_VS_DBG(7, "look/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
2960 +                 masq_proto_name(protocol),
2961 +                 NIPQUAD(s_addr), ntohs(s_port),
2962 +                 NIPQUAD(d_addr), ntohs(d_port),
2963 +                 ms?"hit":"not hit");
2964 +
2965 +       return ms;
2966 +}
2967 +
2968 +
2969 +/*
2970 + *  Gets ip_masq associated with supplied parameters in the ip_vs_table.
2971 + *  Called for pkts coming from inside-to-OUTside.
2972 + *     s_addr, s_port: pkt source address (inside host)
2973 + *     d_addr, d_port: pkt dest address (foreign host)
2974 + *  Caller must lock tables
2975 + */
2976 +struct ip_masq * __ip_vs_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
2977 +{
2978 +       unsigned hash;
2979 +       struct ip_masq *ms;
2980 +       struct list_head *l,*e;
2981 +
2982 +       /*
2983 +        *      Check for "full" addressed entries
2984 +        */
2985 +       hash = ip_vs_hash_key(protocol, d_addr, d_port);
2986 +
2987 +       l = &ip_vs_table[hash];
2988 +       for (e=l->next; e!=l; e=e->next) {
2989 +               ms = list_entry(e, struct ip_masq, m_list);
2990 +               if (d_addr == ms->daddr && d_port == ms->dport &&
2991 +                   s_port == ms->sport && s_addr == ms->saddr &&
2992 +                   protocol == ms->protocol) {
2993 +                       /* HIT */
2994 +                       atomic_inc(&ms->refcnt);
2995 +                       goto out;
2996 +               }
2997 +       }
2998 +       ms = NULL;
2999 +
3000 +  out:
3001 +       IP_VS_DBG(7, "look/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
3002 +                 masq_proto_name(protocol),
3003 +                 NIPQUAD(s_addr), ntohs(s_port),
3004 +                 NIPQUAD(d_addr), ntohs(d_port),
3005 +                 ms?"hit":"not hit");
3006 +
3007 +       return ms;
3008 +}
3009 +
3010 +
3011 +/*
3012 + *     Called by ip_vs_sched_persist to look for masq template.
3013 + */
3014 +static __inline__ struct ip_masq *ip_vs_in_get
3015 +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
3016 +{
3017 +       struct ip_masq *ms;
3018 +
3019 +       read_lock(&__ip_masq_lock);
3020 +       ms = __ip_vs_in_get(protocol, s_addr, s_port, d_addr, d_port);
3021 +       read_unlock(&__ip_masq_lock);
3022 +
3023 +       return ms;
3024 +}
3025 +
3026 +
3027 +/*
3028 + *     Returns hash value for virtual service
3029 + */
3030 +static __inline__ unsigned
3031 +ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
3032 +{
3033 +       register unsigned porth = ntohs(port);
3034 +
3035 +       return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
3036 +               & IP_VS_SVC_TAB_MASK;
3037 +}
3038 +
3039 +/*
3040 + *     Returns hash value of fwmark for virtual service lookup
3041 + */
3042 +static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
3043 +{
3044 +       return fwmark & IP_VS_SVC_TAB_MASK;
3045 +}
3046 +
3047 +/*
3048 + *     Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
3049 + *      or in the ip_vs_svc_fwm_table by fwmark.
3050 + *      Should be called with locked tables.
3051 + *     Returns bool success.
3052 + */
3053 +int ip_vs_svc_hash(struct ip_vs_service *svc)
3054 +{
3055 +       unsigned hash;
3056 +
3057 +       if (svc->flags & IP_VS_SVC_F_HASHED) {
3058 +               IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
3059 +                         "called from %p\n", __builtin_return_address(0));
3060 +               return 0;
3061 +       }
3062 +
3063 +       if (svc->fwmark == 0) {
3064 +               /*
3065 +                *      Hash by <protocol,addr,port> in ip_vs_svc_table
3066 +                */
3067 +               hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
3068 +               list_add(&svc->s_list, &ip_vs_svc_table[hash]);
3069 +       } else {
3070 +               /*
3071 +                *      Hash by fwmark in ip_vs_svc_fwm_table
3072 +                */
3073 +               hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
3074 +               list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
3075 +       }
3076 +
3077 +       svc->flags |= IP_VS_SVC_F_HASHED;
3078 +       return 1;
3079 +}
3080 +
3081 +
3082 +/*
3083 + *     Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
3084 + *     Should be called with locked tables.
3085 + *     Returns bool success.
3086 + */
3087 +int ip_vs_svc_unhash(struct ip_vs_service *svc)
3088 +{
3089 +       if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
3090 +               IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
3091 +                         "called from %p\n", __builtin_return_address(0));
3092 +               return 0;
3093 +       }
3094 +
3095 +       if (svc->fwmark == 0) {
3096 +               /*
3097 +                * Remove it from the ip_vs_svc_table table.
3098 +                */
3099 +               list_del(&svc->s_list);
3100 +       } else {
3101 +               /*
3102 +                * Remove it from the ip_vs_svc_fwm_table table.
3103 +                */
3104 +               list_del(&svc->f_list);
3105 +       }
3106 +
3107 +       svc->flags &= ~IP_VS_SVC_F_HASHED;
3108 +       return 1;
3109 +}
3110 +
3111 +
3112 +/*
3113 + *  Lookup service by {proto,addr,port} in the service table.
3114 + */
3115 +static __inline__ struct ip_vs_service *
3116 +__ip_vs_lookup_service(__u16 protocol, __u32 vaddr, __u16 vport)
3117 +{
3118 +       unsigned hash;
3119 +       struct ip_vs_service *svc;
3120 +       struct list_head *l,*e;
3121 +
3122 +       /*
3123 +        *      Check for "full" addressed entries
3124 +        *      Note: as long as IP_VS_SVC_TAB_BITS is larger than zero,
3125 +        *      <TCP,addr,port> and <UDP,addr,port> have different hash
3126 +        *      keys, there is no need to do protocol checking.
3127 +        */
3128 +       hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
3129 +
3130 +       l = &ip_vs_svc_table[hash];
3131 +       for (e=l->next; e!=l; e=e->next) {
3132 +               svc = list_entry(e, struct ip_vs_service, s_list);
3133 +               if ((svc->addr == vaddr)
3134 +                   && (svc->port == vport)) {
3135 +                       /* HIT */
3136 +                       return svc;
3137 +               }
3138 +       }
3139 +
3140 +       return NULL;
3141 +}
3142 +
3143 +
3144 +/*
3145 + *  Lookup service by fwmark in the service table.
3146 + */
3147 +static __inline__ struct ip_vs_service * __ip_vs_lookup_svc_fwm(__u32 fwmark)
3148 +{
3149 +       unsigned hash;
3150 +       struct ip_vs_service *svc;
3151 +       struct list_head *l,*e;
3152 +
3153 +       /*
3154 +        *      Check for fwmark-indexed entries
3155 +        */
3156 +       hash = ip_vs_svc_fwm_hashkey(fwmark);
3157 +
3158 +       l = &ip_vs_svc_fwm_table[hash];
3159 +       for (e=l->next; e!=l; e=e->next) {
3160 +               svc = list_entry(e, struct ip_vs_service, f_list);
3161 +               if (svc->fwmark == fwmark) {
3162 +                       /* HIT */
3163 +                       return svc;
3164 +               }
3165 +       }
3166 +
3167 +       return NULL;
3168 +}
3169 +
3170 +struct ip_vs_service *
3171 +ip_vs_lookup_service(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
3172 +{
3173 +       struct ip_vs_service *svc;
3174 +
3175 +       read_lock(&__ip_vs_lock);
3176 +
3177 +       if (fwmark) {
3178 +               /*
3179 +                *      Check the table hashed by fwmark first
3180 +                */
3181 +               svc = __ip_vs_lookup_svc_fwm(fwmark);
3182 +               if (svc)
3183 +                       goto out;
3184 +       }
3185 +
3186 +       /*
3187 +        *      Check the table hashed by <protocol,addr,port>
3188 +        *      first for "full" addressed entries
3189 +        */
3190 +       svc = __ip_vs_lookup_service(protocol, vaddr, vport);
3191 +
3192 +       if (svc == NULL
3193 +           && protocol == IPPROTO_TCP
3194 +           && atomic_read(&ip_vs_ftpsvc_counter)
3195 +           && (vport==FTPDATA || ntohs(vport)>=PROT_SOCK)){
3196 +               /*
3197 +                * Check if ftp service entry exists, the packet
3198 +                * might belong to FTP data connections.
3199 +                */
3200 +               svc = __ip_vs_lookup_service(protocol, vaddr, FTPPORT);
3201 +       }
3202 +
3203 +       if (svc == NULL
3204 +           && atomic_read(&ip_vs_nullsvc_counter)) {
3205 +               /*
3206 +                * Check if the catch-all port (port zero) exists
3207 +                */
3208 +               svc = __ip_vs_lookup_service(protocol, vaddr, 0);
3209 +       }
3210 +
3211 + out:
3212 +       read_unlock(&__ip_vs_lock);
3213 +
3214 +       IP_VS_DBG(5, "lookup_service fwm %d %s %u.%u.%u.%u:%d %s\n",
3215 +                 fwmark,
3216 +                 masq_proto_name(protocol),
3217 +                 NIPQUAD(vaddr), ntohs(vport),
3218 +                 svc?"hit":"not hit");
3219 +
3220 +       return svc;
3221 +}
3222 +
3223 +
3224 +/*
3225 + *     Returns hash value for real service
3226 + */
3227 +static __inline__ unsigned
3228 +ip_vs_rs_hashkey(__u32 addr, __u16 port)
3229 +{
3230 +       register unsigned porth = ntohs(port);
3231 +
3232 +       return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) & IP_VS_RTAB_MASK;
3233 +}
3234 +
3235 +/*
3236 + *     Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
3237 + *     should be called with locked tables.
3238 + *     returns bool success.
3239 + */
3240 +int ip_vs_rs_hash(struct ip_vs_dest *dest)
3241 +{
3242 +       unsigned hash;
3243 +
3244 +       if (!list_empty(&dest->d_list)) {
3245 +               return 0;
3246 +       }
3247 +
3248 +       /*
3249 +        *      Hash by proto,addr,port,
3250 +        *      which are the parameters of the real service.
3251 +        */
3252 +       hash = ip_vs_rs_hashkey(dest->addr, dest->port);
3253 +       list_add(&dest->d_list, &ip_vs_rtable[hash]);
3254 +
3255 +       return 1;
3256 +}
3257 +
3258 +/*
3259 + *     UNhashes ip_vs_dest from ip_vs_rtable.
3260 + *     should be called with locked tables.
3261 + *     returns bool success.
3262 + */
3263 +int ip_vs_rs_unhash(struct ip_vs_dest *dest)
3264 +{
3265 +       /*
3266 +        * Remove it from the ip_vs_rtable table.
3267 +        */
3268 +       if (!list_empty(&dest->d_list)) {
3269 +               list_del(&dest->d_list);
3270 +               INIT_LIST_HEAD(&dest->d_list);
3271 +       }
3272 +
3273 +       return 1;
3274 +}
3275 +
3276 +/*
3277 + *  Lookup real service by {proto,addr,port} in the real service table.
3278 + */
3279 +struct ip_vs_dest * __ip_vs_lookup_real_service(__u16 protocol,
3280 +                                               __u32 daddr, __u16 dport)
3281 +{
3282 +       unsigned hash;
3283 +       struct ip_vs_dest *dest;
3284 +       struct list_head *l,*e;
3285 +
3286 +       /*
3287 +        *      Check for "full" addressed entries
3288 +        *      Return the first found entry
3289 +        */
3290 +       hash = ip_vs_rs_hashkey(daddr, dport);
3291 +
3292 +       l = &ip_vs_rtable[hash];
3293 +       for (e=l->next; e!=l; e=e->next) {
3294 +               dest = list_entry(e, struct ip_vs_dest, d_list);
3295 +               if ((dest->addr == daddr)
3296 +                   && (dest->port == dport)
3297 +                   && ((dest->protocol == protocol) || dest->vfwmark)) {
3298 +                       /* HIT */
3299 +                       return dest;
3300 +               }
3301 +       }
3302 +
3303 +       return NULL;
3304 +}
3305 +
3306 +/*
3307 + *  Lookup destination by {addr,port} in the given service
3308 + */
3309 +struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc,
3310 +                                     __u32 daddr, __u16 dport)
3311 +{
3312 +       struct ip_vs_dest *dest;
3313 +       struct list_head *l, *e;
3314 +
3315 +       read_lock_bh(&__ip_vs_lock);
3316 +
3317 +       /*
3318 +        * Find the destination for the given service
3319 +        */
3320 +       l = &svc->destinations;
3321 +       for (e=l->next; e!=l; e=e->next) {
3322 +               dest = list_entry(e, struct ip_vs_dest, n_list);
3323 +               if ((dest->addr == daddr) && (dest->port == dport)) {
3324 +                       /* HIT */
3325 +                       read_unlock_bh(&__ip_vs_lock);
3326 +                       return dest;
3327 +               }
3328 +       }
3329 +
3330 +       read_unlock_bh(&__ip_vs_lock);
3331 +       return NULL;
3332 +}
3333 +
3334 +
3335 +/*
3336 + *  Lookup dest by {svc,addr,port} in the destination trash.
3337 + *  Called by ip_vs_add_dest with the __ip_vs_lock.
3338 + *  The destination trash is used to hold the destinations that are removed
3339 + *  from the service table but are still referenced by some masq entries.
3340 + *  The reason for the destination trash is that when the dest is temporarily
3341 + *  down (either by the administrator or by a monitor program), the dest can be
3342 + *  picked back from the trash, the remaining connections to the dest can
3343 + *  continue, and the counting information of the dest is also useful for
3344 + *  scheduling.
3345 + */
3346 +struct ip_vs_dest * __ip_vs_get_trash_dest(struct ip_vs_service *svc,
3347 +                                          __u32 daddr, __u16 dport)
3348 +{
3349 +       struct ip_vs_dest *dest;
3350 +       struct list_head *l, *e;
3351 +
3352 +       /*
3353 +        * Find the destination in trash
3354 +        */
3355 +       l = &ip_vs_dest_trash;
3356 +       for (e=l->next; e!=l; e=e->next) {
3357 +               dest = list_entry(e, struct ip_vs_dest, n_list);
3358 +               IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%d still in trash, "
3359 +                         "refcnt=%d\n",
3360 +                         dest->vfwmark,
3361 +                         NIPQUAD(dest->addr), ntohs(dest->port),
3362 +                         atomic_read(&dest->refcnt));
3363 +               if (dest->addr == daddr &&
3364 +                   dest->port == dport &&
3365 +                   dest->vfwmark == svc->fwmark &&
3366 +                   (svc->fwmark ||
3367 +                    (dest->protocol == svc->protocol &&
3368 +                     dest->vaddr == svc->addr &&
3369 +                     dest->vport == svc->port))) {
3370 +                       /* HIT */
3371 +                       return dest;
3372 +               }
3373 +
3374 +               /*
3375 +                * Try to purge the destination from trash if not referenced
3376 +                */
3377 +               if (atomic_read(&dest->refcnt) == 1) {
3378 +                       IP_VS_DBG(3, "Remove destination %u/%u.%u.%u.%u:%d "
3379 +                                 "from trash\n",
3380 +                                 dest->vfwmark,
3381 +                                 NIPQUAD(dest->addr), ntohs(dest->port));
3382 +                       e = e->prev;
3383 +                       list_del(&dest->n_list);
3384 +                       kfree_s(dest, sizeof(*dest));
3385 +               }
3386 +       }
3387 +       return NULL;
3388 +}
3389 +
3390 +
3391 +/*
3392 + *  Update a destination in the given service
3393 + */
3394 +void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
3395 +       struct ip_masq_ctl *mctl)
3396 +{
3397 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3398 +
3399 +       /*
3400 +        *    Set the weight and the flags
3401 +        */
3402 +       dest->weight = mm->weight;
3403 +       dest->masq_flags = mm->masq_flags;
3404 +
3405 +       dest->masq_flags |= IP_MASQ_F_VS;
3406 +       dest->masq_flags |= IP_MASQ_F_VS_INACTIVE;
3407 +
3408 +       /*
3409 +        *    Check if local node and update the flags
3410 +        */
3411 +       if (inet_addr_type(mm->daddr) == RTN_LOCAL) {
3412 +               dest->masq_flags = (dest->masq_flags & ~IP_MASQ_F_VS_FWD_MASK)
3413 +                       | IP_MASQ_F_VS_LOCALNODE;
3414 +       }
3415 +
3416 +       /*
3417 +        *    Set the IP_MASQ_F_VS_NO_OUTPUT flag if not masquerading
3418 +        */
3419 +       if ((dest->masq_flags & IP_MASQ_F_VS_FWD_MASK) != 0) {
3420 +               dest->masq_flags |= IP_MASQ_F_VS_NO_OUTPUT;
3421 +       } else {
3422 +               /*
3423 +                *    Put the real service in ip_vs_rtable if not present.
3424 +                *    For now only for NAT!
3425 +                */
3426 +               ip_vs_rs_hash(dest);
3427 +       }
3428 +
3429 +
3430 +       /*
3431 +        *    Set the dest status flags
3432 +        */
3433 +       dest->flags |= IP_VS_DEST_F_AVAILABLE;
3434 +       dest->svc = svc;
3435 +}
3436 +
3437 +
3438 +/*
3439 + *  Create a destination for the given service
3440 + */
3441 +struct ip_vs_dest *ip_vs_new_dest(struct ip_vs_service *svc,
3442 +                                 struct ip_masq_ctl *mctl)
3443 +{
3444 +       struct ip_vs_dest *dest;
3445 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3446 +
3447 +       EnterFunction(2);
3448 +
3449 +       dest = (struct ip_vs_dest*) kmalloc(sizeof(struct ip_vs_dest),
3450 +                                           GFP_ATOMIC);
3451 +       if (dest == NULL) {
3452 +               IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
3453 +               return NULL;
3454 +       }
3455 +       memset(dest, 0, sizeof(struct ip_vs_dest));
3456 +
3457 +       dest->protocol = svc->protocol;
3458 +       dest->vaddr = svc->addr;
3459 +       dest->vport = svc->port;
3460 +       dest->vfwmark = svc->fwmark;
3461 +       dest->addr = mm->daddr;
3462 +       dest->port = mm->dport;
3463 +
3464 +       atomic_set(&dest->activeconns, 0);
3465 +       atomic_set(&dest->inactconns, 0);
3466 +       atomic_set(&dest->refcnt, 0);
3467 +
3468 +       INIT_LIST_HEAD(&dest->d_list);
3469 +       dest->stats.lock = SPIN_LOCK_UNLOCKED;
3470 +       __ip_vs_update_dest(svc, dest, mctl);
3471 +
3472 +       LeaveFunction(2);
3473 +
3474 +       return dest;
3475 +}
3476 +
3477 +
3478 +/*
3479 + *  Add a destination into an existing service
3480 + */
3481 +int ip_vs_add_dest(struct ip_vs_service *svc, struct ip_masq_ctl *mctl)
3482 +{
3483 +       struct ip_vs_dest *dest;
3484 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3485 +       __u32 daddr = mm->daddr;
3486 +       __u16 dport = mm->dport;
3487 +
3488 +       EnterFunction(2);
3489 +
3490 +       if (mm->weight < 0) {
3491 +               IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
3492 +               return -ERANGE;
3493 +       }
3494 +
3495 +       /*
3496 +        * Check if the dest already exists in the list
3497 +        */
3498 +       dest = ip_vs_lookup_dest(svc, daddr, dport);
3499 +       if (dest != NULL) {
3500 +               IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
3501 +               return -EEXIST;
3502 +       }
3503 +
3504 +       write_lock_bh(&__ip_vs_lock);
3505 +
3506 +       /*
3507 +        * Check if the dest already exists in the trash and
3508 +        * is from the same service
3509 +        */
3510 +       dest = __ip_vs_get_trash_dest(svc, daddr, dport);
3511 +       if (dest != NULL) {
3512 +               IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%d from trash, "
3513 +                         "refcnt=%d, service %u.%u.%u.%u:%d\n",
3514 +                         NIPQUAD(daddr), ntohs(dport),
3515 +                         atomic_read(&dest->refcnt),
3516 +                         NIPQUAD(dest->vaddr),
3517 +                         ntohs(dest->vport));
3518 +
3519 +               /*
3520 +                * Get the destination from the trash
3521 +                */
3522 +               list_del(&dest->n_list);
3523 +               list_add(&dest->n_list, &svc->destinations);
3524 +
3525 +               __ip_vs_update_dest(svc, dest, mctl);
3526 +
3527 +               write_unlock_bh(&__ip_vs_lock);
3528 +               return 0;
3529 +       }
3530 +
3531 +       /*
3532 +        * Allocate and initialize the dest structure
3533 +        */
3534 +       dest = ip_vs_new_dest(svc, mctl);
3535 +       if (dest == NULL) {
3536 +               write_unlock_bh(&__ip_vs_lock);
3537 +               IP_VS_ERR("ip_vs_add_dest(): out of memory\n");
3538 +               return -ENOMEM;
3539 +       }
3540 +
3541 +       /*
3542 +        * Add the dest entry into the list
3543 +        */
3544 +       list_add(&dest->n_list, &svc->destinations);
3545 +       atomic_inc(&dest->refcnt);
3546 +
3547 +       write_unlock_bh(&__ip_vs_lock);
3548 +
3549 +       LeaveFunction(2);
3550 +       return 0;
3551 +}
3552 +
3553 +
3554 +/*
3555 + *  Edit a destination in the given service
3556 + */
3557 +int ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_masq_ctl *mctl)
3558 +{
3559 +       struct ip_vs_dest *dest;
3560 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3561 +       __u32 daddr = mm->daddr;
3562 +       __u16 dport = mm->dport;
3563 +
3564 +       EnterFunction(2);
3565 +
3566 +       if (mm->weight < 0) {
3567 +               IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
3568 +               return -ERANGE;
3569 +       }
3570 +
3571 +       /*
3572 +        *    Look up the dest to edit in the service's destination list
3573 +        */
3574 +       dest = ip_vs_lookup_dest(svc, daddr, dport);
3575 +       if (dest == NULL) {
3576 +               IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
3577 +               return -ENOENT;
3578 +       }
3579 +
3580 +       write_lock_bh(&__ip_vs_lock);
3581 +
3582 +       __ip_vs_update_dest(svc, dest, mctl);
3583 +
3584 +       write_unlock_bh(&__ip_vs_lock);
3585 +
3586 +       LeaveFunction(2);
3587 +       return 0;
3588 +}
3589 +
3590 +
3591 +/*
3592 + *  Delete a destination from the given service
3593 + */
3594 +void __ip_vs_del_dest(struct ip_vs_dest *dest)
3595 +{
3596 +       dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
3597 +
3598 +       /*
3599 +        *  Remove it from the d-linked destination list.
3600 +        */
3601 +       list_del(&dest->n_list);
3602 +
3603 +       /*
3604 +        *  Remove it from the d-linked list with the real services.
3605 +        */
3606 +       ip_vs_rs_unhash(dest);
3607 +
3608 +       /*
3609 +        *  Decrease the refcnt of the dest, and free the dest
3610 +        *  if nobody refers to it (refcnt=0). Otherwise, throw
3611 +        *  the destination into the trash.
3612 +        */
3613 +       if (atomic_dec_and_test(&dest->refcnt))
3614 +               kfree_s(dest, sizeof(*dest));
3615 +       else {
3616 +               IP_VS_DBG(3, "Move dest %u.%u.%u.%u:%d into trash, "
3617 +                         "refcnt=%d\n",
3618 +                         NIPQUAD(dest->addr), ntohs(dest->port),
3619 +                         atomic_read(&dest->refcnt));
3620 +               list_add(&dest->n_list, &ip_vs_dest_trash);
3621 +               atomic_inc(&dest->refcnt);
3622 +       }
3623 +}
3624 +
3625 +int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_masq_ctl *mctl)
3626 +{
3627 +       struct ip_vs_dest *dest;
3628 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3629 +       __u32 daddr = mm->daddr;
3630 +       __u16 dport = mm->dport;
3631 +
3632 +       EnterFunction(2);
3633 +
3634 +       /*
3635 +        *    Look up the dest to remove in the service's destination list
3636 +        */
3637 +       dest = ip_vs_lookup_dest(svc, daddr, dport);
3638 +       if (dest == NULL) {
3639 +               IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
3640 +               return -ENOENT;
3641 +       }
3642 +
3643 +       write_lock_bh(&__ip_vs_lock);
3644 +
3645 +       /*
3646 +        *  Remove dest from the destination list
3647 +        */
3648 +       __ip_vs_del_dest(dest);
3649 +
3650 +       /*
3651 +        *  Call the update_service function of its scheduler
3652 +        */
3653 +       svc->scheduler->update_service(svc);
3654 +
3655 +       write_unlock_bh(&__ip_vs_lock);
3656 +
3657 +       LeaveFunction(2);
3658 +
3659 +       return 0;
3660 +}
3661 +
3662 +
3663 +/*
3664 + *  Add a service into the service hash table
3665 + */
3666 +int ip_vs_add_service(struct ip_masq_ctl *mctl)
3667 +{
3668 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3669 +       __u16  protocol = mm->protocol;
3670 +       __u32 vaddr = mm->vaddr;
3671 +       __u16 vport = mm->vport;
3672 +       __u32 vfwmark = mm->vfwmark;
3673 +
3674 +       int ret = 0;
3675 +       struct ip_vs_scheduler *sched;
3676 +       struct ip_vs_service *svc;
3677 +
3678 +       EnterFunction(2);
3679 +
3680 +       /*
3681 +        * Lookup the scheduler, by 'mctl->m_tname'
3682 +        */
3683 +       sched = ip_vs_lookup_scheduler(mctl->m_tname);
3684 +       if (sched == NULL) {
3685 +               IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
3686 +                          mctl->m_tname);
3687 +               return -ENOENT;
3688 +       }
3689 +
3690 +       write_lock_bh(&__ip_vs_lock);
3691 +
3692 +       /*
3693 +        * Check if the service already exists
3694 +        */
3695 +       if (vfwmark == 0)
3696 +               svc = __ip_vs_lookup_service(protocol, vaddr, vport);
3697 +       else
3698 +               svc = __ip_vs_lookup_svc_fwm(vfwmark);
3699 +
3700 +       if (svc != NULL) {
3701 +               IP_VS_DBG(1, "ip_vs_add_service: service already exists.\n");
3702 +               ret = -EEXIST;
3703 +               goto out;
3704 +       }
3705 +
3706 +       svc = (struct ip_vs_service*)
3707 +               kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
3708 +       if (svc == NULL) {
3709 +               IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
3710 +               ret = -ENOMEM;
3711 +               goto out;
3712 +       }
3713 +       memset(svc, 0, sizeof(struct ip_vs_service));
3714 +
3715 +       svc->protocol = protocol;
3716 +       svc->addr = vaddr;
3717 +       svc->port = vport;
3718 +       svc->fwmark = vfwmark;
3719 +       svc->flags = mm->vs_flags;
3720 +       svc->timeout = mm->timeout;
3721 +       svc->netmask = mm->netmask;
3722 +
3723 +       INIT_LIST_HEAD(&svc->destinations);
3724 +       svc->stats.lock = SPIN_LOCK_UNLOCKED;
3725 +
3726 +       /*
3727 +        *    Bind the scheduler
3728 +        */
3729 +       ip_vs_bind_scheduler(svc, sched);
3730 +
3731 +       /*
3732 +        *    Hash the service into the service table
3733 +        */
3734 +       ip_vs_svc_hash(svc);
3735 +
3736 +       /*
3737 +        *    Update the virtual service counters
3738 +        */
3739 +       if (vport == FTPPORT)
3740 +               atomic_inc(&ip_vs_ftpsvc_counter);
3741 +       else if (vport == 0)
3742 +               atomic_inc(&ip_vs_nullsvc_counter);
3743 +
3744 +  out:
3745 +       write_unlock_bh(&__ip_vs_lock);
3746 +       LeaveFunction(2);
3747 +       return ret;
3748 +}
3749 +
3750 +
3751 +/*
3752 + *  Edit a service and bind it with a new scheduler
3753 + */
3754 +int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_masq_ctl *mctl)
3755 +{
3756 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
3757 +       struct ip_vs_scheduler *sched;
3758 +
3759 +       EnterFunction(2);
3760 +
3761 +       /*
3762 +        * Lookup the scheduler, by 'mctl->m_tname'
3763 +        */
3764 +       sched = ip_vs_lookup_scheduler(mctl->m_tname);
3765 +       if (sched == NULL) {
3766 +               IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
3767 +                          mctl->m_tname);
3768 +               return -ENOENT;
3769 +       }
3770 +
3771 +       write_lock_bh(&__ip_vs_lock);
3772 +
3773 +       /*
3774 +        *    Set the flags and timeout value
3775 +        */
3776 +       svc->flags = mm->vs_flags | IP_VS_SVC_F_HASHED;
3777 +       svc->timeout = mm->timeout;
3778 +       svc->netmask = mm->netmask;
3779 +
3780 +       /*
3781 +        *    Unbind the old scheduler
3782 +        */
3783 +       ip_vs_unbind_scheduler(svc);
3784 +
3785 +       /*
3786 +        *    Bind the new scheduler
3787 +        */
3788 +       ip_vs_bind_scheduler(svc, sched);
3789 +
3790 +       write_unlock_bh(&__ip_vs_lock);
3791 +
3792 +       LeaveFunction(2);
3793 +       return 0;
3794 +}
3795 +
3796 +
3797 +/*
3798 + *  Delete a service from the service list
3799 + */
3800 +int __ip_vs_del_service(struct ip_vs_service *svc)
3801 +{
3802 +       struct list_head *l;
3803 +       struct ip_vs_dest *dest;
3804 +
3805 +       /*
3806 +        *    Unbind scheduler
3807 +        */
3808 +       ip_vs_unbind_scheduler(svc);
3809 +
3810 +       /*
3811 +        *    Unlink the whole destination list
3812 +        */
3813 +       l = &svc->destinations;
3814 +       while (l->next != l) {
3815 +               dest = list_entry(l->next, struct ip_vs_dest, n_list);
3816 +               __ip_vs_del_dest(dest);
3817 +       }
3818 +
3819 +       /*
3820 +        *    Unhash it from the service table
3821 +        */
3822 +       if (ip_vs_svc_unhash(svc)) {
3823 +               /*
3824 +                *    Update the virtual service counters
3825 +                */
3826 +               if (svc->port == FTPPORT)
3827 +                       atomic_dec(&ip_vs_ftpsvc_counter);
3828 +               else if (svc->port == 0)
3829 +                       atomic_dec(&ip_vs_nullsvc_counter);
3830 +
3831 +               /*
3832 +                *    Free the service
3833 +                */
3834 +               kfree_s(svc, sizeof(struct ip_vs_service));
3835 +       } else {
3836 +                /*
3837 +                *  Unhash failed: call the update_service function of its scheduler
3838 +                */
3839 +               svc->scheduler->update_service(svc);
3840 +               return -EPERM;
3841 +       }
3842 +
3843 +       return 0;
3844 +}
3845 +
3846 +int ip_vs_del_service(struct ip_vs_service *svc)
3847 +{
3848 +       EnterFunction(2);
3849 +
3850 +       if (svc == NULL)
3851 +               return -EEXIST;
3852 +
3853 +       write_lock_bh(&__ip_vs_lock);
3854 +
3855 +       __ip_vs_del_service(svc);
3856 +
3857 +       write_unlock_bh(&__ip_vs_lock);
3858 +       LeaveFunction(2);
3859 +       return 0;
3860 +}
3861 +
3862 +
3863 +/*
3864 + *  Flush all the virtual services
3865 + */
3866 +int ip_vs_flush(void)
3867 +{
3868 +       int idx;
3869 +       struct ip_vs_service *svc;
3870 +       struct list_head *l;
3871 +
3872 +       write_lock_bh(&__ip_vs_lock);
3873 +
3874 +       /*
3875 +        *    Flush the service table hashed by <protocol,addr,port>
3876 +        */
3877 +       for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3878 +               l = &ip_vs_svc_table[idx];
3879 +               while (l->next != l) {
3880 +                       svc = list_entry(l->next,struct ip_vs_service,s_list);
3881 +
3882 +                       if (__ip_vs_del_service(svc))
3883 +                               goto out;
3884 +               }
3885 +       }
3886 +
3887 +       /*
3888 +        *    Flush the service table hashed by fwmark
3889 +        */
3890 +       for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3891 +               l = &ip_vs_svc_fwm_table[idx];
3892 +               while (l->next != l) {
3893 +                       svc = list_entry(l->next,struct ip_vs_service,f_list);
3894 +
3895 +                       if (__ip_vs_del_service(svc))
3896 +                               goto out;
3897 +               }
3898 +       }
3899 +
3900 +  out:
3901 +       write_unlock_bh(&__ip_vs_lock);
3902 +       return 0;
3903 +}
3904 +
3905 +
3906 +/*
3907 + *  Change the connection counter and the flags if the masq state changes
3908 + *  Called by the masq_tcp_state function.
3909 + */
3910 +void ip_vs_set_state(struct ip_masq *ms, int new_state)
3911 +{
3912 +       struct ip_vs_dest *dest = ms->dest;
3913 +
3914 +       if (dest &&
3915 +           (ms->flags & IP_MASQ_F_VS) && (new_state != ms->state)) {
3916 +               if (!(ms->flags & IP_MASQ_F_VS_INACTIVE) &&
3917 +                   (new_state != IP_MASQ_S_ESTABLISHED)) {
3918 +                       atomic_dec(&dest->activeconns);
3919 +                       atomic_inc(&dest->inactconns);
3920 +                       ms->flags |= IP_MASQ_F_VS_INACTIVE;
3921 +               } else if ((ms->flags & IP_MASQ_F_VS_INACTIVE) &&
3922 +                          (new_state == IP_MASQ_S_ESTABLISHED)) {
3923 +                       atomic_inc(&dest->activeconns);
3924 +                       atomic_dec(&dest->inactconns);
3925 +                       ms->flags &= ~IP_MASQ_F_VS_INACTIVE;
3926 +               }
3927 +
3928 +               IP_VS_DBG(8, "Set-state masq fwd:%c s:%s c:%u.%u.%u.%u:%d "
3929 +                         "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d flg:%X cnt:%d\n",
3930 +                         ip_vs_fwd_tag(ms), ip_masq_state_name(ms->state),
3931 +                         NIPQUAD(ms->daddr), ntohs(ms->dport),
3932 +                         NIPQUAD(ms->maddr), ntohs(ms->mport),
3933 +                         NIPQUAD(ms->saddr), ntohs(ms->sport),
3934 +                         ms->flags, atomic_read(&ms->refcnt));
3935 +       }
3936 +}
3937 +
3938 +
3939 +/*
3940 + *  Bind a masq entry with a virtual service destination
3941 + *  Called when a new masq entry is created for VS.
3942 + */
3943 +void ip_vs_bind_masq(struct ip_masq *ms, struct ip_vs_dest *dest)
3944 +{
3945 +       ms->flags |= dest->masq_flags;
3946 +       ms->dest = dest;
3947 +
3948 +       /*
3949 +        *    Increase the refcnt counter of the dest.
3950 +        */
3951 +       atomic_inc(&dest->refcnt);
3952 +
3953 +       IP_VS_DBG(9, "Bind-masq fwd:%c s:%s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
3954 +                 "d:%u.%u.%u.%u:%d flg:%X cnt:%d destcnt:%d\n",
3955 +                 ip_vs_fwd_tag(ms), ip_masq_state_name(ms->state),
3956 +                 NIPQUAD(ms->daddr), ntohs(ms->dport),
3957 +                 NIPQUAD(ms->maddr), ntohs(ms->mport),
3958 +                 NIPQUAD(ms->saddr), ntohs(ms->sport),
3959 +                 ms->flags, atomic_read(&ms->refcnt),
3960 +                 atomic_read(&dest->refcnt));
3961 +}
3962 +
3963 +
3964 +/*
3965 + *  Unbind a masq entry with its VS destination
3966 + *  Called by the masq_expire function.
3967 + */
3968 +void ip_vs_unbind_masq(struct ip_masq *ms)
3969 +{
3970 +       struct ip_vs_dest *dest = ms->dest;
3971 +
3972 +       if (dest) {
3973 +               IP_VS_DBG(9, "Unbind-masq fwd:%c s:%s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
3974 +                         "d:%u.%u.%u.%u:%d flg:%X cnt:%d destcnt:%d\n",
3975 +                         ip_vs_fwd_tag(ms), ip_masq_state_name(ms->state),
3976 +                         NIPQUAD(ms->daddr),ntohs(ms->dport),
3977 +                         NIPQUAD(ms->maddr),ntohs(ms->mport),
3978 +                         NIPQUAD(ms->saddr),ntohs(ms->sport),
3979 +                         ms->flags, atomic_read(&ms->refcnt),
3980 +                         atomic_read(&dest->refcnt));
3981 +
3982 +               /*
3983 +                * Decrease the inactconns or activeconns counter
3984 +                * if it is not a masq template (ms->dport!=0).
3985 +                */
3986 +               if (ms->dport) {
3987 +                       if (ms->flags & IP_MASQ_F_VS_INACTIVE) {
3988 +                               atomic_dec(&dest->inactconns);
3989 +                       } else {
3990 +                               atomic_dec(&dest->activeconns);
3991 +                       }
3992 +               }
3993 +
3994 +               /*
3995 +                *  Decrease the refcnt of the dest, and free the dest
3996 +                *  if nobody refers to it (refcnt=0).
3997 +                */
3998 +               if (atomic_dec_and_test(&dest->refcnt))
3999 +                       kfree_s(dest, sizeof(*dest));
4000 +       }
4001 +}
4002 +
4003 +
4004 +/*
4005 + *  Checking if the destination of a masq template is available.
4006 + *  If available, return 1, otherwise return 0 and invalidate this
4007 + *  masq template.
4008 + */
4009 +int ip_vs_check_template(struct ip_masq *mst)
4010 +{
4011 +       struct ip_vs_dest *dest = mst->dest;
4012 +
4013 +       /*
4014 +        * Checking the dest server status.
4015 +        */
4016 +       if ((dest == NULL) ||
4017 +           !(dest->flags & IP_VS_DEST_F_AVAILABLE)) {
4018 +               IP_VS_DBG(9, "check_template: dest not available for prot %s "
4019 +                         "src %u.%u.%u.%u:%d dest %u.%u.%u.%u:%d -> %X:%X\n",
4020 +                         masq_proto_name(mst->protocol),
4021 +                         NIPQUAD(mst->daddr), ntohs(mst->dport),
4022 +                         NIPQUAD(mst->maddr), ntohs(mst->mport),
4023 +                         (dest!=NULL)? ntohl(dest->addr):0,
4024 +                         (dest!=NULL)? ntohs(dest->port):0);
4025 +
4026 +               /*
4027 +                * Invalidate the masq template
4028 +                */
4029 +               ip_vs_unhash(mst);
4030 +               mst->sport = 65535;
4031 +               mst->mport = 65535;
4032 +               mst->dport = 0;
4033 +               ip_vs_hash(mst);
4034 +
4035 +               /*
4036 +                * Simply decrease the refcnt of the template,
4037 +                * don't restart its timer.
4038 +                */
4039 +               atomic_dec(&mst->refcnt);
4040 +               return 0;
4041 +       }
4042 +       return 1;
4043 +}
4044 +
4045 +
4046 +/*
4047 + *  IPVS persistent scheduling function
4048 + *  It creates a masq entry according to its template if exists, or selects
4049 + *  a server and creates a masq entry plus a template.
4050 + */
4051 +struct ip_masq *
4052 +ip_vs_sched_persist(struct ip_vs_service *svc, struct iphdr *iph)
4053 +{
4054 +       struct ip_masq *ms = NULL;
4055 +       struct ip_vs_dest *dest;
4056 +       const __u16 *portp;
4057 +       struct ip_masq *mst;
4058 +       __u16  dport;    /* destination port to forward */
4059 +       __u32  snet;     /* source network of the client, after masking */
4060 +
4061 +       portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
4062 +
4063 +       /* Mask saddr with the netmask to adjust template granularity */
4064 +       snet = iph->saddr & svc->netmask;
4065 +
4066 +       IP_VS_DBG(6, "P-schedule: src %u.%u.%u.%u:%d dest %u.%u.%u.%u:%d "
4067 +                 "snet %u.%u.%u.%u/%u.%u.%u.%u\n",
4068 +                 NIPQUAD(iph->saddr), ntohs(portp[0]),
4069 +                 NIPQUAD(iph->daddr), ntohs(portp[1]),
4070 +                 NIPQUAD(snet), NIPQUAD(svc->netmask));
4071 +
4072 +       /*
4073 +        * As far as we know, FTP is a very complicated network protocol, and
4074 +        * it uses control connection and data connections. For active FTP,
4075 +        * the FTP server initializes the data connection to the client, and its
4076 +        * source port is often 20. For passive FTP, the FTP server tells the client
4077 +        * the port that it passively listens on, and the client issues the data
4078 +        * connection. In the tunneling or direct routing mode, the load
4079 +        * balancer is on the client-to-server half of connection, the port
4080 +        * number is unknown to the load balancer. So, a template masq like
4081 +        * <daddr, 0, maddr, 0, saddr, 0> is created for persistent FTP
4082 +        * service, and a template like <daddr, 0, maddr, mport, saddr, sport>
4083 +        * is created for other persistent services.
4084 +        */
4085 +       if (portp[1] == svc->port) {
4086 +               /* Check if a template already exists */
4087 +               if (svc->port != FTPPORT)
4088 +                       mst = ip_vs_in_get(iph->protocol, snet, 0,
4089 +                                          iph->daddr, portp[1]);
4090 +               else
4091 +                       mst = ip_vs_in_get(iph->protocol, snet, 0,
4092 +                                          iph->daddr, 0);
4093 +
4094 +               if (!mst || !ip_vs_check_template(mst)) {
4095 +                       /*
4096 +                        * No template found or the dest of the masq
4097 +                        * template is not available.
4098 +                        */
4099 +                       read_lock(&__ip_vs_lock);
4100 +
4101 +                       dest = svc->scheduler->schedule(svc, iph);
4102 +                       if (dest == NULL) {
4103 +                               IP_VS_DBG(1, "P-schedule: no dest found.\n");
4104 +                               read_unlock(&__ip_vs_lock);
4105 +                               return NULL;
4106 +                       }
4107 +
4108 +                       /*
4109 +                        * Create a template like <protocol,daddr,0,
4110 +                        * maddr,mport,saddr,sport> for non-ftp service,
4111 +                        * and <protocol,daddr,0,maddr,0,saddr,0>
4112 +                        * for ftp service.
4113 +                        */
4114 +                       if (svc->port != FTPPORT)
4115 +                               mst = ip_masq_new_vs(iph->protocol,
4116 +                                                    iph->daddr, portp[1],
4117 +                                                    dest->addr, dest->port,
4118 +                                                    snet, 0,
4119 +                                                    0);
4120 +                       else
4121 +                               mst = ip_masq_new_vs(iph->protocol,
4122 +                                                    iph->daddr, 0,
4123 +                                                    dest->addr, 0,
4124 +                                                    snet, 0,
4125 +                                                    0);
4126 +                       if (mst == NULL) {
4127 +                               IP_VS_ERR("ip_masq_new_vs template failed\n");
4128 +                               read_unlock(&__ip_vs_lock);
4129 +                               return NULL;
4130 +                       }
4131 +
4132 +                       /*
4133 +                        * Bind the template with dest and set timeout.
4134 +                        */
4135 +                       ip_vs_bind_masq(mst, dest);
4136 +                       mst->timeout = svc->timeout;
4137 +
4138 +                       read_unlock(&__ip_vs_lock);
4139 +               } else {
4140 +                       /*
4141 +                        * Template found and its destination is available.
4142 +                        */
4143 +                       dest = mst->dest;
4144 +
4145 +                       /*
4146 +                        * Delete its timer so that it can be put back.
4147 +                        */
4148 +                       del_sltimer(&mst->timer);
4149 +               }
4150 +               dport = dest->port;
4151 +       } else {
4152 +               /*
4153 +                * Note: persistent fwmark-based services and persistent
4154 +                * port zero services are handled here.
4155 +                * fwmark template: <IPPROTO_IP,daddr,0,fwmark,0,saddr,0>
4156 +                * port zero template: <protocol,daddr,0,maddr,0,saddr,0>
4157 +                */
4158 +               if (svc->fwmark)
4159 +                       mst = ip_vs_in_get(IPPROTO_IP, snet, 0,
4160 +                                          htonl(svc->fwmark), 0);
4161 +               else
4162 +                       mst = ip_vs_in_get(iph->protocol,
4163 +                                          snet, 0, iph->daddr, 0);
4164 +
4165 +               if (!mst || !ip_vs_check_template(mst)) {
4166 +                       /*
4167 +                        * If it is not persistent port zero, return NULL.
4168 +                        */
4169 +                       if (svc->port)
4170 +                               return NULL;
4171 +
4172 +                       read_lock(&__ip_vs_lock);
4173 +
4174 +                       dest = svc->scheduler->schedule(svc, iph);
4175 +                       if (dest == NULL) {
4176 +                               IP_VS_DBG(1, "P-schedule: no dest found.\n");
4177 +                               read_unlock(&__ip_vs_lock);
4178 +                               return NULL;
4179 +                       }
4180 +
4181 +                       /*
4182 +                        * Create a template according to the service
4183 +                        */
4184 +                       if (svc->fwmark)
4185 +                               mst = ip_masq_new_vs(IPPROTO_IP,
4186 +                                                    htonl(svc->fwmark), 0,
4187 +                                                    dest->addr, 0,
4188 +                                                    snet, 0,
4189 +                                                    0);
4190 +                       else
4191 +                               mst = ip_masq_new_vs(iph->protocol,
4192 +                                                    iph->daddr, 0,
4193 +                                                    dest->addr, 0,
4194 +                                                    snet, 0,
4195 +                                                    0);
4196 +                       if (mst == NULL) {
4197 +                               IP_VS_ERR("ip_masq_new_vs template failed\n");
4198 +                               read_unlock(&__ip_vs_lock);
4199 +                               return NULL;
4200 +                       }
4201 +
4202 +                       /*
4203 +                        * Bind the template with dest and set timeout.
4204 +                        */
4205 +                       ip_vs_bind_masq(mst, dest);
4206 +                       mst->timeout = svc->timeout;
4207 +                       read_unlock(&__ip_vs_lock);
4208 +               } else {
4209 +                       dest = mst->dest;
4210 +
4211 +                       /*
4212 +                        * Delete its timer so that it can be put back.
4213 +                        */
4214 +                       del_sltimer(&mst->timer);
4215 +               }
4216 +               dport = portp[1];
4217 +       }
4218 +
4219 +       /*
4220 +        *    Create a new masq according to the template
4221 +        */
4222 +       ms = ip_masq_new_vs(iph->protocol,
4223 +                           iph->daddr, portp[1],
4224 +                           dest->addr, dport,
4225 +                           iph->saddr, portp[0],
4226 +                           0);
4227 +       if (ms == NULL) {
4228 +               IP_VS_ERR("ip_masq_new_vs failed\n");
4229 +               ip_masq_put(mst);
4230 +               return NULL;
4231 +       }
4232 +
4233 +       /*
4234 +        *    Bind the masq entry with the vs dest.
4235 +        */
4236 +       ip_vs_bind_masq(ms, dest);
4237 +
4238 +       /*
4239 +        *    Increase the inactive connection counter
4240 +        *    because it is in Syn-Received
4241 +        *    state (inactive) when the masq is created.
4242 +        */
4243 +       atomic_inc(&dest->inactconns);
4244 +
4245 +       /*
4246 +        *    Add its control
4247 +        */
4248 +       ip_masq_control_add(ms, mst);
4249 +
4250 +       ip_masq_put(mst);
4251 +       return ms;
4252 +}
4253 +
4254 +
4255 +/*
4256 + *  IPVS main scheduling function
4257 + *  It selects a server according to the virtual service, and
4258 + *  creates a masq entry.
4259 + */
4260 +struct ip_masq *ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
4261 +{
4262 +       struct ip_masq *ms = NULL;
4263 +       struct ip_vs_dest *dest;
4264 +       const __u16 *portp;
4265 +
4266 +       /*
4267 +        *    Persistent service
4268 +        */
4269 +       if (svc->flags & IP_VS_SVC_F_PERSISTENT)
4270 +               return ip_vs_sched_persist(svc, iph);
4271 +
4272 +       /*
4273 +        *    Non-persistent service
4274 +        */
4275 +       portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
4276 +       if (!svc->fwmark && portp[1] != svc->port) {
4277 +               if (!svc->port)
4278 +                       IP_VS_ERR("Schedule: port zero only supported in persistent services, check your ipvs configuration\n");
4279 +               return NULL;
4280 +       }
4281 +
4282 +       read_lock(&__ip_vs_lock);
4283 +
4284 +       dest = svc->scheduler->schedule(svc, iph);
4285 +       if (dest == NULL) {
4286 +               IP_VS_DBG(1, "Schedule: no dest found.\n");
4287 +               read_unlock(&__ip_vs_lock);
4288 +               return NULL;
4289 +       }
4290 +
4291 +       /*
4292 +        *    Create a masquerading entry.
4293 +        */
4294 +       ms = ip_masq_new_vs(iph->protocol,
4295 +                           iph->daddr, portp[1],
4296 +                           dest->addr, dest->port?dest->port:portp[1],
4297 +                           iph->saddr, portp[0],
4298 +                           0);
4299 +       if (ms == NULL) {
4300 +               IP_VS_ERR("Schedule: ip_masq_new_vs failed\n");
4301 +               read_unlock(&__ip_vs_lock);
4302 +               return NULL;
4303 +       }
4304 +
4305 +       /*
4306 +        *    Bind the masq entry with the vs dest.
4307 +        */
4308 +       ip_vs_bind_masq(ms, dest);
4309 +
4310 +       /*
4311 +        *    Increase the inactive connection counter because it is in
4312 +        *    Syn-Received state (inactive) when the masq is created.
4313 +        */
4314 +       atomic_inc(&dest->inactconns);
4315 +
4316 +       IP_VS_DBG(9, "Schedule masq fwd:%c s:%s c:%u.%u.%u.%u:%d "
4317 +                 "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d flg:%X cnt:%d\n",
4318 +                 ip_vs_fwd_tag(ms), ip_masq_state_name(ms->state),
4319 +                 NIPQUAD(ms->daddr),ntohs(ms->dport),
4320 +                 NIPQUAD(ms->maddr),ntohs(ms->mport),
4321 +                 NIPQUAD(ms->saddr),ntohs(ms->sport),
4322 +                 ms->flags, atomic_read(&ms->refcnt));
4323 +
4324 +       read_unlock(&__ip_vs_lock);
4325 +
4326 +       return ms;
4327 +}
4328 +
4329 +
4330 +/*
4331 + *  Pass or drop the packet.
4332 + *  Called by ip_fw_demasquerade, when the virtual service is available but
4333 + *  no destination is available for a new connection.
4334 + */
4335 +int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb)
4336 +{
4337 +       struct iphdr *iph = skb->nh.iph;
4338 +       __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
4339 +
4340 +       /*
4341 +        * When a virtual ftp service is present, packets destined
4342 +        * for other services on the VIP may get here (except services
4343 +        * listed in the ipvs table); pass these packets on, because it
4344 +        * is not ipvs's job to decide to drop them.
4345 +        */
4346 +       if ((svc->port == FTPPORT) && (portp[1] != FTPPORT))
4347 +               return 0;
4348 +
4349 +       /*
4350 +        * Notify the client that the destination is unreachable, and
4351 +        * release the socket buffer.
4352 +        * Since this is in the IP layer, the TCP socket is not actually
4353 +        * created, so a TCP RST packet cannot be sent; instead,
4354 +        * ICMP_PORT_UNREACH is sent here whether it is TCP or UDP. --WZ
4355 +        */
4356 +       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
4357 +       kfree_skb(skb);
4358 +       return -2;
4359 +}
4360 +
4361 +
4362 +/*
4363 + *     IPVS user control entry
4364 + */
4365 +int ip_vs_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
4366 +{
4367 +       struct ip_vs_service *svc = NULL;
4368 +       struct ip_vs_user *mm =  &mctl->u.vs_user;
4369 +       __u32 vaddr = mm->vaddr;
4370 +       __u16 vport = mm->vport;
4371 +       int proto_num = masq_proto_num(mm->protocol);
4372 +
4373 +       /*
4374 +        * Check the size of mctl, no overflow...
4375 +        */
4376 +       if (optlen != sizeof(*mctl))
4377 +               return -EINVAL;
4378 +
4379 +       /*
4380 +        * Flush all the virtual service...
4381 +        */
4382 +       if (mctl->m_cmd == IP_MASQ_CMD_FLUSH)
4383 +               return ip_vs_flush();
4384 +
4385 +       /*
4386 +        * Check for valid protocol: TCP or UDP
4387 +        */
4388 +       if (mm->vfwmark == 0 && (proto_num < 0 || proto_num > 1)) {
4389 +               IP_VS_INFO("vs_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s",
4390 +                          ntohs(mm->protocol),
4391 +                          NIPQUAD(vaddr), ntohs(vport), mctl->m_tname);
4392 +               return -EFAULT;
4393 +       }
4394 +
4395 +       /*
4396 +        * Lookup the exact service by (protocol, vaddr, vport)
4397 +        */
4398 +       read_lock(&__ip_vs_lock);
4399 +
4400 +       if (mm->vfwmark == 0)
4401 +               svc = __ip_vs_lookup_service(mm->protocol, vaddr, vport);
4402 +       else
4403 +               svc = __ip_vs_lookup_svc_fwm(mm->vfwmark);
4404 +
4405 +       read_unlock(&__ip_vs_lock);
4406 +
4407 +       switch (mctl->m_cmd) {
4408 +       case IP_MASQ_CMD_ADD:
4409 +               if (svc != NULL)
4410 +                       return -EEXIST;
4411 +
4412 +               return ip_vs_add_service(mctl);
4413 +
4414 +       case IP_MASQ_CMD_SET:
4415 +               if (svc == NULL)
4416 +                       return -ESRCH;
4417 +
4418 +               return ip_vs_edit_service(svc, mctl);
4419 +
4420 +       case IP_MASQ_CMD_DEL:
4421 +               if (svc == NULL)
4422 +                       return  -ESRCH;
4423 +               else
4424 +                       return ip_vs_del_service(svc);
4425 +
4426 +       case IP_MASQ_CMD_ADD_DEST:
4427 +               if (svc == NULL)
4428 +                       return  -ESRCH;
4429 +               else
4430 +                       return ip_vs_add_dest(svc, mctl);
4431 +
4432 +       case IP_MASQ_CMD_SET_DEST:
4433 +               if (svc == NULL)
4434 +                       return  -ESRCH;
4435 +               else
4436 +                       return ip_vs_edit_dest(svc, mctl);
4437 +
4438 +       case IP_MASQ_CMD_DEL_DEST:
4439 +               if (svc == NULL)
4440 +                       return  -ESRCH;
4441 +               else
4442 +                       return ip_vs_del_dest(svc, mctl);
4443 +       }
4444 +       return -EINVAL;
4445 +}
4446 +
4447 +
4448 +#ifdef CONFIG_SYSCTL
4449 +
4450 +static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
4451 +       struct file * filp,void *buffer, size_t *lenp)
4452 +{
4453 +       int *valp = ctl->data;
4454 +       int val = *valp;
4455 +       int ret;
4456 +
4457 +       ret = proc_dointvec(ctl, write, filp, buffer, lenp);
4458 +       if (write && (*valp != val)) {
4459 +               if ((*valp < 0) || (*valp > 3)) {
4460 +                       /* Restore the correct value */
4461 +                       *valp = val;
4462 +               } else {
4463 +                       update_defense_level();
4464 +               }
4465 +       }
4466 +       return ret;
4467 +}
4468 +
4469 +ctl_table ipv4_vs_table[] = {
4470 +#ifdef CONFIG_IP_VS_DEBUG
4471 +       {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
4472 +        &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
4473 +        &proc_dointvec},
4474 +#endif
4475 +       {NET_IPV4_VS_AMEMTHRESH, "amemthresh",
4476 +        &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
4477 +        &proc_dointvec},
4478 +       {NET_IPV4_VS_AMDROPRATE, "am_droprate",
4479 +        &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
4480 +        &proc_dointvec},
4481 +       {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
4482 +        &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
4483 +        &ip_vs_sysctl_defense_mode},
4484 +       {NET_IPV4_VS_DROP_PACKET, "drop_packet",
4485 +        &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
4486 +        &ip_vs_sysctl_defense_mode},
4487 +       {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
4488 +        &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
4489 +        &ip_vs_sysctl_defense_mode},
4490 +       {NET_IPV4_VS_TO_ES, "timeout_established",
4491 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_ESTABLISHED],
4492 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4493 +       {NET_IPV4_VS_TO_SS, "timeout_synsent",
4494 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_SYN_SENT],
4495 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4496 +       {NET_IPV4_VS_TO_SR, "timeout_synrecv",
4497 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_SYN_RECV],
4498 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4499 +       {NET_IPV4_VS_TO_FW, "timeout_finwait",
4500 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_FIN_WAIT],
4501 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4502 +       {NET_IPV4_VS_TO_TW, "timeout_timewait",
4503 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_TIME_WAIT],
4504 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4505 +       {NET_IPV4_VS_TO_CL, "timeout_close",
4506 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_CLOSE],
4507 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4508 +       {NET_IPV4_VS_TO_CW, "timeout_closewait",
4509 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_CLOSE_WAIT],
4510 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4511 +       {NET_IPV4_VS_TO_LA, "timeout_lastack",
4512 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_LAST_ACK],
4513 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4514 +       {NET_IPV4_VS_TO_LI, "timeout_listen",
4515 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_LISTEN],
4516 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4517 +       {NET_IPV4_VS_TO_SA, "timeout_synack",
4518 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_SYNACK],
4519 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4520 +       {NET_IPV4_VS_TO_UDP, "timeout_udp",
4521 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_UDP],
4522 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4523 +       {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
4524 +        &masq_timeout_table_dos.timeout[IP_MASQ_S_ICMP],
4525 +        sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
4526 +       {0}
4527 +};
4528 +#endif
4529 +
4530 +#ifdef CONFIG_PROC_FS
4531 +/*
4532 + *     Write the contents of the VS rule table to a PROCfs file.
4533 + */
4534 +static int ip_vs_procinfo(char *buf, char **start, off_t offset,
4535 +                         int length, int *eof, void *data)
4536 +{
4537 +       int len=0;
4538 +       off_t pos=0;
4539 +       char temp[64], temp2[32];
4540 +       int idx;
4541 +       struct ip_vs_service *svc;
4542 +       struct ip_vs_dest *dest;
4543 +       struct list_head *l, *e, *p, *q;
4544 +
4545 +       /*
4546 +        * Note: since the length of the buffer is usually the multiple
4547 +        * of 512, it is good to use fixed record of the divisor of 512,
4548 +        * so that records won't be truncated at buffer boundary.
4549 +        */
4550 +       pos = 192;
4551 +       if (pos > offset) {
4552 +               sprintf(temp,
4553 +                       "IP Virtual Server version %d.%d.%d (size=%d)",
4554 +                       NVERSION(IP_VS_VERSION_CODE), IP_VS_TAB_SIZE);
4555 +               len += sprintf(buf+len, "%-63s\n", temp);
4556 +               len += sprintf(buf+len, "%-63s\n",
4557 +                              "Prot LocalAddress:Port Scheduler Flags");
4558 +               len += sprintf(buf+len, "%-63s\n",
4559 +                              "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
4560 +       }
4561 +
4562 +       read_lock_bh(&__ip_vs_lock);
4563 +
4564 +       /* print the service table hashed by <protocol,addr,port> */
4565 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4566 +               l = &ip_vs_svc_table[idx];
4567 +               for (e=l->next; e!=l; e=e->next) {
4568 +                       svc = list_entry(e, struct ip_vs_service, s_list);
4569 +                       pos += 64;
4570 +                       if (pos > offset) {
4571 +                               if (svc->flags & IP_VS_SVC_F_PERSISTENT)
4572 +                                       sprintf(temp2, "persistent %d %08X",
4573 +                                               svc->timeout,
4574 +                                               ntohl(svc->netmask));
4575 +                               else
4576 +                                       temp2[0] = '\0';
4577 +
4578 +                               sprintf(temp, "%s  %08X:%04X %s %s",
4579 +                                       masq_proto_name(svc->protocol),
4580 +                                       ntohl(svc->addr),
4581 +                                       ntohs(svc->port),
4582 +                                       svc->scheduler->name, temp2);
4583 +                               len += sprintf(buf+len, "%-63s\n", temp);
4584 +                               if (len >= length)
4585 +                                       goto done;
4586 +                       }
4587 +
4588 +                       p = &svc->destinations;
4589 +                       for (q=p->next; q!=p; q=q->next) {
4590 +                               dest = list_entry(q, struct ip_vs_dest, n_list);
4591 +                               pos += 64;
4592 +                               if (pos <= offset)
4593 +                                       continue;
4594 +                               sprintf(temp,
4595 +                                       "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
4596 +                                       ntohl(dest->addr),
4597 +                                       ntohs(dest->port),
4598 +                                       ip_vs_fwd_name(dest->masq_flags),
4599 +                                       dest->weight,
4600 +                                       atomic_read(&dest->activeconns),
4601 +                                       atomic_read(&dest->inactconns));
4602 +                               len += sprintf(buf+len, "%-63s\n", temp);
4603 +                               if (len >= length)
4604 +                                       goto done;
4605 +                       }
4606 +               }
4607 +       }
4608 +
4609 +       /* print the service table hashed by fwmark */
4610 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4611 +               l = &ip_vs_svc_fwm_table[idx];
4612 +               for (e=l->next; e!=l; e=e->next) {
4613 +                       svc = list_entry(e, struct ip_vs_service, f_list);
4614 +                       pos += 64;
4615 +                       if (pos > offset) {
4616 +                               if (svc->flags & IP_VS_SVC_F_PERSISTENT)
4617 +                                       sprintf(temp2, "persistent %d %08X",
4618 +                                               svc->timeout,
4619 +                                               ntohl(svc->netmask));
4620 +                               else
4621 +                                       temp2[0] = '\0';
4622 +
4623 +                               sprintf(temp, "FWM  %08X %s %s",
4624 +                                       svc->fwmark,
4625 +                                       svc->scheduler->name, temp2);
4626 +                               len += sprintf(buf+len, "%-63s\n", temp);
4627 +                               if (len >= length)
4628 +                                       goto done;
4629 +                       }
4630 +
4631 +                       p = &svc->destinations;
4632 +                       for (q=p->next; q!=p; q=q->next) {
4633 +                               dest = list_entry(q, struct ip_vs_dest, n_list);
4634 +                               pos += 64;
4635 +                               if (pos <= offset)
4636 +                                       continue;
4637 +                               sprintf(temp,
4638 +                                       "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
4639 +                                       ntohl(dest->addr),
4640 +                                       ntohs(dest->port),
4641 +                                       ip_vs_fwd_name(dest->masq_flags),
4642 +                                       dest->weight,
4643 +                                       atomic_read(&dest->activeconns),
4644 +                                       atomic_read(&dest->inactconns));
4645 +                               len += sprintf(buf+len, "%-63s\n", temp);
4646 +                               if (len >= length)
4647 +                                       goto done;
4648 +                       }
4649 +               }
4650 +       }
4651 +
4652 +  done:
4653 +       read_unlock_bh(&__ip_vs_lock);
4654 +
4655 +       *start = buf+len-(pos-offset);          /* Start of wanted data */
4656 +       len = pos-offset;
4657 +       if (len > length)
4658 +               len = length;
4659 +       if (len < 0)
4660 +               len = 0;
4661 +       return len;
4662 +}
4663 +
4664 +struct proc_dir_entry ip_vs_proc_entry = {
4665 +       0,                      /* dynamic inode */
4666 +       2, "vs",                /* namelen and name */
4667 +       S_IFREG | S_IRUGO,      /* mode */
4668 +       1, 0, 0, 0,             /* nlinks, owner, group, size */
4669 +       &proc_net_inode_operations, /* operations */
4670 +       NULL,                   /* get_info */
4671 +       NULL,                   /* fill_inode */
4672 +       NULL, NULL, NULL,       /* next, parent, subdir */
4673 +       NULL,                   /* data */
4674 +       &ip_vs_procinfo,        /* function to generate proc data */
4675 +};
4676 +
4677 +
4678 +/*
4679 + *     Write the IPVS statistic information to a PROCfs file.
4680 + */
4681 +struct ip_vs_stats ip_vs_stats = {SPIN_LOCK_UNLOCKED, 0, 0};
4682 +
4683 +static int
4684 +ip_vs_stats_get_info(char *buf, char **start, off_t offset,
4685 +                    int length, int *eof, void *data)
4686 +{
4687 +       int idx;
4688 +       int len=0;
4689 +       off_t pos=0;
4690 +       char temp[128];
4691 +       struct ip_vs_service *svc;
4692 +       struct ip_vs_dest *dest;
4693 +       struct list_head *l, *e, *p, *q;
4694 +
4695 +       pos += 128;
4696 +       if (pos > offset) {
4697 +               len += sprintf(buf+len, "%-63s\n",
4698 +/*                                01234567 01234567 01234567 0123456701234567 0123456701234567 */
4699 +                              "TotalConns   InPkts  OutPkts          InBytes         OutBytes");
4700 +               spin_lock(&ip_vs_stats.lock);
4701 +               sprintf(temp, "  %8X %8X %8X %8X%08X %8X%08X",
4702 +                       ip_vs_stats.conns,
4703 +                       ip_vs_stats.inpkts,
4704 +                       ip_vs_stats.outpkts,
4705 +                       (__u32)(ip_vs_stats.inbytes >> 32),
4706 +                       (__u32)ip_vs_stats.inbytes,
4707 +                       (__u32)(ip_vs_stats.outbytes >> 32),
4708 +                       (__u32)ip_vs_stats.outbytes);
4709 +               spin_unlock(&ip_vs_stats.lock);
4710 +               len += sprintf(buf+len, "%-63s\n", temp);
4711 +       }
4712 +
4713 +       read_lock_bh(&__ip_vs_lock);
4714 +
4715 +       /* print the service statistics */
4716 +       pos += 128;
4717 +       if (pos > offset) {
4718 +               len += sprintf(buf+len, "%-127s\n",
4719 +                              "\nVirtual Service\n"
4720 +                              "Pro VirtService      Conns   InPkts  OutPkts          InBytes         OutBytes");
4721 +       }
4722 +
4723 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4724 +               l = &ip_vs_svc_table[idx];
4725 +               for (e=l->next; e!=l; e=e->next) {
4726 +                       svc = list_entry(e, struct ip_vs_service, s_list);
4727 +                       pos += 128;
4728 +                       if (pos <= offset)
4729 +                               continue;
4730 +                       spin_lock(&svc->stats.lock);
4731 +                       sprintf(temp, "%3s %08X:%04X %8X %8X %8X %8X%08X %8X%08X",
4732 +                               masq_proto_name(svc->protocol),
4733 +                               ntohl(svc->addr),
4734 +                               ntohs(svc->port),
4735 +                               svc->stats.conns,
4736 +                               svc->stats.inpkts,
4737 +                               svc->stats.outpkts,
4738 +                               (__u32)(svc->stats.inbytes >> 32),
4739 +                               (__u32)svc->stats.inbytes,
4740 +                               (__u32)(svc->stats.outbytes >> 32),
4741 +                               (__u32)svc->stats.outbytes);
4742 +                       spin_unlock(&svc->stats.lock);
4743 +                       len += sprintf(buf+len, "%-127s\n", temp);
4744 +                       if (pos >= offset+length)
4745 +                               goto done;
4746 +               }
4747 +       }
4748 +
4749 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4750 +               l = &ip_vs_svc_fwm_table[idx];
4751 +               for (e=l->next; e!=l; e=e->next) {
4752 +                       svc = list_entry(e, struct ip_vs_service, f_list);
4753 +                       pos += 128;
4754 +                       if (pos <= offset)
4755 +                               continue;
4756 +                       spin_lock(&svc->stats.lock);
4757 +                       sprintf(temp, "FWM %08X      %8X %8X %8X %8X%08X %8X%08X",
4758 +                               svc->fwmark,
4759 +                               svc->stats.conns,
4760 +                               svc->stats.inpkts,
4761 +                               svc->stats.outpkts,
4762 +                               (__u32)(svc->stats.inbytes >> 32),
4763 +                               (__u32)svc->stats.inbytes,
4764 +                               (__u32)(svc->stats.outbytes >> 32),
4765 +                               (__u32)svc->stats.outbytes);
4766 +                       spin_unlock(&svc->stats.lock);
4767 +                       len += sprintf(buf+len, "%-127s\n", temp);
4768 +                       if (pos >= offset+length)
4769 +                               goto done;
4770 +               }
4771 +       }
4772 +
4773 +       /* print the real server statistics */
4774 +       pos += 128;
4775 +       if (pos > offset) {
4776 +               len += sprintf(buf+len, "%-127s\n",
4777 +                              "\nReal Service\n"
4778 +                              "Pro VirtService   RealService      Conns   InPkts  OutPkts          InBytes         OutBytes");
4779 +       }
4780 +
4781 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4782 +               l = &ip_vs_svc_table[idx];
4783 +               for (e=l->next; e!=l; e=e->next) {
4784 +                       svc = list_entry(e, struct ip_vs_service, s_list);
4785 +                       p = &svc->destinations;
4786 +                       for (q=p->next; q!=p; q=q->next) {
4787 +                               dest = list_entry(q, struct ip_vs_dest, n_list);
4788 +                               pos += 128;
4789 +                               if (pos <= offset)
4790 +                                       continue;
4791 +                               spin_lock(&dest->stats.lock);
4792 +                               sprintf(temp,
4793 +                                       "%3s %08X:%04X %08X:%04X %8X %8X %8X %8X%08X %8X%08X",
4794 +                                       masq_proto_name(svc->protocol),
4795 +                                       ntohl(svc->addr),
4796 +                                       ntohs(svc->port),
4797 +                                       ntohl(dest->addr),
4798 +                                       ntohs(dest->port),
4799 +                                       dest->stats.conns,
4800 +                                       dest->stats.inpkts,
4801 +                                       dest->stats.outpkts,
4802 +                                       (__u32)(dest->stats.inbytes >> 32),
4803 +                                       (__u32)dest->stats.inbytes,
4804 +                                       (__u32)(dest->stats.outbytes >> 32),
4805 +                                       (__u32)dest->stats.outbytes);
4806 +                               spin_unlock(&dest->stats.lock);
4807 +                               len += sprintf(buf+len, "%-127s\n", temp);
4808 +                               if (pos >= offset+length)
4809 +                                       goto done;
4810 +                       }
4811 +               }
4812 +       }
4813 +
4814 +       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4815 +               l = &ip_vs_svc_fwm_table[idx];
4816 +               for (e=l->next; e!=l; e=e->next) {
4817 +                       svc = list_entry(e, struct ip_vs_service, f_list);
4818 +                       p = &svc->destinations;
4819 +                       for (q=p->next; q!=p; q=q->next) {
4820 +                               dest = list_entry(q,struct ip_vs_dest,n_list);
4821 +                               pos += 128;
4822 +                               if (pos <= offset)
4823 +                                       continue;
4824 +                               spin_lock(&dest->stats.lock);
4825 +                               sprintf(temp,
4826 +                                       "FWM %08X      %08X:%04X %8X %8X %8X %8X%08X %8X%08X",
4827 +                                       svc->fwmark,
4828 +                                       ntohl(dest->addr),
4829 +                                       ntohs(dest->port),
4830 +                                       dest->stats.conns,
4831 +                                       dest->stats.inpkts,
4832 +                                       dest->stats.outpkts,
4833 +                                       (__u32)(dest->stats.inbytes >> 32),
4834 +                                       (__u32)dest->stats.inbytes,
4835 +                                       (__u32)(dest->stats.outbytes >> 32),
4836 +                                       (__u32)dest->stats.outbytes);
4837 +                               spin_unlock(&dest->stats.lock);
4838 +                               len += sprintf(buf+len, "%-127s\n", temp);
4839 +                               if (pos >= offset+length)
4840 +                                       goto done;
4841 +                       }
4842 +               }
4843 +       }
4844 + done:
4845 +       read_unlock_bh(&__ip_vs_lock);
4846 +
4847 +       *start = buf+len-(pos-offset);          /* Start of wanted data */
4848 +       len = pos-offset;
4849 +       if (len > length)
4850 +               len = length;
4851 +       if (len < 0)
4852 +               len = 0;
4853 +       return len;
4854 +}
4855 +
4856 +struct proc_dir_entry ip_vs_stat_proc_entry = {
4857 +       0,                      /* dynamic inode */
4858 +       8, "vs_stats",          /* namelen and name */
4859 +       S_IFREG | S_IRUGO,      /* mode */
4860 +       1, 0, 0, 0,             /* nlinks, owner, group, size */
4861 +       &proc_net_inode_operations, /* operations */
4862 +       NULL,                   /* get_info */
4863 +       NULL,                   /* fill_inode */
4864 +       NULL, NULL, NULL,       /* next, parent, subdir */
4865 +       NULL,                   /* data */
4866 +       &ip_vs_stats_get_info,  /* function to generate proc data */
4867 +};
4868 +
4869 +#endif
4870 +
4871 +
4872 +/*
4873 + *   This function encapsulates the packet in a new IP header, its destination
4874 + *   will be set to the daddr. Most code of this function is from ipip.c.
4875 + *   Usage:
4876 + *     It is called in the ip_vs_forward() function. The load balancer
4877 + *     selects a real server from a cluster based on a scheduling algorithm,
4878 + *     encapsulates the packet and forwards it to the selected server. All real
4879 + *     servers are configured with "ifconfig tunl0 <Virtual IP Address> up".
4880 + *     When the server receives the encapsulated packet, it decapsulates the
4881 + *     packet, processes the request and returns the reply packets directly to
4882 + *     the client without passing the load balancer. This can greatly
4883 + *     increase the scalability of virtual server.
4884 + *   Returns:
4885 + *     if succeeded, return 1; otherwise, return 0.
4886 + */
4887 +
4888 +int ip_vs_tunnel_xmit(struct sk_buff *skb, __u32 daddr)
4889 +{
4890 +       struct rtable *rt;                      /* Route to the other host */
4891 +       struct device *tdev;                    /* Device to other host */
4892 +       struct iphdr  *old_iph = skb->nh.iph;
4893 +       u8     tos = old_iph->tos;
4894 +       u16    df = old_iph->frag_off;
4895 +       struct iphdr  *iph;                     /* Our new IP header */
4896 +       int    max_headroom;                    /* The extra header space needed */
4897 +       u32    dst = daddr;
4898 +       u32    src = 0;
4899 +       int    mtu;
4900 +
4901 +       if (skb->protocol != __constant_htons(ETH_P_IP)) {
4902 +               IP_VS_DBG(0, "ip_vs_tunnel_xmit(): protocol error, ETH_P_IP: %d, skb protocol: %d\n",
4903 +                         __constant_htons(ETH_P_IP),skb->protocol);
4904 +               goto tx_error;
4905 +       }
4906 +
4907 +       if (ip_route_output(&rt, dst, src, RT_TOS(tos), 0)) {
4908 +               IP_VS_DBG(0, "ip_vs_tunnel_xmit(): route error, dest: "
4909 +                         "%u.%u.%u.%u\n", NIPQUAD(dst));
4910 +               goto tx_error_icmp;
4911 +       }
4912 +       tdev = rt->u.dst.dev;
4913 +
4914 +       mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
4915 +       if (mtu < 68) {
4916 +               ip_rt_put(rt);
4917 +               IP_VS_DBG(0, "ip_vs_tunnel_xmit(): mtu less than 68\n");
4918 +               goto tx_error;
4919 +       }
4920 +       if (skb->dst && mtu < skb->dst->pmtu)
4921 +               skb->dst->pmtu = mtu;
4922 +
4923 +       df |= (old_iph->frag_off&__constant_htons(IP_DF));
4924 +
4925 +       if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
4926 +               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
4927 +               ip_rt_put(rt);
4928 +               IP_VS_DBG(0, "ip_vs_tunnel_xmit(): frag needed\n");
4929 +               goto tx_error;
4930 +       }
4931 +
4932 +       skb->h.raw = skb->nh.raw;
4933 +
4934 +       /*
4935 +        * Okay, now see if we can stuff it in the buffer as-is.
4936 +        */
4937 +       max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
4938 +
4939 +       if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
4940 +               struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
4941 +               if (!new_skb) {
4942 +                       ip_rt_put(rt);
4943 +                       kfree_skb(skb);
4944 +                       IP_VS_ERR("ip_vs_tunnel_xmit(): no memory for new_skb\n");
4945 +                       return 0;
4946 +               }
4947 +               kfree_skb(skb);
4948 +               skb = new_skb;
4949 +       }
4950 +
4951 +       skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
4952 +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
4953 +       dst_release(skb->dst);
4954 +       skb->dst = &rt->u.dst;
4955 +
4956 +       /*
4957 +        *      Push down and install the IPIP header.
4958 +        */
4959 +
4960 +       iph                     =       skb->nh.iph;
4961 +       iph->version            =       4;
4962 +       iph->ihl                =       sizeof(struct iphdr)>>2;
4963 +       iph->frag_off           =       df;
4964 +       iph->protocol           =       IPPROTO_IPIP;
4965 +       iph->tos                =       tos;
4966 +       iph->daddr              =       rt->rt_dst;
4967 +       iph->saddr              =       rt->rt_src;
4968 +       iph->ttl                =       old_iph->ttl;
4969 +       iph->tot_len            =       htons(skb->len);
4970 +       iph->id                 =       htons(ip_id_count++);
4971 +       ip_send_check(iph);
4972 +
4973 +       IPCB(skb)->flags |= IPSKB_REDIRECTED;
4974 +       IPCB(skb)->flags |= IPSKB_MASQUERADED;
4975 +
4976 +       ip_send(skb);
4977 +       return 1;
4978 +
4979 +  tx_error_icmp:
4980 +       dst_link_failure(skb);
4981 +  tx_error:
4982 +       kfree_skb(skb);
4983 +       return 0;
4984 +}
4985 +
4986 +
4987 +/*
4988 + *      Direct Routing
4989 + */
4990 +int ip_vs_dr_xmit(struct sk_buff *skb, __u32 daddr)
4991 +{
4992 +       struct rtable *rt;                      /* Route to the other host */
4993 +       struct iphdr  *iph = skb->nh.iph;
4994 +       u8     tos = iph->tos;
4995 +       int    mtu;
4996 +
4997 +       if (ip_route_output(&rt, daddr, 0, RT_TOS(tos), 0)) {
4998 +               IP_VS_DBG(0, "ip_vs_dr_xmit(): route error, dest: %u.%u.%u.%u\n",
4999 +                         NIPQUAD(daddr));
5000 +               goto tx_error_icmp;
5001 +       }
5002 +
5003 +       /* MTU checking */
5004 +       mtu = rt->u.dst.pmtu;
5005 +       if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
5006 +               icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
5007 +               ip_rt_put(rt);
5008 +               IP_VS_DBG(0, "ip_vs_dr_xmit(): frag needed\n");
5009 +               goto tx_error;
5010 +       }
5011 +
5012 +       dst_release(skb->dst);
5013 +       skb->dst = &rt->u.dst;
5014 +
5015 +       IPCB(skb)->flags |= IPSKB_REDIRECTED;
5016 +       IPCB(skb)->flags |= IPSKB_MASQUERADED;
5017 +
5018 +       ip_send(skb);
5019 +       return 1;
5020 +
5021 +  tx_error_icmp:
5022 +       dst_link_failure(skb);
5023 +  tx_error:
5024 +       kfree_skb(skb);
5025 +       return 0;
5026 +}
5027 +
5028 +
5029 +/*
5030 + *     Initialize IP virtual server
5031 + */
5032 +__initfunc(int ip_vs_init(void))
5033 +{
5034 +       int idx;
5035 +
5036 +       /*
5037 +        * Allocate the ip_vs_table and initialize its list head.
5038 +        * Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
5039 +        * ip_vs_schedulers and ip_vs_dest_trash.
5040 +        */
5041 +       if (!(ip_vs_table =
5042 +             vmalloc(IP_VS_TAB_SIZE*sizeof(struct list_head)))) {
5043 +               return -ENOMEM;
5044 +       }
5045 +       for(idx = 0; idx < IP_VS_TAB_SIZE; idx++)  {
5046 +               INIT_LIST_HEAD(&ip_vs_table[idx]);
5047 +       }
5048 +       IP_VS_INFO("Connection hash table configured "
5049 +                  "(size=%d, memory=%ldKbytes)\n",
5050 +                  IP_VS_TAB_SIZE,
5051 +                  (long) (IP_VS_TAB_SIZE*sizeof(struct list_head))/1024);
5052 +
5053 +       for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
5054 +               INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
5055 +               INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
5056 +       }
5057 +       for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
5058 +               INIT_LIST_HEAD(&ip_vs_rtable[idx]);
5059 +       }
5060 +       INIT_LIST_HEAD(&ip_vs_schedulers);
5061 +       INIT_LIST_HEAD(&ip_vs_dest_trash);
5062 +
5063 +       /*
5064 +        * Hook the slow_timer handler in the system timer.
5065 +        */
5066 +       slow_timer.expires = jiffies+SLTIMER_PERIOD;
5067 +       add_timer(&slow_timer);
5068 +
5069 +#ifdef CONFIG_PROC_FS
5070 +       ip_masq_proc_register(&ip_vs_proc_entry);
5071 +       ip_masq_proc_register(&ip_vs_stat_proc_entry);
5072 +#endif
5073 +
5074 +#ifdef CONFIG_IP_MASQUERADE_VS_RR
5075 +       ip_vs_rr_init();
5076 +#endif
5077 +#ifdef CONFIG_IP_MASQUERADE_VS_WRR
5078 +       ip_vs_wrr_init();
5079 +#endif
5080 +#ifdef CONFIG_IP_MASQUERADE_VS_LC
5081 +       ip_vs_lc_init();
5082 +#endif
5083 +#ifdef CONFIG_IP_MASQUERADE_VS_WLC
5084 +       ip_vs_wlc_init();
5085 +#endif
5086 +#ifdef CONFIG_IP_MASQUERADE_VS_LBLC
5087 +       ip_vs_lblc_init();
5088 +#endif
5089 +#ifdef CONFIG_IP_MASQUERADE_VS_LBLCR
5090 +       ip_vs_lblcr_init();
5091 +#endif
5092 +       return 0;
5093 +}
5094 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_lblc.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lblc.c
5095 --- linux-2.2.19/net/ipv4/ip_vs_lblc.c  Thu Jan  1 08:00:00 1970
5096 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lblc.c Fri Feb  2 18:49:08 2001
5097 @@ -0,0 +1,645 @@
5098 +/*
5099 + * IPVS:        Locality-Based Least-Connection scheduling module
5100 + *
5101 + * Version:     $Id$
5102 + *
5103 + * Authors:     Wensong Zhang <wensong@gnuchina.org>
5104 + *
5105 + *              This program is free software; you can redistribute it and/or
5106 + *              modify it under the terms of the GNU General Public License
5107 + *              as published by the Free Software Foundation; either version
5108 + *              2 of the License, or (at your option) any later version.
5109 + *
5110 + * Changes:
5111 + *     Martin Hamilton         :    fixed the terrible locking bugs
5112 + *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
5113 + *     Wensong Zhang           :    fixed the uninitialized tbl->lock bug
5114 + *     Wensong Zhang           :    added doing full expiration check to
5115 + *                                   collect stale entries of 24+ hours when
5116 + *                                   no partial expire check in a half hour
5117 + *
5118 + */
5119 +
5120 +/*
5121 + * The lblc algorithm is as follows (pseudo code):
5122 + *
5123 + *       if cachenode[dest_ip] is null then
5124 + *               n, cachenode[dest_ip] <- {weighted least-conn node};
5125 + *       else
5126 + *               n <- cachenode[dest_ip];
5127 + *               if (n is dead) OR 
5128 + *                  (n.conns>n.weight AND 
5129 + *                   there is a node m with m.conns<m.weight/2) then
5130 + *                 n, cachenode[dest_ip] <- {weighted least-conn node};
5131 + *
5132 + *       return n;
5133 + *
5134 + * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
5135 + * me to write this module.
5136 + */
5137 +
5138 +#include <linux/config.h>
5139 +#include <linux/module.h>
5140 +#ifdef CONFIG_KMOD
5141 +#include <linux/kmod.h>
5142 +#endif
5143 +#include <linux/types.h>
5144 +#include <linux/kernel.h>
5145 +#include <linux/errno.h>
5146 +#include <linux/vmalloc.h>
5147 +#include <net/ip_masq.h>
5148 +#ifdef CONFIG_IP_MASQUERADE_MOD
5149 +#include <net/ip_masq_mod.h>
5150 +#endif
5151 +#include <linux/sysctl.h>
5152 +#include <linux/proc_fs.h>
5153 +#include <linux/ip_fw.h>
5154 +#include <net/ip_vs.h>
5155 +
5156 +
5157 +/*
5158 + *    It is for garbage collection of stale IPVS lblc entries,
5159 + *    when the table is full.
5160 + */
5161 +#define CHECK_EXPIRE_INTERVAL   (60*HZ)
5162 +#define ENTRY_TIMEOUT           (5*60*HZ)
5163 +
5164 +/*
5165 + *    It is for full expiration check.
5166 + *    When there is no partial expiration check (garbage collection)
5167 + *    in a half hour, do a full expiration check to collect stale
5168 + *    entries that haven't been touched for a day (by default).
5169 + */
5170 +#define COUNT_FOR_FULL_EXPIRATION   30
5171 +int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
5172 +
5173 +
5174 +/*
5175 + *     for IPVS lblc entry hash table
5176 + */
5177 +#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
5178 +#define CONFIG_IP_VS_LBLC_TAB_BITS      10
5179 +#endif
5180 +#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
5181 +#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
5182 +#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
5183 +
5184 +
5185 +/*
5186 + *      IPVS lblc entry represents an association between destination
5187 + *      IP address and its destination server
5188 + */
5189 +struct ip_vs_lblc_entry {
5190 +        struct list_head        list;
5191 +        __u32                   addr;           /* destination IP address */
5192 +        struct ip_vs_dest       *dest;          /* real server (cache) */
5193 +        unsigned long           lastuse;        /* last used time */
5194 +};
5195 +
5196 +
5197 +/*
5198 + *      IPVS lblc hash table
5199 + */
5200 +struct ip_vs_lblc_table {
5201 +        rwlock_t               lock;           /* lock for this table */
5202 +        struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
5203 +        atomic_t                entries;        /* number of entries */
5204 +        int                     max_size;       /* maximum size of entries */
5205 +        struct timer_list       periodic_timer; /* collect stale entries */
5206 +        int                     rover;          /* rover for expire check */
5207 +        int                     counter;        /* counter for no expire */
5208 +};
5209 +
5210 +
5211 +
5212 +/*
5213 + *      IPVS LBLC sysctl table
5214 + */
5215 +struct ip_vs_lblc_sysctl_table {
5216 +       struct ctl_table_header *sysctl_header;
5217 +       ctl_table vs_vars[2];
5218 +       ctl_table vs_dir[2];
5219 +       ctl_table ipv4_dir[2];
5220 +       ctl_table root_dir[2];
5221 +};
5222 +
5223 +
5224 +static struct ip_vs_lblc_sysctl_table lblc_sysctl_table = {
5225 +        NULL,
5226 +       {{NET_IPV4_VS_LBLC_EXPIRE, "lblc_expiration",
5227 +          &sysctl_ip_vs_lblc_expiration,
5228 +          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
5229 +         {0}},
5230 +        {{NET_IPV4_VS, "vs", NULL, 0, 0555, lblc_sysctl_table.vs_vars},
5231 +         {0}},
5232 +        {{NET_IPV4, "ipv4", NULL, 0, 0555, lblc_sysctl_table.vs_dir},
5233 +         {0}},
5234 +        {{CTL_NET, "net", NULL, 0, 0555, lblc_sysctl_table.ipv4_dir},
5235 +         {0}}
5236 +};
5237 +
5238 +
5239 +/*
5240 + *      new/free an ip_vs_lblc_entry, which is a mapping of a destination
5241 + *      IP address to a server.
5242 + */
5243 +static inline struct ip_vs_lblc_entry *
5244 +ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
5245 +{
5246 +        struct ip_vs_lblc_entry *en;
5247 +
5248 +        en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
5249 +        if (en == NULL) {
5250 +                IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
5251 +               return NULL;
5252 +        }
5253 +        
5254 +        INIT_LIST_HEAD(&en->list);
5255 +        en->addr = daddr;
5256 +
5257 +        atomic_inc(&dest->refcnt);
5258 +        en->dest = dest;
5259 +        
5260 +        return en;
5261 +}
5262 +
5263 +
5264 +static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
5265 +{
5266 +        list_del(&en->list);
5267 +        atomic_dec(&en->dest->refcnt);
5268 +        kfree(en);
5269 +}
5270 +
5271 +                
5272 +/*
5273 + *     Returns hash value for IPVS LBLC entry
5274 + */
5275 +static inline unsigned ip_vs_lblc_hashkey(__u32 addr)
5276 +{
5277 +        return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
5278 +}
5279 +
5280 +
5281 +/*
5282 + *     Hash an entry in the ip_vs_lblc_table.
5283 + *     returns bool success.
5284 + */
5285 +static int
5286 +ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
5287 +{
5288 +        unsigned hash;
5289 +
5290 +        if (!list_empty(&en->list)) {
5291 +                IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
5292 +                          "called from %p\n", __builtin_return_address(0));
5293 +                return 0;
5294 +        }
5295 +
5296 +        /*
5297 +         *     Hash by destination IP address
5298 +         */
5299 +        hash = ip_vs_lblc_hashkey(en->addr);
5300 +
5301 +        write_lock(&tbl->lock);
5302 +        list_add(&en->list, &tbl->bucket[hash]);
5303 +        atomic_inc(&tbl->entries);
5304 +        write_unlock(&tbl->lock);
5305 +        
5306 +        return 1;
5307 +}
5308 +
5309 +
5310 +#if 0000
5311 +/*
5312 + *     Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
5313 + *     returns bool success.
5314 + */
5315 +static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
5316 +                             struct ip_vs_lblc_entry *en)
5317 +{
5318 +        if (list_empty(&en->list)) {
5319 +                IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
5320 +                          "called from %p\n", __builtin_return_address(0));
5321 +                return 0;
5322 +        }
5323 +
5324 +        /*
5325 +         * Remove it from the table
5326 +         */
5327 +        write_lock(&tbl->lock);
5328 +        list_del(&en->list);
5329 +        INIT_LIST_HEAD(&en->list);
5330 +        write_unlock(&tbl->lock);
5331 +
5332 +        return 1;
5333 +}
5334 +#endif
5335 +
5336 +
5337 +/*
5338 + *  Get ip_vs_lblc_entry associated with supplied parameters.
5339 + */
5340 +static inline struct ip_vs_lblc_entry *
5341 +ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr)
5342 +{
5343 +        unsigned hash;
5344 +        struct ip_vs_lblc_entry *en;
5345 +        struct list_head *l,*e;
5346 +
5347 +        hash = ip_vs_lblc_hashkey(addr);
5348 +
5349 +        read_lock(&tbl->lock);
5350 +        
5351 +        l = &tbl->bucket[hash];
5352 +        for (e=l->next; e!=l; e=e->next) {
5353 +                en = list_entry(e, struct ip_vs_lblc_entry, list);
5354 +                if (en->addr == addr) {
5355 +                        /* HIT */
5356 +                        read_unlock(&tbl->lock);
5357 +                        return en;
5358 +                }
5359 +        }
5360 +
5361 +        read_unlock(&tbl->lock);
5362 +
5363 +        return NULL;
5364 +}
5365 +
5366 +
5367 +/*
5368 + *      Flush all the entries of the specified table.
5369 + */
5370 +static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
5371 +{
5372 +        int i;
5373 +        struct list_head *l;
5374 +        struct ip_vs_lblc_entry *en;
5375 +        
5376 +        for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
5377 +                write_lock(&tbl->lock);
5378 +                for (l=&tbl->bucket[i]; l->next!=l; ) {
5379 +                        en = list_entry(l->next,
5380 +                                        struct ip_vs_lblc_entry, list);
5381 +                        ip_vs_lblc_free(en);
5382 +                        atomic_dec(&tbl->entries);
5383 +                }
5384 +                write_unlock(&tbl->lock);
5385 +        }
5386 +}
5387 +
5388 +
5389 +static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
5390 +{
5391 +        unsigned long now = jiffies;
5392 +        int i, j;
5393 +        struct list_head *l, *e;
5394 +        struct ip_vs_lblc_entry *en;
5395 +
5396 +        for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
5397 +                j = (j + 1) & IP_VS_LBLC_TAB_MASK;
5398 +                e = l = &tbl->bucket[j];
5399 +                write_lock(&tbl->lock);
5400 +                while (e->next != l) {
5401 +                        en = list_entry(e->next,
5402 +                                        struct ip_vs_lblc_entry, list);
5403 +                        if ((now - en->lastuse) <
5404 +                            sysctl_ip_vs_lblc_expiration) {
5405 +                                e = e->next;
5406 +                                continue;
5407 +                        }
5408 +                        ip_vs_lblc_free(en);
5409 +                        atomic_dec(&tbl->entries);
5410 +                }
5411 +                write_unlock(&tbl->lock);
5412 +        }
5413 +        tbl->rover = j;
5414 +}
5415 +
5416 +
5417 +/*
5418 + *      Periodical timer handler for IPVS lblc table
5419 + *      It is used to collect stale entries when the number of entries
5420 + *      exceeds the maximum size of the table.
5421 + *
5422 + *      Fixme: we probably need more complicated algorithm to collect
5423 + *             entries that have not been used for a long time even
5424 + *             if the number of entries doesn't exceed the maximum size
5425 + *             of the table.
5426 + *      The full expiration check is for this purpose now.
5427 + */
5428 +static void ip_vs_lblc_check_expire(unsigned long data)
5429 +{
5430 +        struct ip_vs_lblc_table *tbl;
5431 +        unsigned long now = jiffies;
5432 +        int goal;
5433 +        int i, j;
5434 +        struct list_head *l, *e;
5435 +        struct ip_vs_lblc_entry *en;
5436 +        
5437 +        tbl = (struct ip_vs_lblc_table *)data;
5438 +
5439 +        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
5440 +                /* do full expiration check */
5441 +                ip_vs_lblc_full_check(tbl);
5442 +                tbl->counter = 1;
5443 +                goto out;
5444 +        }
5445 +
5446 +        if (atomic_read(&tbl->entries) < tbl->max_size) {
5447 +                tbl->counter++;
5448 +                goto out;
5449 +        }
5450 +
5451 +        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
5452 +        if (goal > tbl->max_size/2)
5453 +                goal = tbl->max_size/2;
5454 +
5455 +        for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
5456 +                j = (j + 1) & IP_VS_LBLC_TAB_MASK;
5457 +                e = l = &tbl->bucket[j];
5458 +                write_lock(&tbl->lock);
5459 +                while (e->next != l) {
5460 +                        en = list_entry(e->next,
5461 +                                        struct ip_vs_lblc_entry, list);
5462 +                        if ((now - en->lastuse) < ENTRY_TIMEOUT) {
5463 +                                e = e->next;
5464 +                                continue;
5465 +                        }
5466 +                        ip_vs_lblc_free(en);
5467 +                        atomic_dec(&tbl->entries);
5468 +                        goal--;
5469 +                }
5470 +                write_unlock(&tbl->lock);
5471 +                if (goal <= 0)
5472 +                        break;
5473 +        }
5474 +        tbl->rover = j;
5475 +        
5476 +  out:
5477 +        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
5478 +}
5479 +
5480 +
5481 +static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
5482 +{
5483 +       int i;
5484 +        struct ip_vs_lblc_table *tbl;
5485 +        
5486 +        /*
5487 +         *    Allocate the ip_vs_lblc_table for this service
5488 +         */
5489 +        tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
5490 +        if (tbl == NULL) {
5491 +                IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
5492 +               return -ENOMEM;
5493 +        }
5494 +        svc->sched_data = tbl;
5495 +        IP_VS_DBG(0, "LBLC hash table (memory=%dbytes) allocated for "
5496 +                  "current service\n",
5497 +                  sizeof(struct ip_vs_lblc_table));
5498 +        
5499 +        /*
5500 +         *    Initialize the hash buckets
5501 +         */
5502 +        for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
5503 +                INIT_LIST_HEAD(&tbl->bucket[i]);
5504 +        }
5505 +        tbl->lock = RW_LOCK_UNLOCKED;
5506 +        tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
5507 +        tbl->rover = 0;
5508 +        tbl->counter = 1;
5509 +
5510 +        /*
5511 +         *    Hook periodic timer for garbage collection
5512 +         */
5513 +       init_timer(&tbl->periodic_timer);
5514 +        tbl->periodic_timer.data = (unsigned long)tbl;
5515 +        tbl->periodic_timer.function = ip_vs_lblc_check_expire;
5516 +        tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
5517 +        add_timer(&tbl->periodic_timer);
5518 +        
5519 +        MOD_INC_USE_COUNT;
5520 +        return 0;
5521 +}
5522 +
5523 +
5524 +static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
5525 +{
5526 +        struct ip_vs_lblc_table *tbl = svc->sched_data;
5527 +
5528 +        /* remove periodic timer */
5529 +        del_timer(&tbl->periodic_timer);
5530 +
5531 +        /* got to clean up table entries here */
5532 +        ip_vs_lblc_flush(tbl);
5533 +
5534 +        /* release the table itself */
5535 +        kfree(svc->sched_data);
5536 +        IP_VS_DBG(0, "LBLC hash table (memory=%dbytes) released\n",
5537 +                  sizeof(struct ip_vs_lblc_table));
5538 +
5539 +        MOD_DEC_USE_COUNT;
5540 +        return 0;
5541 +}
5542 +
5543 +
5544 +static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
5545 +{
5546 +        return 0;
5547 +}
5548 +
5549 +
5550 +static inline struct ip_vs_dest *
5551 +__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
5552 +{
5553 +        register struct list_head *l, *e;
5554 +        struct ip_vs_dest *dest, *least;
5555 +        int loh, doh;
5556 +
5557 +        l = &svc->destinations;
5558 +        if (l == l->next)
5559 +                return NULL;
5560 +
5561 +        /*
5562 +         * We think the overhead of processing active connections is fifty
5563 +         * times than that of inactive connections in average. (This fifty
5564 +         * times might be not accurate, we will change it later.) We use
5565 +         * the following formula to estimate the overhead:
5566 +         *                dest->activeconns*50 + dest->inactconns
5567 +         * and the load:
5568 +         *                (dest overhead) / dest->weight
5569 +         *
5570 +         * Remember -- no floats in kernel mode!!!
5571 +         * The comparison of h1*w2 > h2*w1 is equivalent to that of
5572 +         *                h1/w1 > h2/w2
5573 +         * if every weight is larger than zero.
5574 +         *
5575 +         * The server with weight=0 is quiesced and will not receive any
5576 +         * new connection.
5577 +         */
5578 +
5579 +        for (e=l->next; e!=l; e=e->next) {
5580 +                least = list_entry(e, struct ip_vs_dest, n_list);
5581 +                if (least->weight > 0) {
5582 +                        loh = atomic_read(&least->activeconns) * 50
5583 +                                + atomic_read(&least->inactconns);
5584 +                        goto nextstage;
5585 +                }
5586 +        }
5587 +        return NULL;
5588 +        
5589 +        /*
5590 +         *    Find the destination with the least load.
5591 +         */
5592 +  nextstage:
5593 +        for (e=e->next; e!=l; e=e->next)
5594 +        {
5595 +                dest = list_entry(e, struct ip_vs_dest, n_list);
5596 +                doh = atomic_read(&dest->activeconns) * 50
5597 +                        + atomic_read(&dest->inactconns);
5598 +                if (loh * dest->weight > doh * least->weight)
5599 +                {
5600 +                        least = dest;
5601 +                        loh = doh;
5602 +                }
5603 +        }
5604 +
5605 +        IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
5606 +                  "activeconns %d refcnt %d weight %d overhead %d\n",
5607 +                  NIPQUAD(least->addr), ntohs(least->port),
5608 +                  atomic_read(&least->activeconns),
5609 +                  atomic_read(&least->refcnt), least->weight, loh);
5610 +
5611 +        return least;
5612 +}
5613 +
5614 +
5615 +/*
5616 + *   If this destination server is overloaded and there is a less loaded
5617 + *   server, then return true.
5618 + */
5619 +static inline int
5620 +is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
5621 +{
5622 +        if (atomic_read(&dest->activeconns) > dest->weight) {
5623 +                register struct list_head *l, *e;
5624 +                struct ip_vs_dest *d;
5625 +        
5626 +                l = &svc->destinations;
5627 +                for (e=l->next; e!=l; e=e->next) {
5628 +                        d = list_entry(e, struct ip_vs_dest, n_list);
5629 +                        if (atomic_read(&d->activeconns)*2 < d->weight) {
5630 +                                return 1;
5631 +                        }
5632 +                }
5633 +        }
5634 +        return 0;
5635 +}
5636 +
5637 +
5638 +/*
5639 + *    Locality-Based (weighted) Least-Connection scheduling
5640 + */
5641 +static struct ip_vs_dest *
5642 +ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
5643 +{
5644 +        struct ip_vs_dest *dest;
5645 +        struct ip_vs_lblc_table *tbl;
5646 +        struct ip_vs_lblc_entry *en;
5647 +
5648 +        IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
5649 +
5650 +        tbl = (struct ip_vs_lblc_table *)svc->sched_data;
5651 +        en = ip_vs_lblc_get(tbl, iph->daddr);
5652 +        if (en == NULL) {
5653 +                dest = __ip_vs_wlc_schedule(svc, iph);
5654 +                if (dest == NULL) {
5655 +                        IP_VS_DBG(1, "no destination available\n");
5656 +                        return NULL;
5657 +                }
5658 +                en = ip_vs_lblc_new(iph->daddr, dest);
5659 +                if (en == NULL) {
5660 +                        return NULL;
5661 +                }
5662 +                ip_vs_lblc_hash(tbl, en);
5663 +        } else {
5664 +                dest = en->dest;
5665 +                if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
5666 +                   || dest->weight <= 0
5667 +                    || is_overloaded(dest, svc)) {
5668 +                        dest = __ip_vs_wlc_schedule(svc, iph);
5669 +                        if (dest == NULL) {
5670 +                                IP_VS_DBG(1, "no destination available\n");
5671 +                                return NULL;
5672 +                        }
5673 +                        atomic_dec(&en->dest->refcnt);
5674 +                        atomic_inc(&dest->refcnt);
5675 +                        en->dest = dest;
5676 +                }
5677 +        }
5678 +        en->lastuse = jiffies;
5679 +
5680 +        IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
5681 +                  "--> server %u.%u.%u.%u:%d\n",
5682 +                  NIPQUAD(en->addr),
5683 +                  NIPQUAD(dest->addr),
5684 +                  ntohs(dest->port));
5685 +                        
5686 +        return dest;
5687 +}
5688 +
5689 +
5690 +static struct ip_vs_scheduler ip_vs_lblc_scheduler =
5691 +{
5692 +        {0},                    /* n_list */
5693 +        "lblc",                 /* name */
5694 +        ATOMIC_INIT(0),         /* refcnt */
5695 +        ip_vs_lblc_init_svc,    /* service initializer */
5696 +        ip_vs_lblc_done_svc,    /* service done */
5697 +        ip_vs_lblc_update_svc,  /* service updater */
5698 +        ip_vs_lblc_schedule,    /* select a server from the destination list */
5699 +};
5700 +
5701 +
5702 +__initfunc(int ip_vs_lblc_init(void))
5703 +{
5704 +        IP_VS_INFO("Initializing LBLC scheduling\n");
5705 +        INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
5706 +        lblc_sysctl_table.sysctl_header =
5707 +                register_sysctl_table(lblc_sysctl_table.root_dir, 0);
5708 +        return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
5709 +}
5710 +
5711 +
5712 +#ifdef MODULE
5713 +EXPORT_NO_SYMBOLS;
5714 +
5715 +int init_module(void)
5716 +{
5717 +        INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
5718 +
5719 +        /* module initialization by 'request_module' */
5720 +        if (register_ip_vs_scheduler(&ip_vs_lblc_scheduler) != 0)
5721 +                return -EIO;
5722 +
5723 +        lblc_sysctl_table.sysctl_header =
5724 +                register_sysctl_table(lblc_sysctl_table.root_dir, 0);
5725 +
5726 +        IP_VS_INFO("LBLC scheduling module loaded.\n");
5727 +
5728 +        return 0;
5729 +}
5730 +
5731 +void cleanup_module(void)
5732 +{
5733 +        /* module cleanup by 'release_module' */
5734 +        if (unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler) != 0) {
5735 +                IP_VS_INFO("cannot remove LBLC scheduling module\n");
5736 +         } else {
5737 +                IP_VS_INFO("LBLC scheduling module unloaded.\n");
5738 +        }
5739 +        unregister_sysctl_table(lblc_sysctl_table.sysctl_header);
5740 +}
5741 +
5742 +#endif /* MODULE */
5743 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_lblcr.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lblcr.c
5744 --- linux-2.2.19/net/ipv4/ip_vs_lblcr.c Thu Jan  1 08:00:00 1970
5745 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lblcr.c        Tue Mar 27 17:37:00 2001
5746 @@ -0,0 +1,834 @@
5747 +/*
5748 + * IPVS:        Locality-Based Least-Connection with Replication scheduler
5749 + *
5750 + * Version:     $Id$
5751 + *
5752 + * Authors:     Wensong Zhang <wensong@gnuchina.org>
5753 + *
5754 + *              This program is free software; you can redistribute it and/or
5755 + *              modify it under the terms of the GNU General Public License
5756 + *              as published by the Free Software Foundation; either version
5757 + *              2 of the License, or (at your option) any later version.
5758 + *
5759 + * Changes:
5760 + *     Julian Anastasov        :    Added the missing (dest->weight>0)
5761 + *                                  condition in the ip_vs_dest_set_max.
5762 + *
5763 + */
5764 +
5765 +/*
5766 + * The lblc/r algorithm is as follows (pseudo code):
5767 + *
5768 + *       if serverSet[dest_ip] is null then
5769 + *               n, serverSet[dest_ip] <- {weighted least-conn node};
5770 + *       else
5771 + *               n <- {least-conn (alive) node in serverSet[dest_ip]};
5772 + *               if (n is null) OR
5773 + *                  (n.conns>n.weight AND 
5774 + *                   there is a node m with m.conns<m.weight/2) then
5775 + *                   n <- {weighted least-conn node};
5776 + *                   add n to serverSet[dest_ip];
5777 + *               if |serverSet[dest_ip]| > 1 AND
5778 + *                   now - serverSet[dest_ip].lastMod > T then
5779 + *                   m <- {most conn node in serverSet[dest_ip]};
5780 + *                   remove m from serverSet[dest_ip];
5781 + *       if serverSet[dest_ip] changed then
5782 + *               serverSet[dest_ip].lastMod <- now;
5783 + *
5784 + *       return n;
5785 + *
5786 + */
5787 +
5788 +#include <linux/config.h>
5789 +#include <linux/module.h>
5790 +#ifdef CONFIG_KMOD
5791 +#include <linux/kmod.h>
5792 +#endif
5793 +#include <linux/types.h>
5794 +#include <linux/kernel.h>
5795 +#include <linux/errno.h>
5796 +#include <linux/vmalloc.h>
5797 +#include <net/ip_masq.h>
5798 +#ifdef CONFIG_IP_MASQUERADE_MOD
5799 +#include <net/ip_masq_mod.h>
5800 +#endif
5801 +#include <linux/sysctl.h>
5802 +#include <linux/proc_fs.h>
5803 +#include <linux/ip_fw.h>
5804 +#include <net/ip_vs.h>
5805 +
5806 +
5807 +/*
5808 + *    It is for garbage collection of stale IPVS lblcr entries,
5809 + *    when the table is full.
5810 + */
5811 +#define CHECK_EXPIRE_INTERVAL   (60*HZ)
5812 +#define ENTRY_TIMEOUT           (6*60*HZ)
5813 +
5814 +/*
5815 + *    It is for full expiration check.
5816 + *    When there is no partial expiration check (garbage collection)
5817 + *    in a half hour, do a full expiration check to collect stale
5818 + *    entries that haven't been touched for a day.
5819 + */
5820 +#define COUNT_FOR_FULL_EXPIRATION   30
5821 +int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
5822 +
5823 +
5824 +/*
5825 + *     for IPVS lblcr entry hash table
5826 + */
5827 +#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
5828 +#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
5829 +#endif
5830 +#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
5831 +#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
5832 +#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
5833 +
5834 +
5835 +/*
5836 + *      IPVS destination set structure and operations
5837 + */
5838 +struct ip_vs_dest_list {
5839 +        struct ip_vs_dest_list  *next;          /* list link */
5840 +        struct ip_vs_dest       *dest;          /* destination server */
5841 +};
5842 +
5843 +struct ip_vs_dest_set {
5844 +        atomic_t                size;           /* set size */
5845 +        unsigned long           lastmod;        /* last modified time */
5846 +        struct ip_vs_dest_list  *list;          /* destination list */
5847 +        rwlock_t               lock;           /* lock for this list */
5848 +};
5849 +
5850 +
5851 +static struct ip_vs_dest_list *
5852 +ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
5853 +{
5854 +        struct ip_vs_dest_list *e;
5855 +
5856 +        for (e=set->list; e!=NULL; e=e->next) {
5857 +                if (e->dest == dest)
5858 +                        /* already existed */
5859 +                        return NULL;
5860 +        }
5861 +
5862 +        e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
5863 +        if (e == NULL) {
5864 +                IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
5865 +               return NULL;
5866 +        }
5867 +
5868 +        atomic_inc(&dest->refcnt);
5869 +        e->dest = dest;
5870 +
5871 +        /* link it to the list */
5872 +        write_lock(&set->lock);
5873 +        if (set->list != NULL) {
5874 +                e->next = set->list->next;
5875 +                set->list = e;
5876 +        } else {
5877 +                e->next = NULL;
5878 +                set->list = e;
5879 +        }
5880 +        write_unlock(&set->lock);
5881 +
5882 +        atomic_inc(&set->size);
5883 +        set->lastmod = jiffies;
5884 +        return e;
5885 +}
5886 +
5887 +static void
5888 +ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
5889 +{
5890 +        struct ip_vs_dest_list *e, **ep;
5891 +
5892 +        write_lock(&set->lock);
5893 +        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
5894 +                if (e->dest == dest) {
5895 +                        /* HIT */
5896 +                        *ep = e->next;
5897 +                        atomic_dec(&set->size);
5898 +                        set->lastmod = jiffies;
5899 +                        atomic_dec(&e->dest->refcnt);
5900 +                        kfree(e);
5901 +                        break;
5902 +                }
5903 +                ep = &e->next;
5904 +        }
5905 +        write_unlock(&set->lock);
5906 +}
5907 +
5908 +static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
5909 +{
5910 +        struct ip_vs_dest_list *e, **ep;
5911 +
5912 +        write_lock(&set->lock);
5913 +        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
5914 +                *ep = e->next;
5915 +                /*
5916 +                 * We don't kfree dest because it is referred either
5917 +                 * by its service or by the trash dest list.
5918 +                 */
5919 +                atomic_dec(&e->dest->refcnt);
5920 +                kfree(e);
5921 +        }
5922 +        write_unlock(&set->lock);
5923 +}
5924 +
5925 +/* get weighted least-connection node in the destination set */
5926 +static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
5927 +{
5928 +        register struct ip_vs_dest_list *e;
5929 +        struct ip_vs_dest *dest, *least;
5930 +        int loh, doh;
5931 +
5932 +        if (set == NULL)
5933 +                return NULL;
5934 +
5935 +        read_lock(&set->lock);
5936 +        /* select the first destination server, whose weight > 0 */
5937 +        for (e=set->list; e!=NULL; e=e->next) {
5938 +                least = e->dest;
5939 +                if ((least->weight > 0)
5940 +                    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
5941 +                        loh = atomic_read(&least->activeconns) * 50
5942 +                                + atomic_read(&least->inactconns);
5943 +                        goto nextstage;
5944 +                }
5945 +        }
5946 +        read_unlock(&set->lock);
5947 +        return NULL;
5948 +        
5949 +        /* find the destination with the weighted least load */
5950 +  nextstage:
5951 +        for (e=e->next; e!=NULL; e=e->next) {
5952 +                dest = e->dest;
5953 +                doh = atomic_read(&dest->activeconns) * 50
5954 +                        + atomic_read(&dest->inactconns);
5955 +                if ((loh*dest->weight > doh*least->weight)
5956 +                    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
5957 +                        least = dest;
5958 +                        loh = doh;
5959 +                }
5960 +        }
5961 +        read_unlock(&set->lock);
5962 +
5963 +        IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
5964 +                  "activeconns %d refcnt %d weight %d overhead %d\n",
5965 +                  NIPQUAD(least->addr), ntohs(least->port),
5966 +                  atomic_read(&least->activeconns),
5967 +                  atomic_read(&least->refcnt), least->weight, loh);
5968 +        return least;
5969 +}
5970 +
5971 +
5972 +/* get weighted most-connection node in the destination set */
5973 +static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
5974 +{
5975 +        register struct ip_vs_dest_list *e;
5976 +        struct ip_vs_dest *dest, *most;
5977 +        int moh, doh;
5978 +
5979 +        if (set == NULL)
5980 +                return NULL;
5981 +
5982 +        read_lock(&set->lock);
5983 +        /* select the first destination server, whose weight > 0 */
5984 +        for (e=set->list; e!=NULL; e=e->next) {
5985 +                most = e->dest;
5986 +                if (most->weight > 0) {
5987 +                        moh = atomic_read(&most->activeconns) * 50
5988 +                                + atomic_read(&most->inactconns);
5989 +                        goto nextstage;
5990 +                }
5991 +        }
5992 +        read_unlock(&set->lock);
5993 +        return NULL;
5994 +        
5995 +        /* find the destination with the weighted most load */
5996 +  nextstage:
5997 +        for (e=e->next; e!=NULL; e=e->next) {
5998 +                dest = e->dest;
5999 +                doh = atomic_read(&dest->activeconns) * 50
6000 +                        + atomic_read(&dest->inactconns);
6001 +                /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 
6002 +                if (moh*dest->weight < doh*most->weight
6003 +                    && dest->weight > 0) {
6004 +                        most = dest;
6005 +                        moh = doh;
6006 +                }
6007 +        }
6008 +        read_unlock(&set->lock);
6009 +
6010 +        IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
6011 +                  "activeconns %d refcnt %d weight %d overhead %d\n",
6012 +                  NIPQUAD(most->addr), ntohs(most->port),
6013 +                  atomic_read(&most->activeconns),
6014 +                  atomic_read(&most->refcnt), most->weight, moh);
6015 +        return most;
6016 +}
6017 +
6018 +
6019 +/*
6020 + *      IPVS lblcr entry represents an association between destination
6021 + *      IP address and its destination server set
6022 + */
6023 +struct ip_vs_lblcr_entry {
6024 +        struct list_head        list;
6025 +        __u32                   addr;           /* destination IP address */
6026 +        struct ip_vs_dest_set   set;            /* destination server set */
6027 +        unsigned long           lastuse;        /* last used time */
6028 +};
6029 +
6030 +
6031 +/*
6032 + *      IPVS lblcr hash table
6033 + */
6034 +struct ip_vs_lblcr_table {
6035 +        rwlock_t               lock;           /* lock for this table */
6036 +        struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
6037 +        atomic_t                entries;        /* number of entries */
6038 +        int                     max_size;       /* maximum size of entries */
6039 +        struct timer_list       periodic_timer; /* collect stale entries */
6040 +        int                     rover;          /* rover for expire check */
6041 +        int                     counter;        /* counter for no expire */
6042 +};
6043 +
6044 +
6045 +/*
6046 + *      IPVS LBLCR sysctl table
6047 + */
6048 +struct ip_vs_lblcr_sysctl_table {
6049 +       struct ctl_table_header *sysctl_header;
6050 +       ctl_table vs_vars[2];
6051 +       ctl_table vs_dir[2];
6052 +       ctl_table ipv4_dir[2];
6053 +       ctl_table root_dir[2];
6054 +};
6055 +
6056 +
6057 +static struct ip_vs_lblcr_sysctl_table lblcr_sysctl_table = {
6058 +        NULL,
6059 +       {{NET_IPV4_VS_LBLCR_EXPIRE, "lblcr_expiration",
6060 +          &sysctl_ip_vs_lblcr_expiration,
6061 +          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
6062 +         {0}},
6063 +        {{NET_IPV4_VS, "vs", NULL, 0, 0555, lblcr_sysctl_table.vs_vars},
6064 +         {0}},
6065 +        {{NET_IPV4, "ipv4", NULL, 0, 0555, lblcr_sysctl_table.vs_dir},
6066 +         {0}},
6067 +        {{CTL_NET, "net", NULL, 0, 0555, lblcr_sysctl_table.ipv4_dir},
6068 +         {0}}
6069 +};
6070 +
6071 +
6072 +/*
6073 + *      new/free a ip_vs_lblcr_entry, which is a mapping of a destination
6074 + *      IP address to a server.
6075 + */
6076 +static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr)
6077 +{
6078 +        struct ip_vs_lblcr_entry *en;
6079 +
6080 +        en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
6081 +        if (en == NULL) {
6082 +                IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
6083 +               return NULL;
6084 +        }
6085 +        
6086 +        INIT_LIST_HEAD(&en->list);
6087 +        en->addr = daddr;
6088 +
6089 +        /* initialize its dest set */
6090 +        atomic_set(&(en->set.size), 0);
6091 +        en->set.list = NULL;
6092 +        en->set.lock = RW_LOCK_UNLOCKED;
6093 +
6094 +        return en;
6095 +}
6096 +
6097 +
6098 +static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
6099 +{
6100 +        list_del(&en->list);
6101 +        ip_vs_dest_set_eraseall(&en->set);
6102 +        kfree(en);
6103 +}
6104 +
6105 +                
6106 +/*
6107 + *     Returns hash value for IPVS LBLCR entry
6108 + */
6109 +static inline unsigned ip_vs_lblcr_hashkey(__u32 addr)
6110 +{
6111 +        return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
6112 +}
6113 +
6114 +
6115 +/*
6116 + *     Hash an entry in the ip_vs_lblcr_table.
6117 + *     returns bool success.
6118 + */
6119 +static int
6120 +ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
6121 +{
6122 +        unsigned hash;
6123 +
6124 +        if (!list_empty(&en->list)) {
6125 +                IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
6126 +                          "called from %p\n", __builtin_return_address(0));
6127 +                return 0;
6128 +        }
6129 +
6130 +        /*
6131 +         *     Hash by destination IP address
6132 +         */
6133 +        hash = ip_vs_lblcr_hashkey(en->addr);
6134 +
6135 +        write_lock(&tbl->lock);
6136 +        list_add(&en->list, &tbl->bucket[hash]);
6137 +        atomic_inc(&tbl->entries);
6138 +        write_unlock(&tbl->lock);
6139 +        
6140 +        return 1;
6141 +}
6142 +
6143 +
6144 +#if 0000
6145 +/*
6146 + *     Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
6147 + *     returns bool success.
6148 + */
6149 +static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
6150 +                             struct ip_vs_lblcr_entry *en)
6151 +{
6152 +        if (list_empty(&en->list)) {
6153 +                IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
6154 +                          "called from %p\n", __builtin_return_address(0));
6155 +                return 0;
6156 +        }
6157 +
6158 +        /*
6159 +         * Remove it from the table
6160 +         */
6161 +        write_lock(&tbl->lock);
6162 +        list_del(&en->list);
6163 +        INIT_LIST_HEAD(&en->list);
6164 +        write_unlock(&tbl->lock);
6165 +
6166 +        return 1;
6167 +}
6168 +#endif
6169 +
6170 +
6171 +/*
6172 + *  Get ip_vs_lblcr_entry associated with supplied parameters.
6173 + */
6174 +static inline struct ip_vs_lblcr_entry *
6175 +ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr)
6176 +{
6177 +        unsigned hash;
6178 +        struct ip_vs_lblcr_entry *en;
6179 +        struct list_head *l,*e;
6180 +
6181 +        hash = ip_vs_lblcr_hashkey(addr);
6182 +        l = &tbl->bucket[hash];
6183 +
6184 +        read_lock(&tbl->lock);
6185 +        
6186 +        for (e=l->next; e!=l; e=e->next) {
6187 +                en = list_entry(e, struct ip_vs_lblcr_entry, list);
6188 +                if (en->addr == addr) {
6189 +                        /* HIT */
6190 +                        read_unlock(&tbl->lock);
6191 +                        return en;
6192 +                }
6193 +        }
6194 +
6195 +        read_unlock(&tbl->lock);
6196 +
6197 +        return NULL;
6198 +}
6199 +
6200 +
6201 +/*
6202 + *      Flush all the entries of the specified table.
6203 + */
6204 +static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
6205 +{
6206 +        int i;
6207 +        struct list_head *l;
6208 +        struct ip_vs_lblcr_entry *en;
6209 +        
6210 +        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
6211 +                write_lock(&tbl->lock);
6212 +                for (l=&tbl->bucket[i]; l->next!=l; ) {
6213 +                        en = list_entry(l->next,
6214 +                                        struct ip_vs_lblcr_entry, list);
6215 +                        ip_vs_lblcr_free(en);
6216 +                        atomic_dec(&tbl->entries);
6217 +                }
6218 +                write_unlock(&tbl->lock);
6219 +        }
6220 +}
6221 +
6222 +
6223 +static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
6224 +{
6225 +        unsigned long now = jiffies;
6226 +        int i, j;
6227 +        struct list_head *l, *e;
6228 +        struct ip_vs_lblcr_entry *en;
6229 +
6230 +        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
6231 +                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
6232 +                e = l = &tbl->bucket[j];
6233 +                write_lock(&tbl->lock);
6234 +                while (e->next != l) {
6235 +                        en = list_entry(e->next,
6236 +                                        struct ip_vs_lblcr_entry, list);
6237 +                        if ((now - en->lastuse) <
6238 +                            sysctl_ip_vs_lblcr_expiration) {
6239 +                                e = e->next;
6240 +                                continue;
6241 +                        }
6242 +                        ip_vs_lblcr_free(en);
6243 +                        atomic_dec(&tbl->entries);
6244 +                }
6245 +                write_unlock(&tbl->lock);
6246 +        }
6247 +        tbl->rover = j;
6248 +}
6249 +
6250 +
6251 +/*
6252 + *      Periodical timer handler for IPVS lblcr table
6253 + *      It is used to collect stale entries when the number of entries
6254 + *      exceeds the maximum size of the table.
6255 + *
6256 + *      Fixme: we probably need more complicated algorithm to collect
6257 + *             entries that have not been used for a long time even
6258 + *             if the number of entries doesn't exceed the maximum size
6259 + *             of the table.
6260 + *      The full expiration check is for this purpose now.
6261 + */
6262 +static void ip_vs_lblcr_check_expire(unsigned long data)
6263 +{
6264 +        struct ip_vs_lblcr_table *tbl;
6265 +        unsigned long now = jiffies;
6266 +        int goal;
6267 +        int i, j;
6268 +        struct list_head *l, *e;
6269 +        struct ip_vs_lblcr_entry *en;
6270 +        
6271 +        tbl = (struct ip_vs_lblcr_table *)data;
6272 +
6273 +        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
6274 +                /* do full expiration check */
6275 +                ip_vs_lblcr_full_check(tbl);
6276 +                tbl->counter = 1;
6277 +                goto out;
6278 +        }
6279 +                
6280 +        if (atomic_read(&tbl->entries) < tbl->max_size) {
6281 +                tbl->counter++;
6282 +                goto out;
6283 +        }
6284 +
6285 +        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
6286 +        if (goal > tbl->max_size/2)
6287 +                goal = tbl->max_size/2;
6288 +
6289 +        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
6290 +                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
6291 +                e = l = &tbl->bucket[j];
6292 +                write_lock(&tbl->lock);
6293 +                while (e->next != l) {
6294 +                        en = list_entry(e->next,
6295 +                                        struct ip_vs_lblcr_entry, list);
6296 +                        if ((now - en->lastuse) < ENTRY_TIMEOUT) {
6297 +                                e = e->next;
6298 +                                continue;
6299 +                        }
6300 +                        ip_vs_lblcr_free(en);
6301 +                        atomic_dec(&tbl->entries);
6302 +                        goal--;
6303 +                }
6304 +                write_unlock(&tbl->lock);
6305 +                if (goal <= 0)
6306 +                        break;
6307 +        }
6308 +        tbl->rover = j;
6309 +        
6310 +  out:
6311 +        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
6312 +}
6313 +
6314 +
6315 +static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
6316 +{
6317 +       int i;
6318 +        struct ip_vs_lblcr_table *tbl;
6319 +        
6320 +        /*
6321 +         *    Allocate the ip_vs_lblcr_table for this service
6322 +         */
6323 +        tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
6324 +        if (tbl == NULL) {
6325 +                IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
6326 +               return -ENOMEM;
6327 +        }
6328 +        svc->sched_data = tbl;
6329 +        IP_VS_DBG(0, "LBLCR hash table (memory=%dbytes) allocated for "
6330 +                  "current service\n",
6331 +                  sizeof(struct ip_vs_lblcr_table));
6332 +        
6333 +        /*
6334 +         *    Initialize the hash buckets
6335 +         */
6336 +        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
6337 +                INIT_LIST_HEAD(&tbl->bucket[i]);
6338 +        }
6339 +        tbl->lock = RW_LOCK_UNLOCKED;
6340 +        tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
6341 +        tbl->rover = 0;
6342 +        tbl->counter = 1;
6343 +
6344 +        /*
6345 +         *    Hook periodic timer for garbage collection
6346 +         */
6347 +       init_timer(&tbl->periodic_timer);
6348 +        tbl->periodic_timer.data = (unsigned long)tbl;
6349 +        tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
6350 +        tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
6351 +        add_timer(&tbl->periodic_timer);
6352 +        
6353 +        MOD_INC_USE_COUNT;
6354 +        return 0;
6355 +}
6356 +
6357 +
6358 +static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
6359 +{
6360 +        struct ip_vs_lblcr_table *tbl = svc->sched_data;
6361 +
6362 +        /* remove periodic timer */
6363 +        del_timer(&tbl->periodic_timer);
6364 +
6365 +        /* got to clean up table entries here */
6366 +        ip_vs_lblcr_flush(tbl);
6367 +
6368 +        /* release the table itself */
6369 +        kfree(svc->sched_data);
6370 +        IP_VS_DBG(0, "LBLCR hash table (memory=%dbytes) released\n",
6371 +                  sizeof(struct ip_vs_lblcr_table));
6372 +
6373 +        MOD_DEC_USE_COUNT;
6374 +        return 0;
6375 +}
6376 +
6377 +
6378 +static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
6379 +{
6380 +        return 0;
6381 +}
6382 +
6383 +
6384 +static inline struct ip_vs_dest *
6385 +__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
6386 +{
6387 +        register struct list_head *l, *e;
6388 +        struct ip_vs_dest *dest, *least;
6389 +        int loh, doh;
6390 +
6391 +        l = &svc->destinations;
6392 +        if (l == l->next)
6393 +                return NULL;
6394 +
6395 +        /*
6396 +         * We think the overhead of processing active connections is fifty
6397 +         * times that of inactive connections on average. (This fifty
6398 +         * times might be not accurate, we will change it later.) We use
6399 +         * the following formula to estimate the overhead:
6400 +         *                dest->activeconns*50 + dest->inactconns
6401 +         * and the load:
6402 +         *                (dest overhead) / dest->weight
6403 +         *
6404 +         * Remember -- no floats in kernel mode!!!
6405 +         * The comparison of h1*w2 > h2*w1 is equivalent to that of
6406 +         *                h1/w1 > h2/w2
6407 +         * if every weight is larger than zero.
6408 +         *
6409 +         * The server with weight=0 is quiesced and will not receive any
6410 +         * new connection.
6411 +         */
6412 +
6413 +        for (e=l->next; e!=l; e=e->next) {
6414 +                least = list_entry(e, struct ip_vs_dest, n_list);
6415 +                if (least->weight > 0) {
6416 +                        loh = atomic_read(&least->activeconns) * 50
6417 +                                + atomic_read(&least->inactconns);
6418 +                        goto nextstage;
6419 +                }
6420 +        }
6421 +        return NULL;
6422 +        
6423 +        /*
6424 +         *    Find the destination with the least load.
6425 +         */
6426 +  nextstage:
6427 +        for (e=e->next; e!=l; e=e->next) {
6428 +                dest = list_entry(e, struct ip_vs_dest, n_list);
6429 +                doh = atomic_read(&dest->activeconns) * 50
6430 +                        + atomic_read(&dest->inactconns);
6431 +                if (loh*dest->weight > doh*least->weight) {
6432 +                        least = dest;
6433 +                        loh = doh;
6434 +                }
6435 +        }
6436 +
6437 +        IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
6438 +                  "activeconns %d refcnt %d weight %d overhead %d\n",
6439 +                  NIPQUAD(least->addr), ntohs(least->port),
6440 +                  atomic_read(&least->activeconns),
6441 +                  atomic_read(&least->refcnt), least->weight, loh);
6442 +
6443 +        return least;
6444 +}
6445 +
6446 +
6447 +/*
6448 + *   If this destination server is overloaded and there is a less loaded
6449 + *   server, then return true.
6450 + */
6451 +static inline int
6452 +is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
6453 +{
6454 +        if (atomic_read(&dest->activeconns) > dest->weight) {
6455 +                register struct list_head *l, *e;
6456 +                struct ip_vs_dest *d;
6457 +        
6458 +                l = &svc->destinations;
6459 +                for (e=l->next; e!=l; e=e->next) {
6460 +                        d = list_entry(e, struct ip_vs_dest, n_list);
6461 +                        if (atomic_read(&d->activeconns)*2 < d->weight) {
6462 +                                return 1;
6463 +                        }
6464 +                }
6465 +        }
6466 +        return 0;
6467 +}
6468 +
6469 +
6470 +/*
6471 + *    Locality-Based (weighted) Least-Connection with Replication scheduling
6472 + */
6473 +static struct ip_vs_dest *
6474 +ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
6475 +{
6476 +        struct ip_vs_dest *dest;
6477 +        struct ip_vs_lblcr_table *tbl;
6478 +        struct ip_vs_lblcr_entry *en;
6479 +
6480 +        IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
6481 +
6482 +        tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
6483 +        en = ip_vs_lblcr_get(tbl, iph->daddr);
6484 +        if (en == NULL) {
6485 +                dest = __ip_vs_wlc_schedule(svc, iph);
6486 +                if (dest == NULL) {
6487 +                        IP_VS_DBG(1, "no destination available\n");
6488 +                        return NULL;
6489 +                }
6490 +                en = ip_vs_lblcr_new(iph->daddr);
6491 +                if (en == NULL) {
6492 +                        return NULL;
6493 +                }
6494 +                ip_vs_dest_set_insert(&en->set, dest);
6495 +                ip_vs_lblcr_hash(tbl, en);
6496 +        } else {
6497 +                dest = ip_vs_dest_set_min(&en->set);
6498 +                if (!dest || is_overloaded(dest, svc)) {
6499 +                        dest = __ip_vs_wlc_schedule(svc, iph);
6500 +                        if (dest == NULL) {
6501 +                                IP_VS_DBG(1, "no destination available\n");
6502 +                                return NULL;
6503 +                        }
6504 +                        ip_vs_dest_set_insert(&en->set, dest);
6505 +                }
6506 +                if (atomic_read(&en->set.size) > 1 &&
6507 +                    jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
6508 +                        struct ip_vs_dest *m;
6509 +                        m = ip_vs_dest_set_max(&en->set);
6510 +                        if (m) ip_vs_dest_set_erase(&en->set, m);
6511 +                }
6512 +        }
6513 +        en->lastuse = jiffies;
6514 +
6515 +        IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
6516 +                  "--> server %u.%u.%u.%u:%d\n",
6517 +                  NIPQUAD(en->addr),
6518 +                  NIPQUAD(dest->addr),
6519 +                  ntohs(dest->port));
6520 +                        
6521 +        return dest;
6522 +}
6523 +
6524 +
6525 +/*
6526 + *      IPVS LBLCR Scheduler structure
6527 + */
6528 +static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
6529 +{
6530 +        {0},                     /* n_list */
6531 +        "lblcr",                 /* name */
6532 +        ATOMIC_INIT(0),          /* refcnt */
6533 +        ip_vs_lblcr_init_svc,    /* service initializer */
6534 +        ip_vs_lblcr_done_svc,    /* service done */
6535 +        ip_vs_lblcr_update_svc,  /* service updater */
6536 +        ip_vs_lblcr_schedule,    /* select a server from the destination list */
6537 +};
6538 +
6539 +
6540 +__initfunc(int ip_vs_lblcr_init(void))
6541 +{
6542 +        IP_VS_INFO("Initializing LBLCR scheduling\n");
6543 +        INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
6544 +        lblcr_sysctl_table.sysctl_header =
6545 +                register_sysctl_table(lblcr_sysctl_table.root_dir, 0);
6546 +        return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
6547 +}
6548 +
6549 +
6550 +#ifdef MODULE
6551 +EXPORT_NO_SYMBOLS;
6552 +
6553 +int init_module(void)
6554 +{
6555 +        INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
6556 +
6557 +        /* module initialization by 'request_module' */
6558 +        if (register_ip_vs_scheduler(&ip_vs_lblcr_scheduler) != 0)
6559 +                return -EIO;
6560 +
6561 +        lblcr_sysctl_table.sysctl_header =
6562 +                register_sysctl_table(lblcr_sysctl_table.root_dir, 0);
6563 +
6564 +        IP_VS_INFO("LBLCR scheduling module loaded.\n");
6565 +
6566 +        return 0;
6567 +}
6568 +
6569 +void cleanup_module(void)
6570 +{
6571 +        /* module cleanup by 'release_module' */
6572 +        if (unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler) != 0) {
6573 +                IP_VS_INFO("cannot remove LBLCR scheduling module\n");
6574 +         } else {
6575 +                IP_VS_INFO("LBLCR scheduling module unloaded.\n");
6576 +        }
6577 +        unregister_sysctl_table(lblcr_sysctl_table.sysctl_header);
6578 +}
6579 +
6580 +#endif /* MODULE */
6581 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_lc.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lc.c
6582 --- linux-2.2.19/net/ipv4/ip_vs_lc.c    Thu Jan  1 08:00:00 1970
6583 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_lc.c   Fri Nov 24 10:02:53 2000
6584 @@ -0,0 +1,159 @@
6585 +/*
6586 + * IPVS:        Least-Connection Scheduling module
6587 + *
6588 + * Version:     $Id$
6589 + *
6590 + * Authors:     Wensong Zhang <wensong@iinchina.net>
6591 + *
6592 + *              This program is free software; you can redistribute it and/or
6593 + *              modify it under the terms of the GNU General Public License
6594 + *              as published by the Free Software Foundation; either version
6595 + *              2 of the License, or (at your option) any later version.
6596 + *
6597 + * Changes:
6598 + *     Wensong Zhang            :     added the ip_vs_lc_update_svc
6599 + *     Wensong Zhang            :     added any dest with weight=0 is quiesced 
6600 + *
6601 + */
6602 +
6603 +#include <linux/config.h>
6604 +#include <linux/module.h>
6605 +#ifdef CONFIG_KMOD
6606 +#include <linux/kmod.h>
6607 +#endif
6608 +#include <linux/types.h>
6609 +#include <linux/kernel.h>
6610 +#include <linux/errno.h>
6611 +#include <net/ip_masq.h>
6612 +#ifdef CONFIG_IP_MASQUERADE_MOD
6613 +#include <net/ip_masq_mod.h>
6614 +#endif
6615 +#include <linux/ip_fw.h>
6616 +#include <net/ip_vs.h>
6617 +
6618 +
6619 +static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
6620 +{
6621 +        MOD_INC_USE_COUNT;
6622 +        return 0;
6623 +}
6624 +
6625 +
6626 +static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
6627 +{
6628 +        MOD_DEC_USE_COUNT;
6629 +        return 0;
6630 +}
6631 +
6632 +
6633 +static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
6634 +{
6635 +        return 0;
6636 +}
6637 +
6638 +
6639 +/*
6640 + *    Least Connection scheduling
6641 + */
6642 +static struct ip_vs_dest* ip_vs_lc_schedule(struct ip_vs_service *svc,
6643 +                                            struct iphdr *iph)
6644 +{
6645 +       struct list_head *l, *e;
6646 +        struct ip_vs_dest *dest, *least;
6647 +       int lac, dac;
6648 +
6649 +       IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
6650 +
6651 +       l = &svc->destinations;
6652 +        if (l == l->next)
6653 +                return NULL;
6654 +
6655 +       /*
6656 +         * Simply select the server with the least number of
6657 +         *        (activeconns<<5) + inactconns
6658 +         * Except those whose weight is equal to zero.
6659 +         * If the weight is equal to zero, it means that the server is
6660 +         * quiesced, the existing connections to the server still get
6661 +         * served, but no new connection is assigned to the server.
6662 +         */
6663 +
6664 +        for (e=l->next; e!=l; e=e->next) {
6665 +                least = list_entry (e, struct ip_vs_dest, n_list);
6666 +                if (least->weight > 0) {
6667 +                        lac = (atomic_read(&least->activeconns) << 5)
6668 +                                + atomic_read(&least->inactconns);
6669 +                        goto nextstage;
6670 +                }
6671 +        }
6672 +        return NULL;
6673 +        
6674 +        /*
6675 +         *    Find the destination with the least load.
6676 +         */
6677 +  nextstage:
6678 +       for (e=e->next; e!=l; e=e->next) {
6679 +               dest = list_entry(e, struct ip_vs_dest, n_list);
6680 +                if (dest->weight == 0)
6681 +                        continue;
6682 +                dac = (atomic_read(&dest->activeconns) << 5)
6683 +                        + atomic_read(&dest->inactconns);
6684 +               if (dac < lac) {
6685 +                       least = dest;
6686 +                       lac = dac;
6687 +               }
6688 +       }
6689 +
6690 +        IP_VS_DBG(6, "LC: server %d.%d.%d.%d:%d activeconns %d inactconns %d\n",
6691 +                  NIPQUAD(least->addr), ntohs(least->port),
6692 +                  atomic_read(&least->activeconns),
6693 +                  atomic_read(&least->inactconns));
6694 +
6695 +        return least;
6696 +}
6697 +
6698 +
6699 +static struct ip_vs_scheduler ip_vs_lc_scheduler = {
6700 +        {0},                    /* n_list */
6701 +        "lc",                   /* name */
6702 +        ATOMIC_INIT(0),         /* refcnt */
6703 +        ip_vs_lc_init_svc,      /* service initializer */
6704 +        ip_vs_lc_done_svc,      /* service done */
6705 +        ip_vs_lc_update_svc,    /* service updater */
6706 +        ip_vs_lc_schedule,      /* select a server from the destination list */
6707 +};
6708 +
6709 +
6710 +__initfunc(int ip_vs_lc_init(void))
6711 +{
6712 +        IP_VS_INFO("Initializing LC scheduling\n");
6713 +        INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
6714 +        return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
6715 +}
6716 +
6717 +
6718 +#ifdef MODULE
6719 +EXPORT_NO_SYMBOLS;
6720 +
6721 +int init_module(void)
6722 +{
6723 +        INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
6724 +
6725 +        /* module initialization by 'request_module' */
6726 +        if(register_ip_vs_scheduler(&ip_vs_lc_scheduler) != 0)
6727 +                return -EIO;
6728 +
6729 +        IP_VS_INFO("LC scheduling module loaded.\n");
6730 +
6731 +        return 0;
6732 +}
6733 +
6734 +void cleanup_module(void)
6735 +{
6736 +        /* module cleanup by 'release_module' */
6737 +        if(unregister_ip_vs_scheduler(&ip_vs_lc_scheduler) != 0)
6738 +                IP_VS_INFO("cannot remove LC scheduling module\n");
6739 +        else
6740 +                IP_VS_INFO("LC scheduling module unloaded.\n");
6741 +}
6742 +
6743 +#endif /* MODULE */
6744 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_rr.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_rr.c
6745 --- linux-2.2.19/net/ipv4/ip_vs_rr.c    Thu Jan  1 08:00:00 1970
6746 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_rr.c   Fri Nov 24 10:04:12 2000
6747 @@ -0,0 +1,145 @@
6748 +/*
6749 + * IPVS:        Round-Robin Scheduling module
6750 + *
6751 + * Version:     $Id$
6752 + *
6753 + * Authors:     Wensong Zhang <wensong@iinchina.net>
6754 + *              Peter Kese <peter.kese@ijs.si>
6755 + *
6756 + *              This program is free software; you can redistribute it and/or
6757 + *              modify it under the terms of the GNU General Public License
6758 + *              as published by the Free Software Foundation; either version
6759 + *              2 of the License, or (at your option) any later version.
6760 + *
6761 + * Fixes/Changes:
6762 + *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
6763 + *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
6764 +     Wensong Zhang            :     changed some cosmetic things for debugging
6765 + *     Wensong Zhang            :     changed for the d-linked destination list
6766 + *     Wensong Zhang            :     added the ip_vs_rr_update_svc
6767 + *     Wensong Zhang            :     added any dest with weight=0 is quiesced 
6768 + *
6769 + */
6770 +
6771 +#include <linux/config.h>
6772 +#include <linux/module.h>
6773 +#ifdef CONFIG_KMOD
6774 +#include <linux/kmod.h>
6775 +#endif
6776 +#include <linux/types.h>
6777 +#include <linux/kernel.h>
6778 +#include <linux/errno.h>
6779 +#include <net/ip_masq.h>
6780 +#ifdef CONFIG_IP_MASQUERADE_MOD
6781 +#include <net/ip_masq_mod.h>
6782 +#endif
6783 +#include <linux/ip_fw.h>
6784 +#include <net/ip_vs.h>
6785 +
6786 +
6787 +static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
6788 +{
6789 +        svc->sched_data = &svc->destinations;
6790 +        MOD_INC_USE_COUNT;
6791 +        return 0;
6792 +}
6793 +
6794 +
6795 +static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
6796 +{
6797 +        MOD_DEC_USE_COUNT;
6798 +        return 0;
6799 +}
6800 +
6801 +
6802 +static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
6803 +{
6804 +        svc->sched_data = &svc->destinations;
6805 +        return 0;
6806 +}
6807 +
6808 +
6809 +/*
6810 + * Round-Robin Scheduling
6811 + */
6812 +static struct ip_vs_dest* ip_vs_rr_schedule(struct ip_vs_service *svc,
6813 +                                            struct iphdr *iph)
6814 +{
6815 +       register struct list_head *p, *q;
6816 +        struct ip_vs_dest *dest;
6817 +
6818 +        IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
6819 +
6820 +        p = (struct list_head *)svc->sched_data;
6821 +        p = p->next;
6822 +        q = p;
6823 +        do {
6824 +                if (q == &svc->destinations) {
6825 +                        q = q->next;
6826 +                        continue;
6827 +                }
6828 +                dest = list_entry(q, struct ip_vs_dest, n_list);
6829 +                if (dest->weight > 0)
6830 +                        /* HIT */
6831 +                        goto out;
6832 +                q = q->next;
6833 +        } while (q != p);
6834 +        return NULL;
6835 +
6836 +  out:
6837 +        svc->sched_data = q;
6838 +        IP_VS_DBG(6, "RR: server %d.%d.%d.%d:%d "
6839 +                  "activeconns %d refcnt %d weight %d\n",
6840 +                  NIPQUAD(dest->addr), ntohs(dest->port),
6841 +                  atomic_read(&dest->activeconns),
6842 +                  atomic_read(&dest->refcnt), dest->weight);
6843 +
6844 +       return dest;
6845 +}
6846 +
6847 +
6848 +static struct ip_vs_scheduler ip_vs_rr_scheduler = {
6849 +        {0},                    /* n_list */
6850 +        "rr",                   /* name */
6851 +        ATOMIC_INIT(0),         /* refcnt */
6852 +        ip_vs_rr_init_svc,      /* service initializer */
6853 +        ip_vs_rr_done_svc,      /* service done */
6854 +        ip_vs_rr_update_svc,    /* service updater */
6855 +        ip_vs_rr_schedule,      /* select a server from the destination list */
6856 +};
6857 +
6858 +
6859 +__initfunc(int ip_vs_rr_init(void))
6860 +{
6861 +        IP_VS_INFO("Initializing RR scheduling\n");
6862 +        INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
6863 +        return register_ip_vs_scheduler(&ip_vs_rr_scheduler) ;
6864 +}
6865 +
6866 +
6867 +#ifdef MODULE
6868 +EXPORT_NO_SYMBOLS;
6869 +
6870 +int init_module(void)
6871 +{
6872 +        INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
6873 +
6874 +        /* module initialization by 'request_module' */
6875 +        if(register_ip_vs_scheduler(&ip_vs_rr_scheduler) != 0)
6876 +                return -EIO;
6877 +
6878 +        IP_VS_INFO("RR scheduling module loaded.\n");
6879 +
6880 +        return 0;
6881 +}
6882 +
6883 +void cleanup_module(void)
6884 +{
6885 +        /* module cleanup by 'release_module' */
6886 +        if(unregister_ip_vs_scheduler(&ip_vs_rr_scheduler) != 0)
6887 +                IP_VS_INFO("cannot remove RR scheduling module\n");
6888 +        else
6889 +                IP_VS_INFO("RR scheduling module unloaded.\n");
6890 +}
6891 +
6892 +#endif /* MODULE */
6893 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_wlc.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_wlc.c
6894 --- linux-2.2.19/net/ipv4/ip_vs_wlc.c   Thu Jan  1 08:00:00 1970
6895 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_wlc.c  Fri Nov 24 09:59:32 2000
6896 @@ -0,0 +1,176 @@
6897 +/*
6898 + * IPVS:        Weighted Least-Connection Scheduling module
6899 + *
6900 + * Version:     $Id$
6901 + *
6902 + * Authors:     Wensong Zhang <wensong@iinchina.net>
6903 + *              Peter Kese <peter.kese@ijs.si>
6904 + *
6905 + *              This program is free software; you can redistribute it and/or
6906 + *              modify it under the terms of the GNU General Public License
6907 + *              as published by the Free Software Foundation; either version
6908 + *              2 of the License, or (at your option) any later version.
6909 + *
6910 + * Changes:
6911 + *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
6912 + *     Wensong Zhang            :     changed to use the inactconns in scheduling
6913 +     Wensong Zhang            :     changed some cosmetic things for debugging
6914 + *     Wensong Zhang            :     changed for the d-linked destination list
6915 + *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
6916 + *     Wensong Zhang            :     added any dest with weight=0 is quiesced 
6917 + *
6918 + */
6919 +
6920 +#include <linux/config.h>
6921 +#include <linux/module.h>
6922 +#ifdef CONFIG_KMOD
6923 +#include <linux/kmod.h>
6924 +#endif
6925 +#include <linux/types.h>
6926 +#include <linux/kernel.h>
6927 +#include <linux/errno.h>
6928 +#include <net/ip_masq.h>
6929 +#ifdef CONFIG_IP_MASQUERADE_MOD
6930 +#include <net/ip_masq_mod.h>
6931 +#endif
6932 +#include <linux/ip_fw.h>
6933 +#include <net/ip_vs.h>
6934 +
6935 +
6936 +static int 
6937 +ip_vs_wlc_init_svc(struct ip_vs_service *svc)
6938 +{
6939 +        MOD_INC_USE_COUNT;
6940 +        return 0;
6941 +}
6942 +
6943 +
6944 +static int 
6945 +ip_vs_wlc_done_svc(struct ip_vs_service *svc)
6946 +{
6947 +        MOD_DEC_USE_COUNT;
6948 +        return 0;
6949 +}
6950 +
6951 +
6952 +static int 
6953 +ip_vs_wlc_update_svc(struct ip_vs_service *svc)
6954 +{
6955 +        return 0;
6956 +}
6957 +
6958 +
6959 +/*
6960 + *    Weighted Least Connection scheduling
6961 + */
6962 +static struct ip_vs_dest *
6963 +ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
6964 +{
6965 +        register struct list_head *l, *e;
6966 +        struct ip_vs_dest *dest, *least;
6967 +        int loh, doh;
6968 +
6969 +        IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
6970 +
6971 +        l = &svc->destinations;
6972 +        if (l == l->next)
6973 +                return NULL;
6974 +
6975 +        /*
6976 +         * We think the overhead of processing active connections is fifty
6977 +         * times that of inactive connections on average. (This factor of
6978 +         * fifty might not be accurate; we will change it later.) We use
6979 +         * the following formula to estimate the overhead:
6980 +         *                dest->activeconns*50 + dest->inactconns
6981 +         * and the load:
6982 +         *                (dest overhead) / dest->weight
6983 +         *
6984 +         * Remember -- no floats in kernel mode!!!
6985 +         * The comparison of h1*w2 > h2*w1 is equivalent to that of
6986 +         *                h1/w1 > h2/w2
6987 +         * if every weight is larger than zero.
6988 +         *
6989 +         * The server with weight=0 is quiesced and will not receive any
6990 +         * new connection.
6991 +         */
6992 +
6993 +        for (e=l->next; e!=l; e=e->next) {
6994 +                least = list_entry(e, struct ip_vs_dest, n_list);
6995 +                if (least->weight > 0) {
6996 +                        loh = atomic_read(&least->activeconns) * 50
6997 +                                + atomic_read(&least->inactconns);
6998 +                        goto nextstage;
6999 +                }
7000 +        }
7001 +        return NULL;
7002 +        
7003 +        /*
7004 +         *    Find the destination with the least load.
7005 +         */
7006 +  nextstage:
7007 +        for (e=e->next; e!=l; e=e->next) {
7008 +                dest = list_entry(e, struct ip_vs_dest, n_list);
7009 +                doh = atomic_read(&dest->activeconns) * 50
7010 +                        + atomic_read(&dest->inactconns);
7011 +                if (loh * dest->weight > doh * least->weight) {
7012 +                        least = dest;
7013 +                        loh = doh;
7014 +                }
7015 +        }
7016 +
7017 +        IP_VS_DBG(6, "WLC: server %d.%d.%d.%d:%d "
7018 +                  "activeconns %d refcnt %d weight %d overhead %d\n",
7019 +                  NIPQUAD(least->addr), ntohs(least->port),
7020 +                  atomic_read(&least->activeconns),
7021 +                  atomic_read(&least->refcnt), least->weight, loh);
7022 +
7023 +        return least;
7024 +}
7025 +
7026 +
7027 +static struct ip_vs_scheduler ip_vs_wlc_scheduler =
7028 +{
7029 +        {0},                    /* n_list */
7030 +        "wlc",                  /* name */
7031 +        ATOMIC_INIT (0),        /* refcnt */
7032 +        ip_vs_wlc_init_svc,     /* service initializer */
7033 +        ip_vs_wlc_done_svc,     /* service done */
7034 +        ip_vs_wlc_update_svc,   /* service updater */
7035 +        ip_vs_wlc_schedule,     /* select a server from the destination list */
7036 +};
7037 +
7038 +
7039 +__initfunc(int ip_vs_wlc_init (void))
7040 +{
7041 +        IP_VS_INFO("Initializing WLC scheduling\n");
7042 +        INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
7043 +        return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
7044 +}
7045 +
7046 +
7047 +#ifdef MODULE
7048 +EXPORT_NO_SYMBOLS;
7049 +
7050 +int init_module(void)
7051 +{
7052 +        INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
7053 +
7054 +        /* module initialization by 'request_module' */
7055 +        if (register_ip_vs_scheduler(&ip_vs_wlc_scheduler) != 0)
7056 +                return -EIO;
7057 +
7058 +        IP_VS_INFO("WLC scheduling module loaded.\n");
7059 +
7060 +        return 0;
7061 +}
7062 +
7063 +void cleanup_module(void)
7064 +{
7065 +        /* module cleanup by 'release_module' */
7066 +        if (unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler) != 0)
7067 +                IP_VS_INFO("cannot remove WLC scheduling module\n");
7068 +        else
7069 +                IP_VS_INFO("WLC scheduling module unloaded.\n");
7070 +}
7071 +
7072 +#endif /* MODULE */
7073 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/ip_vs_wrr.c linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_wrr.c
7074 --- linux-2.2.19/net/ipv4/ip_vs_wrr.c   Thu Jan  1 08:00:00 1970
7075 +++ linux-2.2.19-vs-1.0.7/net/ipv4/ip_vs_wrr.c  Fri Nov 24 09:57:23 2000
7076 @@ -0,0 +1,209 @@
7077 +/*
7078 + * IPVS:        Weighted Round-Robin Scheduling module
7079 + *
7080 + * Version:     $Id$
7081 + *
7082 + * Authors:     Wensong Zhang <wensong@iinchina.net>
7083 + *
7084 + *              This program is free software; you can redistribute it and/or
7085 + *              modify it under the terms of the GNU General Public License
7086 + *              as published by the Free Software Foundation; either version
7087 + *              2 of the License, or (at your option) any later version.
7088 + *
7089 + * Changes:
7090 + *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
7091 +     Wensong Zhang            :     changed some cosmetic things for debugging
7092 + *     Wensong Zhang            :     changed for the d-linked destination list
7093 + *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
7094 + *     Julian Anastasov         :     return -ENOMEM instead of ENOMEM in the
7095 + *                                    ip_vs_wrr_init_svc
7096 + *     Julian Anastasov         :     fixed the bug of returning destination
7097 + *                                    with weight 0 when all weights are zero
7098 + *
7099 + */
7100 +
7101 +#include <linux/config.h>
7102 +#include <linux/module.h>
7103 +#ifdef CONFIG_KMOD
7104 +#include <linux/kmod.h>
7105 +#endif
7106 +#include <linux/types.h>
7107 +#include <linux/kernel.h>
7108 +#include <linux/errno.h>
7109 +#include <net/ip_masq.h>
7110 +#ifdef CONFIG_IP_MASQUERADE_MOD
7111 +#include <net/ip_masq_mod.h>
7112 +#endif
7113 +#include <linux/ip_fw.h>
7114 +#include <net/ip_vs.h>
7115 +
7116 +/*
7117 + * current destination pointer for weighted round-robin scheduling
7118 + */
7119 +struct ip_vs_wrr_mark {
7120 +        struct list_head *cl;        /* current list head */
7121 +        int cw;                      /* current weight */
7122 +};
7123 +
7124 +
7125 +static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
7126 +{
7127 +       /*
7128 +         *    Allocate the mark variable for WRR scheduling
7129 +         */
7130 +        svc->sched_data = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
7131 +
7132 +        if (svc->sched_data == NULL) {
7133 +                IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
7134 +               return -ENOMEM;
7135 +        }
7136 +        memset(svc->sched_data, 0, sizeof(struct ip_vs_wrr_mark));
7137 +
7138 +        ((struct ip_vs_wrr_mark*)svc->sched_data)->cl = &svc->destinations;
7139 +
7140 +        MOD_INC_USE_COUNT;
7141 +        return 0;
7142 +}
7143 +
7144 +
7145 +static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
7146 +{
7147 +        /*
7148 +         *    Release the mark variable
7149 +         */
7150 +        kfree_s(svc->sched_data, sizeof(struct ip_vs_wrr_mark));
7151 +        
7152 +        MOD_DEC_USE_COUNT;
7153 +        return 0;
7154 +}
7155 +
7156 +
7157 +static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
7158 +{
7159 +        ((struct ip_vs_wrr_mark*)svc->sched_data)->cl = &svc->destinations;
7160 +        return 0;
7161 +}
7162 +
7163 +
7164 +/*
7165 + *    Get the maximum weight of the service destinations.
7166 + */
7167 +int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
7168 +{
7169 +        register struct list_head *l, *e;
7170 +        struct ip_vs_dest *dest;
7171 +        int weight = 0;
7172 +
7173 +        l = &svc->destinations;
7174 +        for (e=l->next; e!=l; e=e->next) {
7175 +                dest = list_entry(e, struct ip_vs_dest, n_list);
7176 +                if (dest->weight > weight)
7177 +                        weight = dest->weight;
7178 +        }
7179 +
7180 +        return weight;
7181 +}
7182 +
7183 +        
7184 +/*
7185 + *    Weighted Round-Robin Scheduling
7186 + */
7187 +static struct ip_vs_dest* ip_vs_wrr_schedule(struct ip_vs_service *svc,
7188 +                                             struct iphdr *iph)
7189 +{
7190 +        struct ip_vs_dest *dest;
7191 +        struct ip_vs_wrr_mark *mark = svc->sched_data;
7192 +
7193 +       IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
7194 +
7195 +        /*
7196 +         * This loop will always terminate, because 0<mark->cw<max_weight,
7197 +         * and at least one server has its weight equal to max_weight.
7198 +         */
7199 +        while (1) {
7200 +                if (mark->cl == &svc->destinations) {
7201 +                        /* it is at the head of the destination list */
7202 +                        
7203 +                        if (mark->cl == mark->cl->next)
7204 +                                /* no dest entry */
7205 +                                return NULL;
7206 +
7207 +                        mark->cl = svc->destinations.next;
7208 +                        mark->cw--;
7209 +                        if (mark->cw <= 0) {
7210 +                                mark->cw = ip_vs_wrr_max_weight(svc);
7211 +                                /*
7212 +                                 * Still zero, which means no available servers.
7213 +                                 */
7214 +                                if (mark->cw == 0) {
7215 +                                        mark->cl = &svc->destinations;
7216 +                                        IP_VS_INFO("ip_vs_wrr_schedule(): "
7217 +                                                   "no available servers\n");
7218 +                                        return NULL;
7219 +                                }
7220 +                        }
7221 +                }
7222 +                else mark->cl = mark->cl->next;
7223 +
7224 +                if (mark->cl != &svc->destinations) {
7225 +                        /* not at the head of the list */
7226 +                        dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
7227 +                        if (dest->weight >= mark->cw)
7228 +                                break;
7229 +                }
7230 +        }
7231 +
7232 +        IP_VS_DBG(6, "WRR: server %d.%d.%d.%d:%d "
7233 +                  "activeconns %d refcnt %d weight %d\n",
7234 +                  NIPQUAD(dest->addr), ntohs(dest->port),
7235 +                  atomic_read(&dest->activeconns),
7236 +                  atomic_read(&dest->refcnt), dest->weight);
7237 +
7238 +        return  dest;
7239 +}
7240 +
7241 +
7242 +static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
7243 +       {0},                    /* n_list */
7244 +       "wrr",                  /* name */
7245 +       ATOMIC_INIT(0),         /* refcnt */
7246 +       ip_vs_wrr_init_svc,     /* service initializer */
7247 +       ip_vs_wrr_done_svc,     /* service done */
7248 +       ip_vs_wrr_update_svc,   /* service updater */
7249 +       ip_vs_wrr_schedule,     /* select a server from the destination list */
7250 +};
7251 +
7252 +
7253 +__initfunc(int ip_vs_wrr_init(void))
7254 +{
7255 +       IP_VS_INFO("Initializing WRR scheduling\n");
7256 +        INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
7257 +       return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
7258 +}
7259 +
7260 +#ifdef MODULE
7261 +EXPORT_NO_SYMBOLS;
7262 +
7263 +int init_module(void)
7264 +{
7265 +        INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
7266 +
7267 +       /* module initialization by 'request_module' */
7268 +       if(register_ip_vs_scheduler(&ip_vs_wrr_scheduler) != 0)
7269 +               return -EIO;
7270 +
7271 +       IP_VS_INFO("WRR scheduling module loaded.\n");
7272 +       
7273 +        return 0;
7274 +}
7275 +
7276 +void cleanup_module(void)
7277 +{
7278 +       /* module cleanup by 'release_module' */
7279 +       if(unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler) != 0)
7280 +               IP_VS_INFO("cannot remove WRR scheduling module\n");
7281 +       else
7282 +               IP_VS_INFO("WRR scheduling module unloaded.\n");
7283 +}
7284 +
7285 +#endif /* MODULE */
7286 diff -urN --exclude-from=/usr/src/exclude linux-2.2.19/net/ipv4/sysctl_net_ipv4.c linux-2.2.19-vs-1.0.7/net/ipv4/sysctl_net_ipv4.c
7287 --- linux-2.2.19/net/ipv4/sysctl_net_ipv4.c     Tue Mar 27 09:33:49 2001
7288 +++ linux-2.2.19-vs-1.0.7/net/ipv4/sysctl_net_ipv4.c    Tue Mar 27 09:32:21 2001
7289 @@ -69,6 +69,9 @@
7290  struct ipv4_config ipv4_config;
7291  
7292  extern ctl_table ipv4_route_table[];
7293 +#ifdef CONFIG_IP_MASQUERADE_VS
7294 +extern ctl_table ipv4_vs_table[];
7295 +#endif
7296  
7297  #ifdef CONFIG_SYSCTL
7298  
7299 @@ -198,7 +201,10 @@
7300         {NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships",
7301          &sysctl_igmp_max_memberships, sizeof(int), 0644, NULL, &proc_dointvec},
7302  #endif
7303 +#ifdef CONFIG_IP_MASQUERADE_VS
7304 +       {NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table},
7305 +#endif
7306         {0}
7307  };
7308 -
7309 +
7310  #endif /* CONFIG_SYSCTL */
This page took 0.624478 seconds and 3 git commands to generate.