]>
Commit | Line | Data |
---|---|---|
0d9e0091 | 1 | ##dhcp-probe-02-keep-pcap.patch - add option to keep pcap open all the time |
c6298b67 JB |
2 | --- src/dhcp_probe.c.orig 2021-01-18 20:17:29.000000000 +0100 |
3 | +++ src/dhcp_probe.c 2024-04-20 21:59:22.184598775 +0200 | |
4 | @@ -49,6 +49,7 @@ char *capture_file = NULL; | |
0d9e0091 JR |
5 | */ |
6 | int snaplen = CAPTURE_BUFSIZE; | |
7 | int socket_receive_timeout_feature = 0; | |
8 | +int keep_pcap = 0; | |
9 | ||
10 | char *prog = NULL; | |
11 | char *logfile_name = NULL; | |
c6298b67 | 12 | @@ -75,6 +76,113 @@ struct ether_addr my_eaddr; |
0d9e0091 JR |
13 | int use_8021q = 0; |
14 | int vlan_id = 0; | |
15 | ||
16 | +int need_promiscuous(void) | |
17 | +{ | |
18 | + /* If we're going to claim a chaddr different than my_eaddr, some of the responses | |
19 | + may come back to chaddr (as opposed to my_eaddr or broadcast), so we'll need to | |
20 | + listen promiscuously. | |
21 | + If we're going to claim an ether_src different than my_eaddr, in theory that should | |
22 | + make no difference; bootp/dhcp servers should rely on chaddr, not ether_src. Still, | |
23 | + it's possible there's a server out there that does it wrong, and might therefore mistakenly | |
24 | + send responses to ether_src. So let's also listen promiscuously if ether_src != my_eaddr. | |
25 | + */ | |
26 | + int promiscuous = 0; | |
27 | + if (bcmp(GetChaddr(), &my_eaddr, sizeof(struct ether_addr)) || | |
28 | + bcmp(GetEther_src(), &my_eaddr, sizeof(struct ether_addr))) | |
29 | + promiscuous = 1; | |
30 | + return promiscuous; | |
31 | +} | |
32 | + | |
c6298b67 | 33 | +int init_pcap(int promiscuous) |
0d9e0091 JR |
34 | +{ |
35 | + /* open packet capture descriptor */ | |
36 | + /* XXX On Solaris 7, sometimes pcap_open_live() fails with a message like: | |
37 | + pcap_open_live qfe0: recv_ack: info unexpected primitive ack 0x8 | |
38 | + It's not clear what causes this, or what the 0x8 code indicates. | |
39 | + The error appears to be transient; retrying sometimes will work, so I've wrapped the call in a retry loop. | |
40 | + I've also added a delay after each failure; perhaps the failure has something to do with the fact that | |
41 | + we call pcap_open_live() so soon after pcap_close() (for the second and succeeding packets in each cycle); | |
42 | + adding a delay might help in that case. | |
43 | + */ | |
44 | + struct bpf_program bpf_code; | |
45 | + char pcap_errbuf[PCAP_ERRBUF_SIZE]; | |
46 | + int linktype; | |
47 | + int pcap_open_retries = PCAP_OPEN_LIVE_RETRY_MAX; | |
48 | + | |
49 | + do { | |
50 | + pcap_errbuf[0] = '\0'; /* so we can tell if a warning was produced on success */ | |
51 | + if ((pd = pcap_open_live(ifname, snaplen, promiscuous, GetResponse_wait_time(), pcap_errbuf)) != NULL) { | |
52 | + break; /* success */ | |
53 | + } else { /* failure */ | |
54 | + if (pcap_open_retries == 0) { | |
55 | + report(LOG_DEBUG, "pcap_open_live(%s): %s; retry count (%d) exceeded, giving up", ifname, pcap_errbuf, PCAP_OPEN_LIVE_RETRY_MAX); | |
56 | + my_exit(1, 1, 1); | |
57 | + } else { | |
58 | + if (debug > 1) | |
59 | + report(LOG_DEBUG, "pcap_open_live(%s): %s; will retry", ifname, pcap_errbuf); | |
60 | + sleep(PCAP_OPEN_LIVE_RETRY_DELAY); /* before next retry */ | |
61 | + } | |
62 | + } /* failure */ | |
63 | + } while (pcap_open_retries--); | |
64 | + | |
0d9e0091 JR |
65 | + if (pcap_errbuf[0] != '\0') |
66 | + /* even on success, a warning may be produced */ | |
67 | + report(LOG_WARNING, "pcap_open_live(%s): succeeded but with warning: %s", ifname, pcap_errbuf); | |
68 | + | |
69 | + /* make sure this interface is ethernet */ | |
70 | + linktype = pcap_datalink(pd); | |
71 | + if (linktype != DLT_EN10MB) { | |
c6298b67 JB |
72 | + /* In libpcap 0.9.8 on Solaris 9 SPARC, this only happened if you pointed us to an interface |
73 | + that truly had the wrong datalink type. | |
74 | + It was not a transient error, so we exited. | |
75 | + However, by libpcap version 1.1.1 on Solaris 9 SPARC, this happens from time to time; | |
76 | + pcap_datalink() returns 0, indicating DLT_NULL. | |
77 | + Perhaps that's a bug introduced after libpcap 0.9.8. | |
78 | + As this seems to be a transient error, we no longer exit, but instead just log the error, | |
79 | + and skip the rest of the current cycle. | |
80 | + A side effect of this change is that when you DO mistakenly point dhcp_probe to | |
81 | + a non-Ethernet interface (the error is not transient), we keep trying instead | |
82 | + of exiting. If a future libpcap change returns to the old behavior (where the | |
83 | + interface type remains consistent), we should go back to the old behavior of exiting. | |
84 | + */ | |
85 | + /* | |
0d9e0091 JR |
86 | + report(LOG_ERR, "interface %s link layer type %d not ethernet", ifname, linktype); |
87 | + my_exit(1, 1, 1); | |
c6298b67 JB |
88 | + */ |
89 | + report(LOG_ERR, "interface %s link layer type %d not ethernet, skipping rest of this probe cycle", ifname, linktype); | |
90 | + return -1; /* caller should break out of for (l) ... */ | |
0d9e0091 JR |
91 | + } |
92 | + /* compile bpf filter to select just udp/ip traffic to udp port bootpc */ | |
c6298b67 JB |
93 | + /* Although one would expect frames on an untagged logical network interface to arrive without any 802.1Q tag, |
94 | + some Ethernet drivers will deliver some frames with an 802.1Q tag in which vlan==0. | |
95 | + This may be because the frame arrived with an 802.1Q tag in which the 802.1p priority was non-zero. | |
96 | + To preserve that priority field, they retain the 802.1Q tag and set the vlan field to 0. | |
97 | + As per spec, a frame received with 802.1Q tag in which vlan == 0 should be treated as an untagged frame. | |
98 | + So our bpf filter needs to include both untagged and tagged frames. | |
99 | + */ | |
100 | + if (pcap_compile(pd, &bpf_code, "udp dst port bootpc or (vlan and udp dst port bootpc)", 1, PCAP_NETMASK_UNKNOWN) < 0) { | |
0d9e0091 JR |
101 | + report(LOG_ERR, "pcap_compile: %s", pcap_geterr(pd)); |
102 | + my_exit(1, 1, 1); | |
103 | + } | |
104 | + /* install compiled filter */ | |
105 | + if (pcap_setfilter(pd, &bpf_code) < 0) { | |
106 | + report(LOG_ERR, "pcap_setfilter: %s", pcap_geterr(pd)); | |
107 | + my_exit(1, 1, 1); | |
108 | + } | |
109 | + if (socket_receive_timeout_feature) | |
110 | + set_pcap_timeout(pd); | |
111 | + | |
112 | + return 0; | |
113 | +} | |
114 | + | |
115 | +void | |
116 | +reset_pcap() | |
117 | +{ | |
118 | + /* close packet capture descriptor */ | |
c6298b67 JB |
119 | + pcap_close(pd); |
120 | + pd = NULL; | |
0d9e0091 JR |
121 | +} |
122 | + | |
0d9e0091 | 123 | int |
c6298b67 JB |
124 | main(int argc, char **argv) |
125 | { | |
126 | @@ -93,8 +201,6 @@ main(int argc, char **argv) | |
127 | int receive_and_process_responses_rc; | |
0d9e0091 JR |
128 | |
129 | /* for libpcap */ | |
0d9e0091 JR |
130 | - struct bpf_program bpf_code; |
131 | - int linktype; | |
132 | char pcap_errbuf[PCAP_ERRBUF_SIZE], pcap_errbuf2[PCAP_ERRBUF_SIZE]; | |
133 | ||
134 | /* get progname = last component of argv[0] */ | |
c6298b67 | 135 | @@ -104,7 +210,7 @@ main(int argc, char **argv) |
0d9e0091 JR |
136 | else |
137 | prog = argv[0]; | |
138 | ||
139 | - while ((c = getopt(argc, argv, "c:d:fhl:o:p:Q:s:Tvw:")) != EOF) { | |
140 | + while ((c = getopt(argc, argv, "c:d:fhkl:o:p:Q:s:Tvw:")) != EOF) { | |
141 | switch (c) { | |
142 | case 'c': | |
143 | if (optarg[0] != '/') { | |
c6298b67 | 144 | @@ -129,6 +235,9 @@ main(int argc, char **argv) |
0d9e0091 JR |
145 | case 'h': |
146 | usage(); | |
147 | my_exit(0, 0, 0); | |
148 | + case 'k': | |
149 | + keep_pcap = 1; | |
150 | + break; | |
151 | case 'l': | |
152 | if (optarg[0] != '/') { | |
153 | fprintf(stderr, "%s: invalid log file '%s', must be an absolute pathname\n", prog, optarg); | |
c6298b67 | 154 | @@ -435,8 +544,10 @@ main(int argc, char **argv) |
0d9e0091 JR |
155 | } |
156 | } | |
157 | ||
158 | + if (keep_pcap) | |
c6298b67 | 159 | + init_pcap(need_promiscuous()); |
0d9e0091 JR |
160 | + |
161 | while (1) { /* MAIN EVENT LOOP */ | |
162 | - int promiscuous; | |
163 | libnet_t *l; /* to iterate through libnet context queue */ | |
164 | /* struct pcap_stat ps; */ /* to hold pcap stats */ | |
165 | ||
c6298b67 | 166 | @@ -477,25 +588,9 @@ main(int argc, char **argv) |
0d9e0091 JR |
167 | interface in promiscuous mode as little as possible, since that can affect the host's performance. |
168 | */ | |
169 | ||
170 | - /* If we're going to claim a chaddr different than my_eaddr, some of the responses | |
171 | - may come back to chaddr (as opposed to my_eaddr or broadcast), so we'll need to | |
172 | - listen promiscuously. | |
173 | - If we're going to claim an ether_src different than my_eaddr, in theory that should | |
174 | - make no difference; bootp/dhcp servers should rely on chaddr, not ether_src. Still, | |
175 | - it's possible there's a server out there that does it wrong, and might therefore mistakenly | |
176 | - send responses to ether_src. So lets also listen promiscuously if ether_src != my_eaddr. | |
177 | - */ | |
178 | - if (bcmp(GetChaddr(), &my_eaddr, sizeof(struct ether_addr)) || | |
179 | - bcmp(GetEther_src(), &my_eaddr, sizeof(struct ether_addr))) | |
180 | - promiscuous = 1; | |
181 | - else | |
182 | - promiscuous = 0; | |
183 | - | |
184 | - | |
185 | for (l = libnet_cq_head(); libnet_cq_last(); l = libnet_cq_next()) { /* write one flavor packet and listen for answers */ | |
186 | ||
187 | int packets_recv; | |
188 | - int pcap_open_retries; | |
0d9e0091 JR |
189 | |
190 | /* We set up for packet capture BEFORE writing our packet, to minimize the delay | |
191 | between our writing and when we are able to start capturing. (I cannot tell from | |
c6298b67 | 192 | @@ -505,78 +600,10 @@ main(int argc, char **argv) |
0d9e0091 JR |
193 | we wanted! |
194 | */ | |
195 | ||
196 | - /* open packet capture descriptor */ | |
197 | - /* XXX On Solaris 7, sometimes pcap_open_live() fails with a message like: | |
198 | - pcap_open_live qfe0: recv_ack: info unexpected primitive ack 0x8 | |
199 | - It's not clear what causes this, or what the 0x8 code indicates. | |
200 | - The error appears to be transient; retrying sometimes will work, so I've wrapped the call in a retry loop. | |
201 | - I've also added a delay after each failure; perhaps the failure has something to do with the fact that | |
202 | - we call pcap_open_live() so soon after pcap_close() (for the second and succeeding packets in each cycle); | |
203 | - adding a delay might help in that case. | |
204 | - */ | |
205 | - pcap_open_retries = PCAP_OPEN_LIVE_RETRY_MAX; | |
206 | - while (pcap_open_retries--) { | |
207 | - pcap_errbuf[0] = '\0'; /* so we can tell if a warning was produced on success */ | |
208 | - if ((pd = pcap_open_live(ifname, snaplen, promiscuous, GetResponse_wait_time(), pcap_errbuf)) != NULL) { | |
209 | - break; /* success */ | |
210 | - } else { /* failure */ | |
211 | - if (pcap_open_retries == 0) { | |
212 | - report(LOG_DEBUG, "pcap_open_live(%s): %s; retry count (%d) exceeded, giving up", ifname, pcap_errbuf, PCAP_OPEN_LIVE_RETRY_MAX); | |
213 | - my_exit(1, 1, 1); | |
214 | - } else { | |
215 | - if (debug > 1) | |
216 | - report(LOG_DEBUG, "pcap_open_live(%s): %s; will retry", ifname, pcap_errbuf); | |
217 | - sleep(PCAP_OPEN_LIVE_RETRY_DELAY); /* before next retry */ | |
218 | - } | |
219 | - } /* failure */ | |
c6298b67 JB |
220 | + if (! keep_pcap) { |
221 | + if (init_pcap(need_promiscuous())) | |
222 | + break; /* for l... */ | |
223 | } | |
0d9e0091 JR |
224 | - if (pcap_errbuf[0] != '\0') |
225 | - /* even on success, a warning may be produced */ | |
226 | - report(LOG_WARNING, "pcap_open_live(%s): succeeded but with warning: %s", ifname, pcap_errbuf); | |
227 | - | |
228 | - /* make sure this interface is ethernet */ | |
229 | - linktype = pcap_datalink(pd); | |
230 | - if (linktype != DLT_EN10MB) { | |
c6298b67 JB |
231 | - /* In libpcap 0.9.8 on Solaris 9 SPARC, this only happened if you pointed us to an interface |
232 | - that truly had the wrong datalink type. | |
233 | - It was not a transient error, so we exited. | |
234 | - However, by libpcap version 1.1.1 on Solaris 9 SPARC, this happens from time to time; | |
235 | - pcap_datalink() returns 0, indicating DLT_NULL. | |
236 | - Perhaps that's a bug introduced after libpcap 0.9.8. | |
237 | - As this seems to be a transient error, we no longer exit, but instead just log the error, | |
238 | - and skip the rest of the current cycle. | |
239 | - A side effect of this change is that when you DO mistakenly point dhcp_probe to | |
240 | - a non-Ethernet interface (the error is not transient), we keep trying instead | |
241 | - of exiting. If a future libpcap change returns to the old behavior (where the | |
242 | - interface type remains consistent), we should go back to the old behavior of exiting. | |
243 | - */ | |
244 | - /* | |
0d9e0091 JR |
245 | - report(LOG_ERR, "interface %s link layer type %d not ethernet", ifname, linktype); |
246 | - my_exit(1, 1, 1); | |
c6298b67 JB |
247 | - */ |
248 | - report(LOG_ERR, "interface %s link layer type %d not ethernet, skipping rest of this probe cycle", ifname, linktype); | |
249 | - break; /* for (l) ... */ | |
0d9e0091 | 250 | - } |
c6298b67 | 251 | - |
0d9e0091 | 252 | - /* compile bpf filter to select just udp/ip traffic to udp port bootpc */ |
c6298b67 JB |
253 | - /* Although one would expect frames on an untagged logical network interface to arrive without any 802.1Q tag, |
254 | - some Ethernet drivers will deliver some frames with an 802.1Q tag in which vlan==0. | |
255 | - This may be because the frame arrived with an 802.1Q tag in which the 802.1p priority was non-zero. | |
256 | - To preserve that priority field, they retain the 802.1Q tag and set the vlan field to 0. | |
257 | - As per spec, a frame received with 802.1Q tag in which vlan == 0 should be treated as an untagged frame. | |
258 | - So our bpf filter needs to include both untagged and tagged frames. | |
259 | - */ | |
260 | - if (pcap_compile(pd, &bpf_code, "udp dst port bootpc or (vlan and udp dst port bootpc)", 1, PCAP_NETMASK_UNKNOWN) < 0) { | |
0d9e0091 JR |
261 | - report(LOG_ERR, "pcap_compile: %s", pcap_geterr(pd)); |
262 | - my_exit(1, 1, 1); | |
263 | - } | |
264 | - /* install compiled filter */ | |
265 | - if (pcap_setfilter(pd, &bpf_code) < 0) { | |
266 | - report(LOG_ERR, "pcap_setfilter: %s", pcap_geterr(pd)); | |
267 | - my_exit(1, 1, 1); | |
268 | - } | |
269 | - if (socket_receive_timeout_feature) | |
270 | - set_pcap_timeout(pd); | |
c6298b67 | 271 | |
0d9e0091 JR |
272 | /* write one packet */ |
273 | ||
c6298b67 | 274 | @@ -630,8 +657,8 @@ main(int argc, char **argv) |
0d9e0091 JR |
275 | */ |
276 | ||
277 | /* close packet capture descriptor */ | |
278 | - pcap_close(pd); | |
c6298b67 | 279 | - pd = NULL; |
0d9e0091 JR |
280 | + if (! keep_pcap) |
281 | + reset_pcap(); | |
282 | ||
c6298b67 | 283 | /* check for 'quit' request after sending each packet, since waiting until end of probe cycle |
0d9e0091 | 284 | would impose a substantial delay. */ |
c6298b67 | 285 | @@ -679,7 +706,7 @@ main(int argc, char **argv) |
0d9e0091 JR |
286 | reconfigure(write_packet_len); |
287 | reread_config_file = 0; | |
288 | } | |
289 | - | |
290 | + | |
291 | /* We allow must signals that come in during our sleep() to interrupt us. E.g. we want to cut short | |
292 | our sleep when we're signalled to exit. But we must block SIGCHLD during our sleep. That's because | |
293 | if we forked an alert_program or alert_program2 child above, its termination will likely happen while we're sleeping; | |
c6298b67 | 294 | @@ -694,7 +721,21 @@ main(int argc, char **argv) |
0d9e0091 JR |
295 | sigaddset(&new_sigset, SIGCHLD); |
296 | sigprocmask(SIG_BLOCK, &new_sigset, &old_sigset); /* block SIGCHLD */ | |
297 | ||
298 | - sleep(time_to_sleep); | |
299 | + if (keep_pcap) { | |
300 | + /* If we're going to keep the packet capture running, | |
301 | + we might as well read off all the packets received while | |
302 | + waiting. We shouldn't get any, since we don't send any requests, | |
303 | + but this should prevent any buffers from accidentally filling | |
304 | + with unhandled packets. */ | |
c6298b67 JB |
305 | + int packets_recv = 0, receive_and_process_responses_rc; |
306 | + if ((receive_and_process_responses_rc = receive_and_process_responses(time_to_sleep)) >= 0) | |
307 | + packets_recv = receive_and_process_responses_rc; | |
0d9e0091 JR |
308 | + |
309 | + if (packets_recv && debug > 10) | |
310 | + report(LOG_DEBUG, "captured %d packets while sleeping", packets_recv); | |
311 | + } else { | |
312 | + sleep(time_to_sleep); | |
313 | + } | |
314 | ||
315 | sigprocmask(SIG_SETMASK, &old_sigset, NULL); /* unblock SIGCHLD */ | |
316 | ||
c6298b67 | 317 | @@ -702,8 +743,10 @@ main(int argc, char **argv) |
0d9e0091 JR |
318 | |
319 | } /* MAIN EVENT LOOP */ | |
320 | ||
321 | - | |
322 | /* we only reach here after receiving a signal requesting we quit */ | |
323 | + | |
324 | + if (keep_pcap) | |
325 | + reset_pcap(); | |
326 | ||
327 | if (pd_template) /* only used if a capture file requested */ | |
328 | pcap_close(pd_template); | |
c6298b67 | 329 | @@ -1284,6 +1327,7 @@ usage(void) |
0d9e0091 JR |
330 | fprintf(stderr, " -d debuglevel enable debugging at specified level\n"); |
331 | fprintf(stderr, " -f don't fork (only use for debugging)\n"); | |
332 | fprintf(stderr, " -h display this help message then exit\n"); | |
333 | + fprintf(stderr, " -k keep pcap open constantly (don't recreate on each cycle)\n"); | |
334 | fprintf(stderr, " -l log_file log to file instead of syslog\n"); | |
335 | fprintf(stderr, " -o capture_file enable capturing of unexpected answers\n"); | |
336 | fprintf(stderr, " -p pid_file override default pid file [%s]\n", PID_FILE); |