]>
Commit | Line | Data |
---|---|---|
4b11d34b TP |
1 | #!/usr/bin/perl -w |
2 | # massagevendor.pl: v0.03 2002/03/07 KELEMEN Peter <fuji@debian.org> | |
3 | # This program is part of the arpwatch Debian package. | |
4 | ||
5 | # Compile Ethernet vendor code listings from different sources into internal | |
6 | # format used by arpwatch(8). | |
7 | ||
8 | # Ethernet vendor listings recognized: | |
9 | # IEEE OUI: http://standards.ieee.org/regauth/oui/oui.txt | |
10 | # CaveBear: http://map-ne.com/Ethernet/Ethernet.txt | |
11 | ||
12 | use Getopt::Long; | |
13 | use strict; | |
14 | use vars qw($opt_output $opt_ieee $opt_cavebear $opt_firstword $opt_help $opt_version); | |
15 | ||
# Program version, reported by --version.
my $VERSION = q(0.03);

# Option defaults.  GetOptions() stores each option it finds on the command
# line in the package variable $opt_<name>, overriding the value set here.
$opt_output    = 'ethercodes.dat';  # File the merged listing is written to.
$opt_ieee      = 'oui.txt';         # IEEE OUI listing (input).
$opt_cavebear  = 'Ethernet.txt';    # CaveBear vendor codes (input).
$opt_firstword = 0;
$opt_help      = 0;
$opt_version   = 0;

# Start of the run; the elapsed time is reported once the output is written.
my $time = time();

# GetOptions() returns false (after printing its own diagnostic) when the
# command line is invalid.  Stop right away in that case: carrying on with
# the defaults would overwrite the default output file with a listing the
# user never asked for.
GetOptions(qw(output=s ieee=s cavebear=s firstword help version))
    or die "massagevendor: invalid command line, try --help\n";
version() if $opt_version;
usage()   if $opt_help;
30 | ||
# Open both inputs and the output up front so a bad file name fails before
# any parsing is done.  Three-argument open() keeps the mode separate from
# the name, so a name containing whitespace, '|' or '>' is always treated as
# a plain file name and never as part of the open mode.
open(IEEE, '<', $opt_ieee) || die "$opt_ieee: $!\n";
open(CAVEBEAR, '<', $opt_cavebear) || die "$opt_cavebear: $!\n";
open(OUTPUT, '>', $opt_output) || die "$opt_output: $!\n";

# Vendor name by mangled prefix, one table per source.
my %ieee = ();
my %cavebear = ();

# Process IEEE OUI listing.  Only lines starting with an "XX-XX-XX" prefix
# are of interest.
while (<IEEE>) {
    next unless /^[[:xdigit:]]{2}-[[:xdigit:]]{2}-[[:xdigit:]]{2}\s/;
    chomp;

    # The pre-filter above is looser than this pattern (it does not require
    # the parenthesised tag or a vendor name).  Skip lines that do not fit,
    # otherwise an undefined prefix would end up as an entry keyed by ''.
    my ($prefix, $vendor) = m/^([[:xdigit:]]{2}-[[:xdigit:]]{2}-[[:xdigit:]]{2})\s+\([^(]+\)\s+(.*)$/
        or next;
    $prefix =~ s/-//g;
    $ieee{mangle_prefix($prefix)} = mangle_vendor($vendor);
}

# Process CaveBear Ethernet vendor codes.  Only lines starting with six
# upper-case hex digits are of interest.
while (<CAVEBEAR>) {
    next unless /^[0-9A-F]{6}\s/;
    s/\r//g;    # Drop carriage returns (DOS line endings).
    chomp;

    # A line holding nothing but the prefix passes the pre-filter (its line
    # terminator counts as whitespace) yet carries no vendor; skip it.
    my ($prefix, $vendor) = m/^([0-9A-F]{6})\s+(.*)$/
        or next;
    $vendor =~ s/\s{2,}/ /g;                # Condense whitespace.
    $vendor =~ s/([a-z])([A-Z])/$1 $2/g;    # Split "fooBar" into "foo Bar".
    $cavebear{mangle_prefix($prefix)} = $vendor;
}
print STDERR scalar keys %ieee, " (IEEE), ", scalar keys %cavebear, " (CaveBear).\n";
59 | ||
# Fold the CaveBear table into %ieee.  Prefixes known only to CaveBear are
# copied over; prefixes present in both tables are compared and, when the
# names differ, the CaveBear name is appended to the IEEE one in brackets.
my $equal = 0;         # Present in both tables with the same sanitized name.
my $first_word = 0;    # Accepted by the first-word heuristic.
my $merged = 0;        # Present in both tables, names combined.
my $c_only = 0;        # Present in the CaveBear table only.
for my $code (sort keys %cavebear) {

    # Not in the IEEE table at all: take the CaveBear entry as it is.
    unless (exists $ieee{$code}) {
        $ieee{$code} = $cavebear{$code};
        ++$c_only;
        next;
    }

    # Same vendor if the names agree once case and every non-word
    # character are ignored.
    (my $i_name = lc($ieee{$code})) =~ s/\W//g;
    (my $c_name = lc($cavebear{$code})) =~ s/\W//g;
    if ($i_name eq $c_name) {
        ++$equal;
        next;
    }

    if ($opt_firstword) {
        # Reduce both names to their first word, ignoring case.
        ($i_name = lc($ieee{$code})) =~ s/(\w+).*/$1/;
        ($c_name = lc($cavebear{$code})) =~ s/(\w+).*/$1/;

        # NOTE(review): the leading "0 and" switches this branch off, so
        # --firstword currently has no effect and $first_word stays 0.
        # Confirm whether the heuristic was disabled on purpose.
        if (0 and $i_name eq $c_name) {
            $i_name =~ s/\W//g;
            $c_name =~ s/\W//g;
            if (length($c_name) >= length($i_name)) {
                $ieee{$code} = $cavebear{$code};
            }
            ++$first_word;
            next;
        }
    }

    # No heuristic could tell whether the two names mean the same vendor.
    # Keep the IEEE name and append the CaveBear name in brackets ([]).
    $ieee{$code} .= " [" . $cavebear{$code} . ']';
    $ieee{$code} =~ s/\t/; /g;    # Tab is the field separator in the output.
    ++$merged;
}
109 | ||
# Write the merged table, one "prefix<TAB>vendor" line per entry, ordered by
# prefix string.
foreach my $code (sort keys %ieee) {
    print OUTPUT "$code\t$ieee{$code}\n" or die "$opt_output: $!\n";
}

# Output is buffered, so a failed write (e.g. a full disk) may only show up
# when the handle is closed.  Check it so a truncated listing is not
# reported as a success.
close(OUTPUT) or die "$opt_output: $!\n";
close(IEEE);
close(CAVEBEAR);

# Summary: the total goes to standard output, the breakdown to standard error.
$time = time() - $time;
print scalar keys %ieee, " total. (processed in $time seconds)\n";
print STDERR $equal, " equal, ", $first_word, " equal based on company name.\n";
print STDERR $merged, " merged, ", $c_only, " listed only by CaveBear.\n";
121 | ||
# Convert a six-digit hexadecimal vendor prefix ("00000C") into the
# colon-separated form written to the output ("0:0:c"): lower case, with one
# leading zero removed from each two-digit group.  Returns the empty string
# when the argument is not exactly six hex digits.
sub mangle_prefix {
    my $hex = lc(shift);

    # Split into the three two-digit groups; no match leaves the list empty.
    my @octets = $hex =~ m/^([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/;

    s/^0// for @octets;    # "0c" becomes "c", "00" becomes "0".

    return join(':', @octets);
}
132 | ||
# Tidy up a vendor name taken from the IEEE listing, which mixes upper-case
# names with stray punctuation and padding.  The rules are applied strictly
# in the order given; later rules rely on the casing produced by earlier
# ones.  Returns the cleaned-up name.
sub mangle_vendor {
    my $vendor = shift;

    # Every substitution below works on $_, aliased to $vendor.
    for ($vendor) {
        s/^\W+//;                     # Strip leading garbage.
        s/\s{2,}/ /g;                 # Condense whitespace.
        s/ \W$//;                     # Strip a trailing " <punctuation>".
        s/(\w{4,})/\u\L$1/g;          # "WORD" -> "Word" for 4+ characters.

        # Abbreviations the rule above lower-cased by mistake.
        # NOTE(review): the pattern is anchored to the whole string, so it
        # only fires when the name consists of nothing but one of these
        # words; confirm whether \b...\b was intended.
        s/^(Ecci|Ris(c|q)|Seel|Uunet)$/\U$1/g;

        # Short abbreviations (Co, Inc, Ltd, ...): capitalize only.
        s/\b(COM?|IN(C|T)|LTD|DIV|SYS|PUB|IND|PT(Y|E)|LAB|TEC|SEL|EON)\b/\u\L$1/g;

        # Short ordinary words: capitalize only.
        s/\b(BAY|THE|ZUR|END|ONE|SAN|SUN|NET|WAY|TOP|BOX)/\u\L$1/g;
        s/\b(BUG|BUS|PIG|TEN|LAW|NOT|SEA|LEE)\b/\u\L$1/g;

        # Short French words and abbreviations: capitalize only.
        s/\b(LA|LE|SOC|FA)\b/\u\L$1/g;

        # English, French, German and Spanish joining words: all lower case.
        s/\b(AND|OF|TO|IN|FOR)\b/\L$1/g;
        s/\b(DES|UND|DE|DI|DU)\b/\L$1/g;

        s/\b((L|D)')/\L$1/g;          # French prefixes: L' -> l', D' -> d'.
        s/('[A-Z])\b/\L$1/g;          # A lone letter after "'": 'S -> 's.
        s/\bGmbh\b/GmbH/g;            # Restore the usual spelling of GmbH.
    }

    return $vendor;
}
165 | ||
# Print the program name with its version number and the copyright line on
# standard output, then exit with status 0.  Does not return.
sub version {
    print "massagevendor $VERSION\n",
          "Copyright (C) 2000-2001 KELEMEN Peter <fuji\@debian.org>\n";
    exit(0);
}
174 | ||
# Print the command-line help on standard output, then exit with status 0.
# Does not return.  Every option accepted by GetOptions() is listed here,
# with its default value in square brackets where it has one.
sub usage {
    # Single-quoted here-document: the text is printed literally.
    print <<'EOF';
Massage IEEE OUI listing and Cavebear Ethernet vendor database into arpwatch(8) format.

Usage: massagevendor [--ieee=<file>] [--cavebear=<file>] [--output=<file>] [--firstword] [--help] [--version]

Options:
  --ieee=<file>       Read IEEE OUI listing from this file [oui.txt]
  --cavebear=<file>   Read Cavebear Ethernet vendor info from here [Ethernet.txt]
  --output=<file>     Write massaged output to this file [ethercodes.dat]
  --firstword         Enable equality heuristics based on the first word of
                      a vendor's name.  This might produce unwanted results.
  --help              You are reading this now.
  --version           Print version and copyright information.

EOF
    exit(0);
}