#!/usr/bin/perl
# massagevendor.pl: v0.03 2002/03/07 KELEMEN Peter
# This program is part of the arpwatch Debian package.
#
# Compile Ethernet vendor code listings from different sources into the
# internal format used by arpwatch(8).
#
# Ethernet vendor listings recognized:
#   IEEE OUI: http://standards.ieee.org/regauth/oui/oui.txt
#   CaveBear: http://map-ne.com/Ethernet/Ethernet.txt
#
# Output format: one "prefix<TAB>vendor" line per known prefix, where the
# prefix is written as x:x:x with leading zeros stripped from each octet.

use strict;
use warnings;
use Getopt::Long;

our $VERSION = q(0.03);

# Command line options and their defaults.
my $opt_output    = 'ethercodes.dat';
my $opt_ieee      = 'oui.txt';
my $opt_cavebear  = 'Ethernet.txt';
my $opt_firstword = 0;
my $opt_help      = 0;
my $opt_version   = 0;

my $time = time();

GetOptions(
    'output=s'   => \$opt_output,
    'ieee=s'     => \$opt_ieee,
    'cavebear=s' => \$opt_cavebear,
    'firstword'  => \$opt_firstword,
    'help'       => \$opt_help,
    'version'    => \$opt_version,
) or usage(2);

version() if $opt_version;
usage()   if $opt_help;

open(my $ieee_fh, '<', $opt_ieee)
    or die "$opt_ieee: $!\n";
open(my $cavebear_fh, '<', $opt_cavebear)
    or die "$opt_cavebear: $!\n";
open(my $output_fh, '>', $opt_output)
    or die "$opt_output: $!\n";

my %ieee     = ();    # mangled prefix => vendor name (IEEE, later merged)
my %cavebear = ();    # mangled prefix => vendor name (CaveBear)

# Process IEEE OUI listing.
while (my $line = <$ieee_fh>) {
    next unless $line =~ /^[[:xdigit:]]{2}-[[:xdigit:]]{2}-[[:xdigit:]]{2}\s/;
    chomp $line;

    my ($prefix, $vendor) = $line
        =~ m/^([[:xdigit:]]{2}-[[:xdigit:]]{2}-[[:xdigit:]]{2})\s+\([^(]+\)\s+(.*)$/;

    # The cheap filter above is looser than the capture pattern; skip lines
    # that pass the former but not the latter instead of storing garbage.
    next unless defined $prefix;

    # Strip carriage returns, as the CaveBear loop below does.
    $vendor =~ s/\r//g;

    $prefix =~ tr/-//d;
    $ieee{ mangle_prefix($prefix) } = mangle_vendor($vendor);
}

# Process CaveBear Ethernet vendor codes.
while (my $line = <$cavebear_fh>) {
    next unless $line =~ /^[0-9A-F]{6}\s/;
    $line =~ s/\r//g;
    chomp $line;

    my ($prefix, $vendor) = $line =~ m/^([0-9A-F]{6})\s+(.*)$/;
    next unless defined $prefix;    # Prefix with nothing after it.

    $vendor =~ s/\s{2,}/ /g;                # Condense whitespace.
    $vendor =~ s/([a-z])([A-Z])/$1 $2/g;    # Split runTogether words.
    $cavebear{ mangle_prefix($prefix) } = $vendor;
}

close($ieee_fh);
close($cavebear_fh);

print STDERR scalar(keys %ieee), " (IEEE), ",
    scalar(keys %cavebear), " (CaveBear).\n";

# Attempt to merge the CaveBear entries into the IEEE table.
my $equal      = 0;
my $first_word = 0;
my $merged     = 0;
my $c_only     = 0;

foreach my $prefix (sort keys %cavebear) {

    # Prefix unknown to IEEE: take the CaveBear entry verbatim.
    if (!exists $ieee{$prefix}) {
        $ieee{$prefix} = $cavebear{$prefix};
        ++$c_only;
        next;
    }

    # Treat as equal if sanitized vendor name equals.
    (my $i = lc $ieee{$prefix})     =~ s/\W//g;
    (my $c = lc $cavebear{$prefix}) =~ s/\W//g;
    if ($i eq $c) {
        ++$equal;
        next;
    }

    if ($opt_firstword) {
        # Treat as equal if first word matches.
        (my $first_i = lc $ieee{$prefix})     =~ s/(\w+).*/$1/;
        (my $first_c = lc $cavebear{$prefix}) =~ s/(\w+).*/$1/;

        # NOTE(review): the constant 0 disables this branch, so --firstword
        # currently has no effect.  It is kept disabled here because
        # re-enabling it changes the generated table; also note that the
        # length comparison below is made on the first words only, which
        # are already known to be equal -- confirm the intent before
        # switching it back on.
        if (0 and $first_i eq $first_c) {
            $first_i =~ s/\W//g;
            $first_c =~ s/\W//g;
            if (length($first_c) >= length($first_i)) {
                $ieee{$prefix} = $cavebear{$prefix};
            }
            ++$first_word;
            next;
        }
    }

    # At this point we're helpless; simple heuristics could not
    # determine if the two vendors were the same.  Merge them.
    # Always use vendor name from IEEE, add vendor name from
    # CaveBear in brackets ([]).
    $ieee{$prefix} .= " [" . $cavebear{$prefix} . ']';
    $ieee{$prefix} =~ s/\t/; /g;    # Tab is the output field separator.
    ++$merged;
}

foreach my $prefix (sort keys %ieee) {
    print {$output_fh} "$prefix\t$ieee{$prefix}\n";
}

# Buffered write errors only surface at close time.
close($output_fh) or die "$opt_output: $!\n";

$time = time() - $time;
print scalar(keys %ieee), " total. (processed in $time seconds)\n";
print STDERR $equal, " equal, ", $first_word, " equal based on company name.\n";
print STDERR $merged, " merged, ", $c_only, " listed only by CaveBear.\n";

# Mangle prefix code.  Wish there was some standard notation...
#
# Takes six hexadecimal digits (any case) and returns them as three
# colon-separated lowercase octets with one leading zero stripped from each,
# e.g. "00A0C9" becomes "0:a0:c9".  Returns the empty string if the argument
# is not exactly six hexadecimal digits.
sub mangle_prefix {
    my $prefix = shift;
    $prefix = '' unless defined $prefix;
    $prefix = lc($prefix);    # Lowercase.

    # Extract XX:XX:XX MAC address prefix.
    my @octets = $prefix =~ m/^([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/;

    # Strip leading zeros.
    foreach my $octet (@octets) {
        $octet =~ s/^0//;
    }

    return join(':', @octets);
}

# Mangle vendor name since the IEEE listing is really carelessly compiled.
# Heavy heuristics here, you've been warned.
#
# Takes the raw vendor string and returns a cleaned-up, mostly
# capitalized version of it.  An undefined argument is treated as ''.
sub mangle_vendor {
    my $vendor = shift;
    $vendor = '' unless defined $vendor;

    $vendor =~ s/^\W+//;                # Strip leading garbage.
    $vendor =~ s/\s{2,}/ /g;            # Condense whitespace.
    $vendor =~ s/ \W$//;                # Strip trailing garbage.
    $vendor =~ s/(\w{4,})/\u\L$1/g;     # Capitalize words 4 or more chars.

    # Heuristics to uc() abbreviations longer than 4 characters.
    # (Anchored: applies only when the whole name is the abbreviation.)
    $vendor =~ s/^(Ecci|Ris(c|q)|Seel|Uunet)$/\U$1/g;

    # Heuristics to lc() abbreviations shorter than 4 characters.
    $vendor =~ s/\b(COM?|IN(C|T)|LTD|DIV|SYS|PUB|IND|PT(Y|E)|LAB|TEC|SEL|EON)\b/\u\L$1/g;

    # Lowercase ordinary words.
    $vendor =~ s/\b(BAY|THE|ZUR|END|ONE|SAN|SUN|NET|WAY|TOP|BOX)/\u\L$1/g;
    $vendor =~ s/\b(BUG|BUS|PIG|TEN|LAW|NOT|SEA|LEE)\b/\u\L$1/g;

    # Lowercase French words and abbreviations.
    $vendor =~ s/\b(LA|LE|SOC|FA)\b/\u\L$1/g;

    # Lowercase English, French, German and Spanish conjunctions.
    $vendor =~ s/\b(AND|OF|TO|IN|FOR)\b/\L$1/g;
    $vendor =~ s/\b(DES|UND|DE|DI|DU)\b/\L$1/g;

    $vendor =~ s/\b((L|D)')/\L$1/g;     # Lowercase French prefixes.
    $vendor =~ s/('[A-Z])\b/\L$1/g;     # Lowercase letters following an "'".
    $vendor =~ s/\bGmbh\b/GmbH/g;       # Treat GmbH specially.

    return $vendor;
}

# Print the program version and exit successfully.
sub version {
    print <<"EOF";
massagevendor.pl v$VERSION
EOF
    exit(0);
}

# Print the usage summary and exit.
#
# Takes an optional exit status (default 0).  With a non-zero status the
# text goes to STDERR, since it is then reporting a command line error.
sub usage {
    my $status = @_ ? shift : 0;
    my $out = $status ? \*STDERR : \*STDOUT;

    print {$out} <<"EOF";
Usage: $0 [--ieee=<file>] [--cavebear=<file>] [--output=<file>]
       [--help] [--version] [--firstword]

Options:
    --ieee=<file>      Read IEEE OUI listing from this file [oui.txt]
    --cavebear=<file>  Read Cavebear Ethernet vendor info from here [Ethernet.txt]
    --output=<file>    Write massaged output to this file [ethercodes.dat]
    --firstword        Enable equality heuristics based on the first word of a
                       vendor's name.  This might produce unwanted results.
    --help             You are reading this now.
    --version          Print the program version and exit.
EOF
    exit($status);
}