#!/usr/bin/perl # # nailem -- parse spam email for complaint purposes # # $Id: nailem,v 1.1 1998/03/17 05:08:54 minivend Exp minivend $ # # Copyright 1997, Mike Heins # # This program is licensed free of charge to anyone who has never spammed. # # Spammers may not use this program until it has been at least one # calendar year from their last spamming incident. # # Copyright 1996, 1997 by Michael J. Heins # # See the file 'Changes' for information. # # This program is free software; most users can redistribute it # and/or modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later # version. The only exception is that any organization employing # uninvited commercial email solicitations, commonly known as # SPAM, may not use the program for a period of one year after any # SPAM incident. Failure to discontinue use immediately upon # written notice will cause the charge of a $10,000 per day license # fee until such time as use is discontinued. The author, # Michael J. Heins, shall be the sole judge of what # constitutes a SPAM incident. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. my $Auto_to = ''; my $Sigfile = "$ENV{HOME}/.sig"; my $Cache_file = '/tmp/nailem.new.whois.gdbm'; my $Whois_template; # Uncomment this if you whish to use whois.abuse.net on Linux or other fwhois $Whois_template = 'whois %s@whois.abuse.net'; # Uncomment this if you whish to use whois.abuse.net on regular whois #$Whois_template = 'whois -h whois.abuse.net %s'; my @Trusted = ( qw! PLACE_YOUR_TRUSTED_DOMAINS_HERE iac.net minivend.com cool.com nwnexus.net nwnexus.com one.net muohio.edu oar.net !); use 5.004; # This is your letter, adjust how you like. my $Intro = <<'EOF'; To whom it may concern, I do not appreciate receiving uninvited email solicitations, commmonly known as SPAM. Please register my displeasure at receiving this one. I ask that you deal with the offender in accordance with your procedures. Thank you. ( TO NSPs who feel that this message was "shotgunned": - Do you require valid reverse DNS for all hosts and routers you connect? - Do you have an abuse address registered with spam.abuse.net? More importantly to NSPs, do your downstreams? Send something like "att-unisource.net: abuse@att-unisource.net" to update@abuse.net to be registered. - Do you reply to all complaints, even with only an autoresponse? - Are you pursuing legal action against spammers who persistently use your domain in their messages? If you do all of those things and feel you were shotgunned, please contact nospam@minivend.com; we want these automatically-generated complaints to be on target. SPECIAL NOTE ABOUT CLICK-THROUGHS: This program automatically fetches links included in the spam message and scans for URLs. If the spammer includes your URL in the message, we suggest you pursue them for misappropriation. Pages will not be scanned if a 404 error is returned. ) EOF # You can put known abuse addresses here my %Known_abuse = ( qw/ above.net abuse@above.net agis.net policy@agis.net alter.net fraud@uu.net aol.com abuse@aol.com att.net abuse@worldnet.att.net bbnplanet.net ops@bbnplanet.net cais.com crl-hack@minivend.com cais.net crl-hack@minivend.com cerf.net abuse@cerf.net crl.com crl-hack@minivend.com crl.net crl-hack@minivend.com earthlink.net spam@earthlink.net yahoo.com abuse@yahoo.com / ); # top-levels you trust to trace through, usually YOUR upstreams my @Trust_trace = (qw! cw.net crl.net crl.com oar.net one.net iac.net !); # Well-known bogus domains my @Known_bogus = ( qw! answerme.com cyberpromo.com iemmc.org nowhere.com nowhere.net public.com quantcom.com removeme.com savetrees.com ybecker.net www.yahoo.com quote.yahoo.com yourdomain.com !); ##### END CONFIGURABLE VARIABLES ####### use Getopt::Std; use LWP::Simple; my $prog = $0; $prog =~ s:(.*)/::; my $dir = $1 || '.'; use File::CounterFile; my $TicketNo; my $ctr = new File::CounterFile '/tmp/nailem.counter', '00132501'; $TicketNo = $ctr->inc(); $| = 1; $USAGE = < 1 ) ; my %Trust_trace; my %Trust_abuse; my %Known_bogus; if($opt_t) { push @Trusted, split /\s+/, $opt_t; } if($opt_T) { TRUSTEDFILE: { local($/); undef $/; open TRUST, $opt_T or do { warn "trusted domain file $opt_T not found: $!\n"; last TRUSTEDFILE; }; my $trusted = ; trim (\$trusted); push @Trusted, (split /\s+/, $trusted); } } for(@Trusted) { print "Trusted domain $_\n" if $DEBUG; $Trusted{$_} = 1; } for(@Trust_trace) { print "Trust trace $_\n" if $DEBUG; $Trust_trace{$_} = 1; $Trust_abuse{$_} = 1; } for(keys %Known_abuse) { $Trust_abuse{$_} = 1; } for(@Known_bogus) { print "Known bogus $_\n" if $DEBUG; $Known_bogus{"\U$_"} = 1; } sub trust_check { my ($addr, $trace, $abuse_check) = @_; return '' unless $addr =~ /\S/; $addr = lc $addr; my $trustme; if($abuse_check) { $trustme = \%Trust_abuse; } elsif($trace) { $trustme = \%Trust_trace; } else { $trustme = \%Trusted; } my $trusted; return 1 if $trustme->{$addr}; my @parts = split /\./, $addr; @parts = reverse @parts; my $res = ''; print "trytrust=" if $DEBUG; do { $res = shift(@parts) . ($res ? ".$res" : ''); print "$res," if $DEBUG; if (defined $trustme->{lc $res}) { $trusted = 1; $trustme->{$addr} = 1; print " YES!" if $DEBUG; @parts = (); } } while @parts; print "\n" if $DEBUG; return $trusted || undef; } # does fixed length message padded with .... sub do_msg { my ($msg, $size) = @_; $size = 30 unless defined $size; my $len = length $msg; return "$msg.." if ($len + 2) >= $size; $msg .= '.' x ($size - $len); return $msg; } sub whois_cache { my ($key, $val) = @_; my %cache; if(defined $val) { tie(%cache, 'GDBM_File', $Cache_file, GDBM_WRCREAT, 0666) or return ''; $cache{$key} = Data::Dumper::Dumper($val); } else { tie(%cache, 'GDBM_File', $Cache_file, GDBM_WRITER, 0666) or return ''; $val = eval $cache{$key}; } untie %cache; return $val; } sub whois { my($domain) = @_; my($top); return undef if $Known_bogus{$domain}; $domain =~ s/\s+$//; return undef unless $domain =~ /\.[A-Za-z]{3}$/; $domain =~ /($Hostparts\.[^.]+)$/os and $top = $1; $top = uc $top; print "whois $top " if $DEBUG; return undef if $Known_bogus{$top}; if ($Whois{$top}) { print "found memory\n" if $DEBUG; return $top; } elsif ($Cache) { print "looking dbm " if $DEBUG; $Whois{$top} = whois_cache($top) and return $top;; } my $what = `whois $top`; print "actually looking " if $DEBUG; if($what =~ /to single out one record/i) { $what =~ /\( ([-\w]+) \) \s+ $top[ \t\r]*\n/ix or warn "Bad whois?\n$what\n"; $what = `whois \\!$1`; } print "\n$what" if $DEBUG; return undef if $what =~ /(\n|^)No\s+match\s+for\s+"?$top/i; my @addr; my $abuse_whois; if($Known_abuse{lc $top}) { @addr = ($Known_abuse{lc $top}); } else { FINDAB: { $abuse_whois = $Whois_template; if($abuse_whois =~ s/%s/\L$top/) { $abuse_whois = `$abuse_whois`; $abuse_whois =~ s/\s+$//; $abuse_whois =~ s/^\s*\[.*//; $abuse_whois =~ s/^\s+//; $abuse_whois =~ s/\n+/,/g; if($abuse_whois =~ /^postmaster\@/i) { # Don't use stupid postmaster } elsif($abuse_whois) { push @addr, $abuse_whois; last FINDAB; } } while( $what =~ /\)\s+(.*@.*)/g ) { push @addr, $1; } } } my @dns; my $dns = $what; $dns =~ s/[\000-\377]+\n\s+domain servers .*\n//i; $dns =~ s/\n\w.*//s; print "$top DNS servers:\n$dns\n" if $DEBUG; while ($dns =~ m/\b ( (1[0-9][0-9]|2[0-4][0-9]|25[0-4]|[1-9]?[0-9]) \. (1[0-9][0-9]|2[0-4][0-9]|25[0-4]|[1-9]?[0-9]) \. (1[0-9][0-9]|2[0-4][0-9]|25[0-4]|[1-9]?[0-9]) \. (1[0-9][0-9]|2[0-4][0-9]|25[0-4]|[1-9]?[0-9]) )\b/xg ) { push @dns, $1; print "name server $1 \n" if $DEBUG; } my $ref = { top => $top, offender => $domain, whois => $what, contacts => [ @addr ], dns => [ @dns ], }; $Whois{$top} = $ref; if($Cache) { whois_cache($top, $ref); } return $top; } my $Unresolved = 'UNRESOLVED00'; sub find_ip { my $digits = shift; my $ip = `ping -c 1 $digits`; if($ip =~ s,PING\s+$digits\s+\([^)]+\).*,$1,is ) { return $ip; } return $digits; } sub find_address { my($addr) = @_; my($helo,$resolv, $ip); $addr =~ s/^\s+//; $addr =~ s/ $Ignore ($Hostchars) $Ignore //x and $helo = $1; $helo =~ s/.*\@// if defined $helo; print "h=$helo " if $DEBUG; $addr =~ s/([\d.]+)\]// and $ip = $1; print "i=$ip " if $DEBUG; $addr =~ /\s+EHLO\s+/i and $addr = ''; $addr =~ s/ $Ignore ($Hostchars) $Ignore //xo and $resolv = $1; print "r=$resolv " if $DEBUG; $addr =~ s/[\])\s]+//; if ($addr && ! $ip) { print "addr='$addr' and no IP\n" if $DEBUG; return undef if $addr && ! $ip; } my $trusted; if(! defined $ip) { ($resolv,$ip) = nslookup($helo); ($resolv,$ip) = nslookup($resolv) unless defined $resolv; return undef unless defined $ip; } elsif (! defined $resolv) { my $tmp_ip = $ip; ($resolv,$tmp_ip) = nslookup($ip); $resolv=$Unresolved++ unless defined $tmp_ip; } $trusted = trust_check($resolv); return ($ip, $resolv, $helo, $trusted); } sub nslookup { my($thing) = @_; return undef unless defined $thing && $thing =~ /\S/; print "nslookup='$thing'...." if $DEBUG; trim(\$thing); $thing = lc $thing; $thing =~ s/.*\@//; my($name,$ip); if($thing =~ /^[0.]+$/) { print "..bogus.." if $DEBUG; return (undef,undef); } elsif($Known_bogus{lc $thing} ) { print "..bogus.." if $DEBUG; return (undef,undef); } elsif($thing eq 'localhost' or $thing eq '127.0.0.1') { ($Looked_up_host{'127.0.0.1'}, $Looked_up_ip{'localhost'}) = ('localhost', '127.0.0.1'); } elsif (defined $Looked_up_host{$thing}) { ($name,$ip) = ($Looked_up_host{$thing}, $Looked_up_ip{$Looked_up_host{$thing}}); print "..found in cache.." if $DEBUG; } elsif (defined $Looked_up_ip{$thing}) { ($name,$ip) = ($Looked_up_host{$Looked_up_ip{$thing}}, $Looked_up_ip{$thing}); print "..found in cache.." if $DEBUG; } else { print "..go to net.." if $DEBUG; my $what = `nslookup -retry=1 -timeout=3 -query=a $thing 2>/dev/null`; $what =~ s/.*\n\n//; if ($what =~ /(?:\n|^)Name:\s*($Hostchars)\s*Address(?:es)?:\s*([\d.]+)/o) { ($name, $ip) = ($1, $2); ($Looked_up_host{$ip}, $Looked_up_ip{$name}) = ($name, $ip); } else { ($Looked_up_host{$thing}, $Looked_up_ip{$thing}) = (undef, undef); } } print " name=$Looked_up_host{$ip} ip=$Looked_up_ip{$name}\n" if $DEBUG; return ($Looked_up_host{$ip}, $Looked_up_ip{$name}); } my %Traced; sub traceroute { my($ip) = @_; $ip =~ s/.*\@//; return if defined $Traced{$ip}; my (@out); my $pid; $pid = open(TRACE, "traceroute $ip 2>/dev/null |") or die "Can't fork: $!\n"; print "Tracing $ip ... " if $DEBUG; $NAILEM::strikes = 0; alarm 20; $Traced{$ip} = ''; while(){ $Traced{$ip} .= $_; if ($NAILEM::strikes > 1) { alarm 0; kill(15, $pid); print "Traceroute out on $NAILEM::strikes strikes for $ip.\n" if $DEBUG; last; } m/^\s*\d+\s+\*\s+\*\s+\*/ and do { $NAILEM::strikes++; next }; m/^\s*\d+[*\s]+($Hostchars)/ or next; my $host = $1; print "traced host $host\n" if $DEBUG; push (@out, $host) unless $host =~ /^[.\d]+$/ || trust_check($host, 1); } close TRACE; alarm 0; my @saved; for(reverse @out) { unshift (@saved, $_); last if trust_check($_, 0, 1); } print "saved hosts: " . (join " ", @saved) . "\n" if $DEBUG and @out; return @saved; } ## MAIN my @msgs; # input, I need input.... { local($/); undef $/; my $pile = <>; if($pile =~ /\n/) { $pile =~ tr/\r//d; } else { $pile =~ tr/\r/\n/; } $pile =~ s/^\s*\n+//; if($pile =~ /^\s*From /) { @msgs = split /^From /m, $pile; @msgs = grep /\S/ && s/^/From /, @msgs; } else { @msgs = ($pile); } } my $j = 0; my @parsed; $| = 0; for(@msgs) { my ($header, $body) = split /\n\n/, $_, 2; my @header = split /\n/, $header; my $count; if($header[2] =~ /^\s*[-A-Z_a-z0-9]+:\s*$/o) { $count = scalar @header; print "X-Probable-Netscape-Header-Abomination: $count lines, trying to fix\n"; for( @header ) { s/^\s+($Hostparts:)/$1/o; } } JOIN: { for(my $i = 0; $i < @header; $i++) { $_ = $header[$i]; s/\s+/ /g; if (s/^(\s|[^:]+\s)/ $1/) { $header[$i - 1] .= $_; splice(@header, $i, 1); redo JOIN; } if ($count and s/:\s*$/: /) { $header[$i] .= $header[$i+1]; $header[$i] =~ s/\s+/ /g; splice(@header, $i+1, 1); redo JOIN; } s/\s\s+/ /g; } } my $ref = $parsed[$j++] = {}; $ref->{FullHeader} = $count ? (join "\n", @header, "\n") : $header; $ref->{Body} = $body; my $mf; ($mf) = shift(@header); $mf =~ s/^From\s+//; ($ref->{MailFrom}, $ref->{MailFromDate}) = split /\s+/, $mf, 2; $ref->{Received} = []; for(@header) { my ($key,$val) = split /:\s+/, $_, 2; $key = "\L\u$key"; if(! defined $ref->{$key}) { $ref->{$key} = $val; } elsif(ref $ref->{$key}) { push @{$ref->{$key}}, $val; } else { my $tmp = $ref->{$key}; $ref->{$key} = []; push @{$ref->{$key}}, $tmp, $val; } } $ref->{Fingerprints} = []; my @clicks; my @reclicks; while($body =~ m#(http://[^"'\s]+|www\.[^"'\s]+)#ig) { my $url = $1; $url =~ s!http://!!i; my $click = $url; $url =~ s:/.*::; print "Matched fingerprint $url\n" if $DEBUG; $click = "http://$click"; if ($opt_C) { my $clickto = LWP::Simple::get($click); if($clickto) { $clickto = "$click $clickto"; print "Fetched clickthrough $click\n" if $DEBUG and $clickto; push(@clicks, $clickto); } } push @{$ref->{Fingerprints}}, $url; } $ref->{Clicks} = []; my $orig_clicks = scalar @clicks; my $limit = 2 * $orig_clicks; $limit = 10 if $limit > 10; my $c; my $it_count; print "Clickthrough count $orig_clicks limit=$limit" if $DEBUG; while ($c = shift @clicks) { $it_count++; $c =~ s/(\S+)\s+//; my $u = $1; my $base = $u; if($c =~ m,]*src\s*=\s*['"]?([^"'\s]+) ) #xig) { my $url = $1 || $2; print " found clickthrough $url\n" if $DEBUG; if ( $limit >= $it_count and ( $url !~ /^\w+:/) or $url =~ m,http://$base, ) { my $click = $url; $click = "http://$base/$click" unless $click =~ /^http:/i; my $clickto = ''; $clickto = LWP::Simple::get($click) if $it_count <= $limit; if($clickto) { $clickto = "$click $clickto"; print "Fetched clickthrough $click\n\tbase=$base\n" if $DEBUG; push(@clicks, $clickto); } } } push @{$ref->{Clicks}}, "$u $c"; while($c =~ m# \s+href\s*=\s*['"]?(http://[^"'\s]+) #xig) { my $url = $1 || $2; print "Matched click-through fingerprint $url\n" if $DEBUG; $url =~ s!http://!!; $url =~ s,/.*,,; push @{$ref->{Fingerprints}}, " click to --> $url"; } } # # foreach $c (@reclicks) { # $c =~ s/(\S+)\s+//; # my $u = $1; # my $base = $u; # $base =~ s!http://!!i; # $base =~ s:/.*::; # while($c =~ m# # \s+href\s*=\s*['"]?([^"'\s]+) # #xig) { # my $url = $1; # $url =~ s!http://!!i; # $url =~ s:/.*::; #print "Matched reclick-through fingerprint $url\n" if $DEBUG; # push @{$ref->{Fingerprints}}, " reclick to --> $url"; # } # $c = "$u $c"; # } # push @clicks, @reclicks; while($body =~ /([^:\s]+\@[-\w.]+\.[A-Za-z]{2,3})/g) { push @{$ref->{Fingerprints}}, $1; print "Matched fingerprint $1\n" if $DEBUG; } #print Data::Dumper::Dumper($ref) if $DEBUG; } my (@out); $j = 0; sub do_received { } for(@parsed) { my $ref = $_; my $out = ''; my @bogus_received; my @bogus_domain; my @bad_helo; my @relayers; my @involved_domains; my @lookup; my @lookup_too; my @send_to; my @possibly_open; my ($injector, $first_sender); my ($injector_host, $first_sender_host); my $fillers = '(?:peer\s+cross-?checked\s+a?s?[\s:]*)?'; foreach my $line (reverse @{$ref->{Received}}) { print "Trying $line\n" if $DEBUG; my ($from, $by, $rest, $for); my ($name, $ip, $helo, $trusted); unless ($line =~ m{^ $Ignore (?:from)? # possible proceeding from $Ignore (.*?) by $Ignore $fillers ($Hostchars) $Ignore (.*) }xio ) { print "Continuing at bad received format\n" if $DEBUG; push(@bogus_received, $line); next; } else { $from = $1; $by = $2; $rest = $3; next if $by =~ /^fetchmail-\d+\.\d+/; if($rest and $rest =~ /for\s+(\S+)/ ) { $for = $1; $for =~ s/\W+$//; $for =~ s/^{Fingerprints}}; for (@prints) { print "fingerprinting $print " if $DEBUG; s/.*?--> //; if(/^\d+$/) { $_ = find_ip($_); } s/[^A-Za-z]+$//; my @addl = (); my @extra = (); if(/\@/) { @addl = whois($_); for (@addl) { next if $Known_abuse{lc $_}; print "found extra $_ " if $DEBUG; next unless $Whois{"\U$_"}->{dns}; print $Whois{"\U$_"}->{dns} if $DEBUG; push @extra, @{$Whois{"\U$_"}->{dns}}; } for (@extra) { my($name, $ip) = nslookup($_); push (@addl, traceroute(($ip || $_), 1)); } } else { next if $Known_bogus{lc $_}; my($name, $ip) = nslookup($_); push (@addl, traceroute(($ip || $_), 1)); } push @lookup, @addl; print "\n" if $DEBUG; } my $one; foreach $one (@lookup) { my $status = whois($one); unless($status) { push @bogus_domain, $one; } else { push @involved_domains, $status; } } my %seen; my %sent; my @emails; my $dom; foreach $dom (@involved_domains) { for( @{$Whois{$dom}->{contacts}}) { next if defined $sent{lc $_}; $sent{lc $_} = 1; push @send_to, "$_ (contact for $dom)"; } } print "\n#### END DEGUG INFO ####\n\n" if $DEBUG; unshift(@send_to, $Auto_to) if $Auto_to; $out .= "To: "; $out .= join ",\n\t", @send_to; $out .= "\n"; $out .= "Subject: ABUSE REPORT (#N$TicketNo): $ref->{Subject}\n"; $out .= "\n"; $out .= $Intro; $out .= do_msg("Probable injecting host"); $out .= "$injector_host\n"; undef %seen; if(@relayers) { $out .= do_msg("Untrusted relaying hosts"); $out .= join ", ", grep !$seen{$_}++, @relayers; $out .= "\n"; } undef %seen; if(@possibly_open) { $out .= do_msg("POSSIBLY OPEN RELAY"); $out .= join ", ", grep !$seen{$_}++, @possibly_open; $out .= "\n"; } undef %seen; if(@{$ref->{Fingerprints}}) { $out .= "\nSpammer fingerprints in body\n"; $out .= "----------------------------\n"; $out .= join "\n", grep !$seen{$_}++, @{$ref->{Fingerprints}}; $out .= "\n"; } else { $out .= "\nNo spammer fingerprints in body of message.\n" } $out .= "\n"; if(@bogus_received) { $out .= "\nBogus received lines\n"; $out .= "--------------------\n"; $out .= join "\n", @bogus_received; $out .= "\n\n\n"; } undef %seen; if(@bogus_domain) { $out .= do_msg("Could not lookup"); $out .= join ", ", grep !$seen{$_}++, @bogus_domain; $out .= "\n"; } $out .= "\n"; if(-f $Sigfile) { $out .= `cat $Sigfile`; } $out .= "\n"; $out .= "**** begin message headers ****\n"; $out .= $ref->{FullHeader}; $out .= "\n**** end message headers ****\n\n"; $out .= "**** begin message body ****\n"; $out .= $ref->{Body}; $out .= "**** end message body ****\n"; for(@{$ref->{Clicks}}) { s/(\S+)\s+//; my $url = $1; $out .= "**** begin click-to body $url ****"; $out .= $_; $out .= "**** end click-to body $url ****\n"; } print $out; #$out[$j++] = $out; if($Append_trace) { my $save = $DEBUG; $DEBUG = 0; my $possible; foreach $possible (sort keys %Traced) { my($name,$ip) = nslookup($possible); print "Traceroute to " . ($name ? "$name " : $possible) . ($ip ? "([$ip])" : '') . ":\n"; print $Traced{$possible}; print "\n\n"; } $DEBUG = $save; } if($Append_whois) { for(sort keys %Whois) { print $Whois{$_}->{'whois'}; print "\n"; } } } exit 0;