#!/usr/bin/perl
# extractor, an HTTPush plugin by Lluis Mora
# Extracts e-mail addresses, and the usernames they contain, from visited webpages
#
# $Id: extractor,v 1.2 2001/10/28 22:13:03 jfs Exp $

use IO::Socket;
use MIME::Base64;

# Flush STDOUT after every print so each reply leaves as soon as it is written.
$| = 1;

# The message currently being assembled from STDIN.  Keys:
#   opcode  - first word of the header line
#   version - "major.minor" taken from the header line
#   kv      - array ref of { k => key, v => decoded value } records, in
#             arrival order
my %req;

# Main loop: read messages from STDIN, one line at a time.
#
# A message is a header line ("<opcode> <major>.<minor>") followed by zero or
# more "key:value" lines whose value is Base64-encoded, and is terminated by
# an empty line.  Each complete message is handed to ProcessRequest().  A
# message that is still unterminated when STDIN reaches EOF is never
# dispatched.
while (defined(my $line = <STDIN>)) {
  chomp($line);

  # Empty line: the message is complete.  Dispatch it and start afresh.
  # Compare with '' rather than testing truth so a line holding just "0" is
  # not mistaken for a terminator.
  if ($line eq '') {
    ProcessRequest(\%req);
    %req = ();   # really empty the hash; assigning undef would leave a '' key
    next;
  }

  if (!$req{'opcode'}) {
    # Still waiting for the header line; anything else is ignored.
    if ($line =~ /(\w+)\s+(\d+)\.(\d+)/) {
      $req{'opcode'}  = $1;
      $req{'version'} = "$2.$3";
    }
    next;
  }

  # Body line.  Split on the first colon only: the value may not contain one
  # (Base64 alphabet), but limiting the split keeps the intent explicit.
  my ($key, $val) = split(/:/, $line, 2);
  next unless defined $key && $key ne '' && defined $val;

  $val = MIME::Base64::decode_base64($val);

  # Skip empty values, but test against '' instead of truth so that a
  # legitimate value of "0" is kept.
  next if $val eq '';

  push(@{ $req{'kv'} }, { k => $key, v => $val });
}

# ProcessRequest(\%request)
#
# Acts on one complete message read from STDIN.
#
#   \%request - hash ref holding 'opcode' and, for requests, 'kv': an array
#               ref of { k => key, v => value } records.
#
# Opcodes handled:
#   identify - prints a "register 1.0" message announcing this plugin's name,
#              description, event ("request") and noise level ("0").
#   request  - looks for e-mail addresses in the 'received' value and, when
#              any are found, prints two "store 1.0" messages tagged with the
#              request's 'id': the addresses under /inet/smtp/address and
#              their local parts under /organization/username.
# Any other opcode is silently ignored.  Every value written goes through
# plugin_encode().  The return value carries no meaning.
sub ProcessRequest {
  my ($request) = @_;
  my %req    = %{ $request || {} };
  my $opcode = defined $req{'opcode'} ? $req{'opcode'} : '';

  if ($opcode eq "identify") {
    print "register 1.0\n";
    print "name:"        . plugin_encode("extractor") . "\n";
    print "description:" . plugin_encode("Gathers useful information from a webpage content") . "\n";
    print "event:"       . plugin_encode("request") . "\n";
    print "noise:"       . plugin_encode("0") . "\n";
    print "\n";
    return;
  }

  return unless $opcode eq "request";

  # All state is lexical: nothing may survive from one request to the next,
  # otherwise data from an earlier page would be reported under a later id.
  my ($content, $id);

  # If a key appears more than once, the last occurrence wins.
  for my $pair (@{ $req{'kv'} || [] }) {
    if ($pair->{'k'} eq "received") {
      $content = $pair->{'v'};
    } elsif ($pair->{'k'} eq "id") {
      $id = $pair->{'v'};
    }
  }

  return unless $content;

  # Collect every e-mail-looking token, drop exact duplicates with a hash
  # (adjacent-compare after a case-insensitive sort can miss some), then sort
  # without regard to case.
  my %seen_email;
  my @emails = sort { uc($a) cmp uc($b) }
               grep { !$seen_email{$_}++ }
               ($content =~ /([\w\-\.]+\@[\w\-\.]+\.[\w\-]+)/g);

  return unless @emails;

  # Usernames are the part of each address before the first '@'.
  my %seen_user;
  my @users = sort { uc($a) cmp uc($b) }
              grep { !$seen_user{$_}++ }
              map  { (split(/\@/, $_, 2))[0] } @emails;

  my $encoded_id = plugin_encode(defined $id ? $id : '');

  for my $report ([ "/inet/smtp/address",     \@emails ],
                  [ "/organization/username", \@users  ]) {
    my ($section, $values) = @{$report};

    print "store 1.0\n";
    print "content:" . plugin_encode(join("\n", @{$values})) . "\n";
    print "section:" . plugin_encode($section) . "\n";
    print "id:$encoded_id\n";
    print "\n";
  }

  return;
}

# plugin_encode($data)
#
# Returns $data Base64-encoded on a single line, with no line breaks at all.
#
# The messages this plugin reads and writes carry one "key:value" pair per
# line and end at an empty line, so an encoded value must never contain a
# newline.  encode_base64() by default inserts one after every 76 output
# characters; passing an empty end-of-line string turns that off, which also
# makes a trailing chomp unnecessary.  An undefined argument is encoded as
# the empty string.
sub plugin_encode {
  my ($data) = @_;
  $data = '' unless defined $data;
  return MIME::Base64::encode_base64($data, '');
}
