#!/usr/bin/perl

=head1 NAME

pptrain - program to send spam/ham message to Proofpoint for training

=head1 SYNOPSIS

  pptrain -ham|-spam [-guid <guid>] <file_or_dir> [<file_or_dir>...]
          -register -key <key>
          -help
          -man

=head1 EXAMPLES

  pptrain -s /tmp/spam
  pptrain -h /tmp/ham
  pptrain -r -k aaasdfasdasdf

  # using a proxy (note: no trailing slash on proxy name!)
  https_proxy=http://my.proxy:3128 pptrain -s /tmp/spam

=head1 OPTIONS

=over 4

=item -ham

Send ham messages to Proofpoint.  At least one <file_or_dir> argument
must be specified.  Directories are recursively searched to find all
files underneath the given directory.  Files are expected to be in RFC
2822 format.

=item -spam

Send spam messages to Proofpoint.  At least one <file_or_dir> argument
must be specified.  Directories are recursively searched to find all
files underneath the given directory.  Files are expected to be in RFC
2822 format.

=item -register

Register a client (this program) with Proofpoint.  This argument
requires that the -key <key> arguments also be specified, see B<-key>
below.

=item -key <key>

The activation key to be used when registering a client.  The value of
<key> is provided by Proofpoint, see B<-register> above.

=item -guid <guid>

The guid is provided by the Proofpoint server once client activation
has occurred successfully.  The guid is typically stored in the zcs
local configuration file automatically by this program using
B<zmlocalconfig>.

=item -base_url <base_url>

This option overrides the default base_url. A custom base_url can also
be stored in the zcs local configuration file.

=item -help

Print program version and brief help documentation.

=item -man

Print complete program documentation.

=back

=cut

use strict;
use warnings;
use Benchmark ();

# Handle command line options
my $t0 = Benchmark->new;
my $pt = Proofpoint::Train->new( opt => Proofpoint::Train::Options() );
my $rc = $pt->main();
my $t1 = Benchmark->new;

my $td = Benchmark::timediff( $t1, $t0 );
my $secs = $td->real;

my ( $s, $f, $prog ) = ( $pt->{_success}, $pt->{_failure}, $pt->{_prog} );
print(
    "$prog: ", $s + $f,
    " message(s) processed in $secs second(s)\n",
    "$prog: successes = $s; failures = $f\n"
) if ( $s || $f );

# exit 0 on success
exit( !$rc ? 1 : 0 );

{

    package Proofpoint::Train;

    our ( $PRODUCT_ID, $VERSION, $Prog );

    BEGIN {
        $PRODUCT_ID = "ZCS Proofpoint Train";
        $VERSION    = '1.03';
    }

=head1 Configuration and Constants

Some configuration data can be stored in the ZCS local configuration
file managed by the B<zmlocalconfig> utility.

=over 4

=item Default Proofpoint server "base URL"

  "https://ccs.proofpoint.com/ccs"

This is used to generate the "clients" and "postings" URLs:

  $Opt{base_url} . "/clients"
  $Opt{base_url} . "/postings"

If a proxy  is required to access the  Proofpoint server, this program
can pick up the proxy settings from the runtime environment.  To proxy
the http  requests based  on the default  url, set  the B<https_proxy>
environment  variable to  the proxy  server to  be used.   For example
(note: there is no trailing slash on the proxy name!):

  export https_proxy=http://my.proxy:3128

=item zmlocalconfig

  "/opt/zimbra/bin/zmlocalconfig"

=item zmlocalconfig base_url "key"

  "proofpoint_base_url"

=item zmlocalconfig guid "key"

  "proofpoint_guid"

=back

=head2 DEPENDENCIES

The following non-"core" Perl modules are required by this program:

=over 4

=item HTTP::Request

=item LWP::UserAgent

=item MIME::Base64

=item XML::Parser

=back

The following non-"core" Perl modules are optional for this program:

=over 4

=item Crypt::SSLeay

This module is needed if a proxy server is used to connect to the
proofpoint server and if the proxy server requires use of the CONNECT
method.

=back

=head1 METHODS

The section provides some details about internal methods/functions
used by this program.

=cut

    use constant BASE_URL      => "https://ccs.proofpoint.com/ccs";
    use constant CLIENTS_PATH  => "/clients";
    use constant POSTINGS_PATH => "/postings";
    use constant ZMLC_CMD      => "/opt/zimbra/bin/zmlocalconfig";

    use constant PP_BASE_URL_LCKEY => "proofpoint_base_url";
    use constant PP_GUID_LCKEY     => "proofpoint_guid";

    use strict;
    use warnings;

    use File::Basename qw(basename);
    use File::Find qw(find);
    use IO::File ();
    use Getopt::Long qw(GetOptions);
    use Pod::Usage qw(pod2usage);

    # External Modules: provide a "nice" error message if they are missing...
    use lib qw(/opt/zimbra/zimbramon/lib/);

    BEGIN {
        $Prog = basename($0);
        my @mods = qw(HTTP::Request LWP::UserAgent MIME::Base64 XML::Parser);
        my @missing;
        foreach my $m (@mods) {
            eval "require $m";
            push( @missing, $m ) if ($@);
        }
        my @msg = (
            "$Prog: Perl modules usually under",
            " /opt/zimbra/zimbramon/lib are missing:\n",
            "\t@missing\n",
            "$Prog: Install these from ZCS, CPAN or via a package manager\n",
            "$Prog: Documentation available with:\n\tperldoc $0\n",
        );
        pod2usage( -exitval => 1, -verbose => 0, -message => join( "", @msg ) )
          if (@missing);
    }

    sub Options {
        my %Opt;

        GetOptions(
            \%Opt,        "ham",    "spam",  "register",
            "base_url=s", "guid=s", "key=s", "help",
            "man",        "debug:1"
        ) or pod2usage( -verbose => 1 );

        pod2usage( -verbose => 2 ) if ( $Opt{man} );
        pod2usage( -verbose => 1, -message => "$Prog: version $VERSION\n" )
          if ( $Opt{help} );

        my @error;

        my @do = map( $Opt{$_} ? $_ : (), qw(ham spam register) );
        unless ( scalar @do == 1 ) {
            push( @error, "must specify one of -ham, -spam or -register" );
        }
        elsif ( $do[0] eq "register" ) {
            push( @error, "must specify -key <key>" ) unless ( $Opt{key} );
            push( @error, "extra argument(s) specified: @ARGV" ) if (@ARGV);
            push( @error, "-guid <guid> is not allowed with -$do[0]" )
              if ( $Opt{guid} );
        }
        else {
            push( @error, "-key <key> is only valid with -register" )
              if ( $Opt{key} );
            push( @error, "must specify <file_or_dir>" ) unless (@ARGV);
        }

        pod2usage(
            -verbose => 0,
            -message => join( "", map( "$Prog: $_\n", @error ) )
        ) if (@error);

        return \%Opt;
    }

    sub new {
        my ( $class, $self ) = ( shift, {@_} );

        die("$Prog: arguments required\n")
          unless ( keys %$self );

        $self->{_prog} = $Prog;
        $self->{_success} = $self->{_failure} = 0;

        return bless( $self, $class );
    }

    sub main {
        my ($self) = @_;

        if ( $self->opt("register") ) {
            $self->register;
        }
        else {
            my $wanted = sub {
                -f $_ && $self->processmessage($File::Find::name);
            };

            $File::Find::name = undef;
            find( { wanted => $wanted, no_chdir => 1 }, @ARGV );
        }
        return $self->{_failure} ? 0 : 1;
    }

    sub opt {
        my ( $self, $key ) = @_;
        return $key ? $self->{opt}->{$key} : $self->{opt};
    }

    sub debug {
        my ($self) = @_;
        return $self->opt("debug");
    }

    sub command {
        my ( $self, $command, @args ) = @_;
        unless ( $command and -x $command ) {
            die("unable to execute $command\n");
        }
        return `$command @args`;
    }

    sub lcval {
        my ( $self, $out ) = @_;
        return ( $out and $out =~ /^[^=]*=\s*([^\s]+)\s*$/ ) ? $1 : undef;
    }

=head2 guid([$guid])

Returns the command line specified guid value (if used) or the guid
value found via zmlocalconfig "proofpoint_guid". If neither value is
set the program exits with an error indicating no guid value was
found.

If the optional $guid value is passed we set the zmlocalconfig guid
value to the passed value.

=cut

    sub guid {
        my ( $self, $value ) = @_;

        if ($value) {
            my @cmd = ( ZMLC_CMD, "-edit", PP_GUID_LCKEY . "=" . $value );
            my $out = eval { $self->command(@cmd); };
            if ( $@ || $? ) {
                chomp($@) if $@;
                warn(
                    "$Prog: unable to store guid via '@cmd'",
                    ( $? ? " rc=" . ( $? >> 8 ) : "" ),
                    ( $@ ? " ERROR:\n$@" : "" ), "\n"
                );
            }

            if ($out) {
                chomp($out);
                warn("$Prog: ERROR: command=@cmd\n$out\n");
            }
            $self->{"guid"} = $value;
        }
        elsif ( !$self->{"guid"} ) {
            if ( $self->opt("guid") ) {
                $self->{"guid"} = $self->opt("guid");
            }
            else {
                my @cmd = ( ZMLC_CMD, PP_GUID_LCKEY, "2>/dev/null" );
                my $out = eval { $self->command(@cmd); };
                my $val = $self->lcval($out);
                $self->{"guid"} = $val if ($val);
            }
        }

        die("$Prog: no guid: use --guid <guid> or --register if necessary\n")
          unless ( $self->{"guid"} );

        return $self->{"guid"};
    }

    sub key {
        my ($self) = @_;
        return $self->opt("key");
    }

    sub base_url {
        my ( $self, $value ) = @_;

        if ($value) {
            return $self->{_base_url} = $value;
        }
        elsif ( $self->{_base_url} ) {
            return $self->{_base_url};
        }
        else {
            if ( $self->opt("base_url") ) {
                $value = $self->opt("base_url");
            }
            else {
                my @cmd = ( ZMLC_CMD, PP_BASE_URL_LCKEY, "2>/dev/null" );
                my $out = eval { $self->command(@cmd); };
                $value = $self->lcval($out);
            }
            $value =~ s,/?\s*$,, if ($value);

            return $self->{_base_url} = $value || BASE_URL;
        }
    }

    sub clients_url {
        my ( $self, $value ) = @_;

        if ($value) {
            return $self->{_clients_url} = $value;
        }
        elsif ( $self->{_clients_url} ) {
            return $self->{_clients_url};
        }
        else {
            return $self->{_clients_url} = $self->base_url . CLIENTS_PATH;
        }
    }

    sub postings_url {
        my ( $self, $value ) = @_;

        if ($value) {
            return $self->{_postings_url} = $value;
        }
        elsif ( $self->{_postings_url} ) {
            return $self->{_postings_url};
        }
        else {
            return $self->{_postings_url} = $self->base_url . POSTINGS_PATH;
        }
    }

    sub lwp {
        my ($self) = @_;
        unless ( $self->{_lwp} ) {
            my $lwp = LWP::UserAgent->new();
            $self->{_lwp} = $lwp;

            # proxy handling...
            $lwp->env_proxy;    # use proxy settings from ENV
            if ( $self->base_url =~ /^https:/ and $lwp->proxy("https") ) {
                local ($@);
                eval { require Crypt::SSLeay; };
                if ($@) {
                    $self->{_ssl_proxy_note} = "install Crypt::SSLeay module"
                      . " if proxy requires CONNECT for https";
                }
                else {
                    $lwp->proxy( "https", undef );
                }
            }

            if ( $self->{opt}->{debug} ) {
                my $ua = $self->{_lwp};
                my $dumpit = sub { shift->dump; return };
                $ua->add_handler( "request_send",  $dumpit );
                $ua->add_handler( "response_done", $dumpit );
            }
        }
        return $self->{_lwp};
    }

=head2 post($dest,$soap)

Post an XML document to a Proofpoint server.

=cut

    sub post {
        my ( $self, $dest, $soap ) = @_;
        my $req = HTTP::Request->new( POST => $dest );
        $req->header( "Content-Type", "text/xml" );
        $req->content($soap);
        return $self->lwp->request($req);
    }

    sub _scan {
        my ( $self, $tree, $item ) = @_;
        for ( my $count = 0 ; $count < $#$tree ; $count++ ) {
            if ( $tree->[$count] eq $item ) {
                return $tree->[ $count + 1 ];
            }
        }
        return undef;
    }

    sub _error {
        my ( $self, $response, $msg ) = @_;
        my $sslnote = $self->{_ssl_proxy_note};
        warn( $Prog, ": NOTE: $sslnote\n" )
          if ( $sslnote and ( $self->debug or $response->code eq "501" ) );
        warn( $Prog, ": ERROR: ", $msg, ":\n", $response->status_line, "\n" );
    }

    sub register {
        my ($self) = @_;
        my $response = $self->post( $self->clients_url, $self->client_key );
        if ( $response->is_success ) {
            my $xp = XML::Parser->new( Style => "Tree" );
            my $xml = $xp->parse( $response->decoded_content );
            my $guid =
              $self->_scan(
                $self->_scan( $self->_scan( $xml, "client" ), "guid" ), 0 );
            $self->guid($guid);
            print("$Prog: GUID: $guid\n");
        }
        else {
            $self->_error( $response, "registration failed" );
        }
    }

    sub xml_decl {
        my ($self) = @_;
        return <<XML_DOC;
<?xml version="1.0" encoding="UTF-8"?>
XML_DOC
    }

=head2 client_key()

Generate a client XML request containing the activation key that will
be sent to Proofpoint as part of the registration process.

=cut

    sub client_key {
        my ($self) = @_;
        my $declxml = $self->xml_decl();
        my $clientxml = $self->client_xml( "activationkey" => $self->key );
        chomp( $declxml, $clientxml );
        return <<XML_DOC;
$declxml
$clientxml
XML_DOC
    }

    sub client_xml {
        my ( $self, $key, $value ) = @_;
        my $prodxml = $self->product_xml;
        chomp($prodxml);
        return <<XML_DOC;
 <client>
  <$key>$value</$key>
  $prodxml
 </client>
XML_DOC
    }

    sub product_xml {
        my ($self) = @_;
        return <<XML_DOC;
<product><id>$PRODUCT_ID</id><version>$VERSION</version></product>
XML_DOC
    }

=head2 posting($message)

Generate a message posting XML document containing an encoded email
message that will be sent to Proofpoint as part of the training
process.

=cut

    sub posting {
        my ( $self, $message ) = @_;

        # ham/spam type and msgtype as defined by Proofpoint
        my ( $pp_type, $pp_msgtype ) = $self->opt("ham") ? ( 2, 0 ) : ( 1, 10 );

        my $declxml = $self->xml_decl();
        my $clientxml = $self->client_xml( guid => $self->guid );
        chomp( $declxml, $clientxml );

        return <<XML_DOC;
$declxml
<posting>
$clientxml
 <type>$pp_type</type>
 <source>7</source>
 <msgtype>$pp_msgtype</msgtype>
 <messages>
  <message>
   <rfc822>$message</rfc822>
  </message>
 </messages>
</posting>
XML_DOC
    }

=head2 processmessage($file)

Read a email RFC 2822 message from a file, base64 encode the message,
and then post the message to a Proofpoint server.

=cut

    sub processmessage {
        my ( $self, $file ) = @_;
        my $fh = IO::File->new( $file, "r" )
          or die("$Prog: could not open file '$file'\n");
        my $msg = "";
        my $buf;
        while ( read( $fh, $buf, 60 * 57 ) ) {
            $msg .= MIME::Base64::encode_base64($buf);
        }
        $fh->close;

        my $response = $self->post( $self->postings_url, $self->posting($msg) );
        if ( $response->is_success ) {
            $self->{_success}++;
        }
        else {
            $self->{_failed}++;
            $self->_error( $response, "POST of '$file' failed" );
        }
    }
}

=head1 SEE ALSO

The following sites provide useful information on Proofpoint and Zimbra:

=over 4

=item *

Proofpoint L<http://www.proofpoint.com/>

=item *

Zimbra Collaboration Server L<http://www.zimbra.com/>

=back

=head1 BUGS and LIMITATIONS

=over 4

=item *

Performing a HTTP POST of a message to the "postings" service URL does
not return an error even when an invalid GUID is used in the request.

=back

=head1 COPYRIGHT

TBD

=head1 LICENSE

TBD

=cut
