Commit 120352af authored by unknown's avatar unknown
Browse files

New versions of mail_to_db.pl and pmail.pl.

Added email threading capabilities to both
programs.


tests/mail_to_db.pl:
  Changes to mail_to_db.pl
  - Removed the optional table name (--table). Future releases may require
    more than one table, so it is better to have a fixed
    table name.
  - Fixed a division-by-zero error in the report, which occurred if the table
    was created but no mails were inserted.
  - Added fields message_id and in_reply_to.
tests/pmail.pl:
  Changed pmail:
  
  New option: --thread. Prints all subsequent replies in the thread.
  New option: --message_id. Prints message_id and number of replies found.
  Both options work recursively: not only direct replies to the mail found
  are searched, but also replies to those replies and so on,
  until the whole thread has been found.
  
  Clean up: Localized variables and moved code into functions.
parent 20542236
Loading
Loading
Loading
Loading
+40 −14
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ use DBI;
use Getopt::Long;

$| = 1;
$VER = "2.6";
$VER = "3.0";

$opt_help          = 0;
$opt_version       = 0;
@@ -26,7 +26,6 @@ $opt_host = undef();
$opt_port          = undef();
$opt_socket        = undef();
$opt_db            = "mail";
$opt_table         = "mails";
$opt_user          = undef();
$opt_password      = undef();
$opt_max_mail_size = 65536;
@@ -97,7 +96,7 @@ sub main
    print "the my.cnf file. This command is available from the latest MySQL\n";
    print "distribution.\n";
  }
  GetOptions("help","version","host=s","port=i","socket=s","db=s","table=s",
  GetOptions("help","version","host=s","port=i","socket=s","db=s",
	     "user=s","password=s","max_mail_size=i","create","test",
	     "no_path","debug","stop_on_error","stdin")
  || die "Wrong option! See $progname --help\n";
@@ -123,7 +122,6 @@ sub main
  || die "Couldn't connect: $DBI::errstr\n";

  die "You must specify the database; use --db=" if (!defined($opt_db));
  die "You must specify the table; use --table=" if (!defined($opt_table));

  create_table($dbh) if ($opt_create);

@@ -218,9 +216,9 @@ sub main
  print "Total number of mails:\t\t\t\t"; 
  print $mail_inserted + $ignored;
  print " (OK: ";
  print sprintf("%.1f", (($mail_inserted / ($mail_inserted+$ignored)) * 100));
  print sprintf("%.1f", ($mail_inserted + $ignored) ? (($mail_inserted / ($mail_inserted+$ignored)) * 100) : 0.0);
  print "% Ignored: ";
  print sprintf("%.1f", (($ignored / ($mail_inserted + $ignored)) * 100));
  print sprintf("%.1f", ($mail_inserted + $ignored) ? (($ignored / ($mail_inserted + $ignored)) * 100) : 0);
  print "%)\n";
  print "################################ End Report ##################################\n";
  exit(0);
@@ -236,9 +234,11 @@ sub create_table
  my ($sth, $query);

  $query= <<EOF;
CREATE TABLE $opt_table
CREATE TABLE my_mail
(
 mail_id MEDIUMINT UNSIGNED NOT NULL auto_increment,
 message_id VARCHAR(255),
 in_reply_to VARCHAR(255),
 date DATETIME NOT NULL,
 time_zone VARCHAR(20),
 mail_from VARCHAR(120) NOT NULL,
@@ -250,6 +250,8 @@ CREATE TABLE $opt_table
 file VARCHAR(64) NOT NULL,
 hash INTEGER NOT NULL,
 KEY (mail_id),
 KEY (message_id),
 KEY (in_reply_to),
 PRIMARY KEY (mail_from, date, hash))
 TYPE=MyISAM COMMENT=''
EOF
@@ -277,7 +279,7 @@ sub process_mail_file
    chop if (substr($_, -1, 1) eq "\r");
    if ($type ne "message")
    { 
      if (/^Reply-To: (.*)/i)
      if (/^Reply-To:\s*(.*)/i)
      {
	$type = "reply";
	$values{$type} = $1;
@@ -302,12 +304,25 @@ sub process_mail_file
	$type = "subject";
	$values{$type} = $1;
      }
      elsif (/^Message-Id:\s*(.*)/i)
      {
	$type = "message_id";
	s/^\s*(<.*>)\s*/$1/;
	$values{$type} = $1;
      }
      elsif (/^In-Reply-To:\s*(.*)/i)
      {
	$type = "in_reply_to";
	s/^\s*(<.*>)\s*/$1/;
	$values{$type} = $1;
      }
      elsif (/^Date: (.*)/i)
      {
	date_parser($1, \%values, $file_name);
	$type = "rubbish";
      }
      elsif (/^[\w\W-]+:\s/)
      # Catch those fields that we don't or can't handle (yet)
      elsif (/^[\w\W-]+:/)
      {
	$type = "rubbish";
      }
@@ -319,6 +334,10 @@ sub process_mail_file
      else
      {
	s/^\s*/ /;
	if ($type eq 'message_id' || $type eq 'in_reply_to')
	{
	  s/^\s*(<.*>)\s*/$1/;
	}
	$values{$type} .= $_;
      }
    }
@@ -421,8 +440,10 @@ sub update_table
    goto restart;	  # Some mails may have duplicated messages
  }

  $q = "INSERT INTO $opt_table (";
  $q = "INSERT INTO my_mail (";
  $q.= "mail_id,";
  $q.= "message_id,";
  $q.= "in_reply_to,";
  $q.= "date,";
  $q.= "time_zone,";
  $q.= "mail_from,";
@@ -435,6 +456,12 @@ sub update_table
  $q.= "hash";
  $q.= ") VALUES (";
  $q.= "NULL,";
  $q.= (defined($values->{'message_id'}) ?
	$dbh->quote($values->{'message_id'}) : "NULL");
  $q.= ",";
  $q.= (defined($values->{'in_reply_to'}) ?
	$dbh->quote($values->{'in_reply_to'}) : "NULL");
  $q.= ",";
  $q.= "'" . $values->{'date'} . "',";
  $q.= (defined($values->{'time_zone'}) ?
	$dbh->quote($values->{'time_zone'}) : "NULL");
@@ -575,7 +602,6 @@ Options:
--port=#           TCP/IP port to be used with connection.
--socket=...       MySQL UNIX socket to be used with connection.
--db=...           Database to be used.
--table=...        Table name for mails.
--user=...         Username for connecting.
--password=...     Password for the user.
--stdin            Read mails from stdin.
+205 −78
Original line number Diff line number Diff line
#!/usr/bin/perl
#!/usr/bin/perl -w
#                                  
# Prints mails to standard output  
#                                  
@@ -9,21 +9,25 @@
use DBI;
use Getopt::Long;

$VER="1.5";
$VER="2.0";

@fldnms= ("mail_from","mail_to","cc","date","time_zone","file","sbj","txt");
$fields=8;
@mail= (@from,@to,@cc,@date,@time_zone,@file,@sbj,@txt);
my $fields= 0;
my $base_q= "";
my $mail_count= 0;

$opt_user= $opt_password= "";
$opt_socket= "/tmp/mysql.sock";
$opt_port= 3306;
$opt_db="mail";
$opt_table="mails";
$opt_table="my_mail";
$opt_help=$opt_count=0;
$opt_thread= 0;
$opt_host= "";
$opt_message_id= 0;

GetOptions("help","count","port=i","db=s","table=s","host=s","password=s",
	   "user=s","socket=s") || usage();
	   "user=s","socket=s", "thread","message_id") || usage();

if ($opt_host eq '')
{
@@ -39,61 +43,173 @@ if ($opt_help || !$ARGV[0])
#### Connect and parsing the query to MySQL
####

$dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_mysql_socket", $opt_user,$opt_password, { PrintError => 0})
$dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_socket", $opt_user,$opt_password, { PrintError => 0})
|| die $DBI::errstr;

main();

####
#### main
####

sub main
{
  my ($row, $val, $q, $mail, $sth);

  if ($opt_count)
  {
    count_mails();
  }

$fields=0;
$query = "select ";
  $base_q= "SELECT ";
  foreach $val (@fldnms)
  {
    if (!$fields)
    {
    $query.= "$val";
      $base_q.= "$val";
    }
    else
    {
    $query.= ",$val";
      $base_q.= ",$val";
    }
    $fields++;
  }
$query.= " from $opt_table where $ARGV[0] order by date desc";
  $base_q.= ",message_id" if ($opt_thread || $opt_message_id);
  $base_q.= " FROM $opt_table";
  $q= " WHERE $ARGV[0]";

  $sth= $dbh->prepare($base_q . $q);
  if (!$sth->execute)
  {
    print "$DBI::errstr\n";
    $sth->finish;
    die;
  }
  for (; ($row= $sth->fetchrow_arrayref); $mail_count++)
  {
    for ($i= 0; $i < $fields; $i++)
    {
      if ($opt_message_id)
      {
	$mail[$fields][$mail_count]= $row->[$fields];
	$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
      }
      $mail[$i][$mail_count]= $row->[$i];
    }
    if ($opt_thread)
    {
      get_mail_by_message_id($row->[$fields], $mail);
    }
  }
  print_mails($mail);
}

####
#### Send query and save result
#### Function, which fetches mail by searching in-reply-to with
#### a given message_id. Saves the value (mail) in mail variable.
#### Returns the message id of the mail found and searches again
#### and saves, until no more mails are found with that message_id.
####

sub get_mail_by_message_id
{
  my ($message_id, $mail)= @_;
  my ($q, $query, $i, $row, $sth);

  $q= " WHERE in_reply_to = \"$message_id\"";
  $query= $base_q . $q;
  $sth= $dbh->prepare($query);
  if (!$sth->execute)
  {
  print "$DBI::errstr\n";
    print "QUERY: $query\n$DBI::errstr\n";
    $sth->finish;
    die;
  }
for ($i=0; ($row= $sth->fetchrow_arrayref); $i++)
  while (($row= $sth->fetchrow_arrayref))
  {
  for ($j=0; $j < $fields; $j++)
    $mail_count++;
    for ($i= 0; $i < $fields; $i++)
    {
    $mail[$j][$i]= $row->[$j];
      if ($opt_message_id)
      {
	$mail[$fields][$mail_count]= $row->[$fields];
	$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
      }
      $mail[$i][$mail_count]= $row->[$i];
    }
    $new_message_id= $row->[$fields];
    if (defined($new_message_id) && length($new_message_id))
    {
      get_mail_by_message_id($new_message_id, $mail);
    }
  }
  return;
}

####
#### Print to stderr
#### Get number of replies for a given message_id
####

sub get_nr_replies
{
  # Returns the total number of mails in the thread below $message_id:
  # the direct replies (rows whose in_reply_to equals $message_id) plus,
  # recursively, the replies to each of those replies.
  #
  # Parameters:
  #   $message_id  Message id of the mail whose replies are counted.
  #                An undefined or empty id yields 0.
  #   $ancestors   Optional hash ref, used internally by the recursion.
  #                It holds the message ids on the current path from the
  #                starting mail, so a cyclic reply chain cannot recurse
  #                forever. Callers normally omit it.
  #
  # Returns: the reply count (0 when there are no replies).
  # Dies after printing the query and the DBI error if the query fails.
  #
  # Uses the file-level $dbh handle and $opt_table, the same table the
  # other queries of this script are built from.
  my ($message_id, $ancestors)= @_;
  my ($sth, $q, $row, @reply_ids, $nr_replies);

  $nr_replies= 0;
  # A mail without a message id cannot be referenced by any reply.
  return $nr_replies if (!defined($message_id) || !length($message_id));

  # Already being counted further up the recursion: this is a cycle.
  $ancestors= {} if (!defined($ancestors));
  return $nr_replies if ($ancestors->{$message_id});

  # Bind the id through a placeholder instead of interpolating it, so
  # ids containing quotes can neither break nor alter the statement.
  $q= "SELECT message_id FROM $opt_table WHERE in_reply_to = ?";
  $sth= $dbh->prepare($q);
  if (!$sth || !$sth->execute($message_id))
  {
    print "QUERY: $q\n$DBI::errstr\n";
    $sth->finish if ($sth);
    die;
  }
  # Collect all direct replies first, so the statement handle is closed
  # before recursing. The row count is the number of direct replies.
  while (($row= $sth->fetchrow_arrayref))
  {
    push(@reply_ids, $row->[0]);
  }
  $sth->finish;

  $nr_replies= scalar(@reply_ids);

  # There may be several replies to the same mail, and each of them may
  # have replies of its own, so the total is calculated recursively.
  # Only the current path is marked; this keeps the count unchanged for
  # non-cyclic data, even when message ids are duplicated.
  $ancestors->{$message_id}= 1;
  foreach my $reply_id (@reply_ids)
  {
    $nr_replies+= get_nr_replies($reply_id, $ancestors);
  }
  delete $ancestors->{$message_id};

  return $nr_replies;
}

####
#### Print mails
####

sub print_mails
{
  my ($mail)= @_;
  my ($i);

  for ($i=0; $mail[0][$i]; $i++)
  {
    print "#" x 33;
    print " " . ($i+1) . ". Mail ";
    print "#" x 33;
  print "\nFrom: $mail[0][$i]\n";
    print "\n";
    if ($opt_message_id)
    {
      print "Msg ID: $mail[$fields][$i]\n";
    }
    print "From: $mail[0][$i]\n";
    print "To: $mail[1][$i]\n";
  print "Cc: $mail[2][$i]\n";
    print "Cc:" . (defined($mail[2][$i]) ? $mail[2][$i] : "") . "\n";
    print "Date: $mail[3][$i]\n";
    print "Timezone: $mail[4][$i]\n";
    print "File: $mail[5][$i]\n";
@@ -114,6 +230,7 @@ else
  }
  print "#" x 20;
  print "\n";
}  

####
#### Count mails that matches the query, but don't show them
@@ -121,6 +238,8 @@ print "\n";

sub count_mails
{
  my ($sth);

  $sth= $dbh->prepare("select count(*) from $opt_table where $ARGV[0]");
  if (!$sth->execute)
  {
@@ -157,12 +276,18 @@ sub usage
  --help       show this help
  --count      Shows how many mails matches the query, but not the mails.
  --db=        database to use (Default: $opt_db)
  --table=    table to use    (Default: $opt_table)
  --host=      Hostname which to connect (Default: $opt_host)
  --socket=    Unix socket to be used for connection (Default: $opt_socket)
  --password=  Password to use for mysql
  --user=      User to be used for mysql connection, if not current user
  --port=      mysql port to be used (Default: $opt_port)
  --thread     Will search for possible replies to emails found by the search
               criteria. Replies, if found, will be displayed right after the
               original mail.
  --message_id Display message_id on top of each mail. Useful when searching
               email threads with --thread. On the second line is the number
               of replies to the same thread, starting counting from that
               mail (excluding possible parent mails).
  "SQL where clause" is the end of the select clause,
  where the condition is expressed. The result will
  be the mail(s) that matches the condition and
@@ -177,6 +302,8 @@ sub usage
  - Message text
  The field names that can be used in the where clause are:
    Field       Type 
  - message_id  varchar(255) # Use with --thread and --message_id
  - in_reply_to varchar(255) # Internally used by --thread
  - mail_from   varchar(120)
  - date        datetime
  - sbj         varchar(200)
@@ -187,7 +314,7 @@ sub usage
  - reply       varchar(120)
  - file        varchar(32)
  - hash        int(11)
  An example of the pmail:
  An example of pmail:
  pmail "txt like '%libmysql.dll%' and sbj like '%delphi%'"
  NOTE: the txt field is NOT case sensitive!
EOF