#!/usr/local/bin/perl
##
###########################################################################
##
##  Program:  parseFAXhistory.pl
##
##  Purpose:  Gather statistics from the PowerPages HISTORY.TXT file
##
##  Date:     4 Jun 1997
##
##  Author:   Peter Murray, Digital Media Services
##            Case Western Reserve University
##            http://www.cwru.edu/home/pem.html
##
##  Legalities:
##            Copyright 1997 by Case Western Reserve University.
##
##  Revision History:
##    4 Jun 1997  pem  Initial Release
##    5 Jun 1997  pem  When output statistics, subtract the number of
##                     articles from the number of pages to represent
##                     the true number of PowerPages pages sent out
##                     (similar comparison with the Print station stats)
##
## DOCUMENTATION, in PERL POD format, is at the end of the program.
## Running the program `perldoc <programname>` should output the manual.
##
##
## CONFIGURATION SECTION
## The %locations hash is used to map phone numbers to more descriptive
## locations.  This is useful for the "known output" FAX numbers, such as
## the dedicated PowerPages FAX machines in libraries...
## Each key is a descriptive location name; each value is the FAX phone
## number that identifies that location.
%locations = (
  'msass' => '1002',
  'law'   => '2106',
  'allen' => '8536',
);

## The other place that this script needs configuration is later in the
## parsing section when you create patterns for other phone numbers
## 
## END OF CONFIGURATION SECTION


use Date::Parse;
require "getopts.pl";

## If the user does not specify a beginning or ending time, default to
## some reasonable values.  The $endTime value is taken from the maximum
## UNIX time (some time in 2038).  These defaults stay in effect unless the
## -b or -e option replaces them further down.
$begTime = 0;
$endTime = 2147480000;

## Parse the command line.  Both -b and -e take an argument; their values
## are read back from $opt_b and $opt_e later in the program.
&Getopts('b:e:');

##
## First, read through the input files from STDIN or specified on the command
## line.  Each line is split on whitespace into the columns named in @fields,
## stored in a hash keyed by column name, and a reference to that hash is
## appended to @records.
my @fields = qw(date time duration event pages status phone);
while (my $line = <>) {
  # Skip lines that begin with "Date" (presumably the column header) and any
  # line that mentions "Protocol".
  next if $line =~ /^Date/ or $line =~ /Protocol/;

  chomp $line;
  $line =~ s/\r\0//;

  # Pair each whitespace-separated value with the column name found at the
  # same position in @fields.
  my @values = split(/\s+/, $line);
  my %record;
  foreach my $index (0 .. $#values) {
    $record{$fields[$index]} = $values[$index];
  }

  # Store the timestamp as UNIX time so it can be compared numerically
  # against the beginning and ending boundaries.
  $record{'unixtime'} = str2time("$record{'date'} $record{'time'}");
  push @records, \%record;
}

##
## Find the UNIX time for the beginning and ending time, if specified in
## the command line parameters.  A date that str2time cannot parse is treated
## as a fatal error: it would otherwise come back undefined, compare as zero,
## and silently distort the date filter.
if (defined $opt_b) {
  $begTime = str2time($opt_b);
  die "$0: cannot parse beginning date '$opt_b'\n" unless defined $begTime;
}
if (defined $opt_e) {
  $endTime = str2time($opt_e);
  die "$0: cannot parse ending date '$opt_e'\n" unless defined $endTime;
}

##
## Work through each record.  Skip it if it is outside of the boundaries
## of the times specified on the command line.  Every remaining record is
## counted exactly once, under the first category that matches.  The loop is
## labelled so that a match found inside the nested %locations loop can move
## on to the next record rather than merely to the next location.
RECORD:
foreach my $item (@records) {
  next RECORD if $item->{'unixtime'} < $begTime;
  next RECORD if $item->{'unixtime'} > $endTime;

# Remove extraneous characters, such as commas and dashes, from the phone number
  $item->{'phone'} =~ s/[,-]//g;

# Check the phone number against each of the records in the %locations hash; if
# a match is found, run the checkRecord subroutine to add this line to the
# results set, then skip to the next record.
  foreach my $location (keys %locations) {
    if ($locations{$location} eq $item->{'phone'}) {
      checkRecord($location, $item);
      next RECORD;
    }
  }

# If we didn't match against any of the locations, look at several patterns
# defined below for a match.  The first example, "/^\d{4}$/", matches all
# four-digit phone numbers (on campus for us).  The second example, "/^7\d{4}$/",
# matches our student dorm lines (4 digit numbers with a prefix of "7").  At
# the end, if nothing matches, put it in the "other" group.  If we have done our
# job right, only errors will be put here in the final output.
  if ($item->{'phone'} =~ /^\d{4}$/) {
    checkRecord('campus', $item);
    next RECORD;
  }
  if ($item->{'phone'} =~ /^7\d{4}$/) {
    checkRecord('student', $item);
    next RECORD;
  }
  if ($item->{'phone'} =~ /^142\d{5}$/) {
    checkRecord('hospital', $item);
    next RECORD;
  }
  if ($item->{'phone'} =~ /^9\d{7}$/) {
    checkRecord('offcampus', $item);
    next RECORD;
  }
  checkRecord('other', $item);
}

# Now output the results in a tab-delimited format, one line per location,
# sorted by location name so repeated runs list the rows in the same order.
print "Location\tArticles\tPages\tErrors\n";
foreach my $location (sort keys %results) {
  # A counter that was never incremented for this location is reported as 0
  # instead of an empty column.
  my $articles = $results{$location}{'articles'} || 0;
  my $pages    = $results{$location}{'pages'}    || 0;
  my $errors   = $results{$location}{'errors'}   || 0;

  # The article count is subtracted from the page count so that the Pages
  # column represents the true number of PowerPages pages sent out (see the
  # revision history at the top of the file).  The row is built with join so
  # every column, including the tab after Pages, is really printed.
  print join("\t", $location, $articles, $pages - $articles, $errors), "\n";
}

## AND END THE PROGRAM!
exit;


## SUBROUTINE checkRecord
##
## Classifies one log file record as an error or a successful article and
## updates the matching counters for the given location in the global
## %results hash.  A record whose status is exactly 'Error' adds one to
## 'errors'; any other record adds one to 'articles' and adds its page count
## to 'pages'.
##   Parameters:
##      <$location>    location where the statistics will be put
##      <$recordRef>   a reference to the hash of the log file entry
##   Returns:  void
sub checkRecord {
  my $location  = shift;
  my $recordRef = shift;

  # Fetch (creating on first use) the counter hash for this location.
  my $tally = ($results{$location} ||= {});

  if ($recordRef->{'status'} ne 'Error') {
    $tally->{'articles'}++;
    $tally->{'pages'} += $recordRef->{'pages'};
  }
  else {
    $tally->{'errors'}++;
  }
}


=head1 NAME

parseFAXhistory.pl - Gather statistics from the PowerPages HISTORY.TXT file

=head1 SYNOPSIS

  parseFAXhistory.pl  [ -b mm/dd/yy ]  [ -e mm/dd/yy ]  [ file ... ]

=head1 DESCRIPTION

This script will parse a HISTORY.TXT file from a PowerPages FAX machine
and categorize the log entries as success or failure, and count the number
of articles and pages.  Each record is further categorized by output location
based on the phone number.  Output is sent to STDOUT.

=head1 OPTIONS

=over 4

=item -b mm/dd/yy

Ignore log file entries before this date.  The format of the date is numeric
month, day, and year, separated by slashes.  For example:  "6/5/97"

=item -e mm/dd/yy

Ignore log file entries after this date.  The format is the same as above.

=item file ...

One or more log files.  Optionally, the log file can come into this script
via STDIN.

=back

=head1 EXAMPLES

=over 4

C<parseFAXhistory.pl -b 5/1/97 -e 6/1/97 HISTORY.TXT>

=back

=head1 SETUP

You must set up your PowerPages FAX server to keep a log file of these entries
to disk.  To do this, follow these general directions:

=over 4

=item 1

Reboot your PowerPages FAX server and at the "Starting MS-DOS..." 
message as the system is booting, press F5 to skip the processing of the
CONFIG.SYS and the AUTOEXEC.BAT files.

=item 2

Change directory into the \CASMGR directory.  Run the Setup program.  

=item 3

Choose "Advanced Setup".  Choose "Setup technical software options".

=item 4

Choose "History File".  Choose "ON".  Note the message about erasing the log file
occasionally.  Press F10 twice to save your changes.

=item 5

Choose "Exit Setup".  B<IMPORTANT:  Choose *NOT* to
update the AUTOEXEC.BAT and CONFIG.SYS files; instead, choose "Quit".>

=item 6

Reboot your PowerPages FAX server.

=back

The location of the log file is F<C:\CASMGR\HISTORY.TXT>.

You must then set up the parseFAXhistory.pl program.  There are two things
that you need to look at:

=over 4

=item %locations hash variable

At the top of the program, there is a hash variable called %locations that
should be filled in with the locations of your in-library PowerPages fax
machines so that these locations are counted separately.  The key for each
record is the location name and the value is the FAX phone number.

=item Phone number patterns

The second place that needs configuration is in the phone number
patterns near the middle of the program.  Several examples are provided,
and the comments in that section give an overview of how to write
the patterns.

=back

=head1 NOTES

The number of errors is not really reflective of the number of problem requests.
Requests to incorrect phone numbers will cycle through the Retry queue
many times before the FAX server gives up; each one of these tries is logged in
the HISTORY.TXT file, inflating the actual number of errors by a dramatic amount.
The number of errors is probably useless.


=head1 COPYRIGHT

Copyright 1997 by Case Western Reserve University.

=head1 AUTHOR

 Peter Murray
 Library Systems Manager
 Digital Media Services
 Case Western Reserve University
 pem@po.cwru.edu
 http://www.cwru.edu/home/pem.html

Updates available from http://www.cwru.edu/lit/homes/pem/projects/parseFAXhistory.html

=cut



