#!/usr/local/bin/perl
##
###########################################################################
##
##  Program:  nph-redirect.pl
##
##  Purpose:  NPH Error Script to handle redirections based on 404 errors
##            from Netscape Enterprise server
##
##  Version:  1.4   24 Aug 1999
##
##  Author:   Peter Murray, Digital Media Services
##            Case Western Reserve University
##            http://www.cwru.edu/home/pem.html
##
##  Legalities:
##            Copyright 1997 by Case Western Reserve University.
##            All rights reserved.
##  Based on: RedMan Version 2.10 (7Nov95)
##            Main RedMan WebPage - http://sw.cse.bris.ac.uk/WebTools/redman.html
##            major revisions jrp 5/96--cleaned up html for error message, 
##            made files/subdirectories under redirects work with ncsa 1.5.1
##
##  Revision History:
##    5 Jun 1997  pem  Initial Release
##   10 Jun 1997  pem  v1.01: First production use; changed the log file to
##                     include the HTTP_REFERRER field
##   12 Jun 1997  pem  v1.1: Added the "message file" field in the config file
##                     that will be used instead of the standard message
##   22 Dec 1997  pem  v1.2: Fixed bug in the header-generation portion of
##                     RedirectURI() where it was not taking into account
##                     directory-level index documents
##   16 Feb 1998  pem  v1.3: Changed the routine in RedirectURI that determines
##                     whether the source file is an HTML file or a directory 
##   24 Aug 1999  pem  v1.4: If the request is for "favicon.ico", and one exists
##                     on the server, send that content rather than an error page
##
## DOCUMENTATION, in PERL POD format, is at the end of the program.
## Running the program `perldoc <programname>` should output the manual.
##
##
## CONFIGURATION SECTION

# $SYS_RED_LIST - Filesystem explicit name of the file that declares 
# server-wide rdirections, as setup by the WebAdministrator.
$SYS_RED_LIST='/usr/local/web/conf/redirect.conf';

# $USR_RED_LIST - Name of file that contains user defined redirections.
$USR_RED_LIST='redirects';

# $RED_LOG - Filesystem explicit name of the RedMan logfile
$RED_LOG='/usr/local/web/logs/redirect.log';

# $errorMessageFile - Location of the portion of the normal 404
# error message that will be sent to the user if the the redirection
# fails
$errorMessageFile = "/usr/local/extradisk/webtest/errors/404.msg";

# $faviconFile - Location of the MSIE 5-specific favorites icon for the server
$faviconFile = "/usr/local/extradisk/webtest/favicon.ico";

# $SECONDS - Number of seconds in the META REFRESH delay
$SECONDS = 30;

# Default APAS contact, with the format <TAB>E-mail<TAB><TAB>Title text<TAB>
$APAScontact = "\taurora\@po.cwru.edu\t\tthe AURORA Campus Web server\t";

##
## END OF CONFIGURATION SECTION
##

use CWRUWWW;


# $DOC_ROOT - Filesystem explicit name of directory that the WebServer 
# serves files from.
($DOC_ROOT = $ENV{'PATH_TRANSLATED'}) =~ s/$ENV{'PATH_INFO'}$//;

# $target - The PATH_INFO of the request that generated the error
$target = $ENV{'PATH_INFO'};

#
# First, try to handle .htm<=>.html substitutions
if ($target =~ /.html$/) {
  $newFile=$`.'.htm';
  $locale=$DOC_ROOT.$newFile;
  if(-e $locale && -f $locale) {
    &RedirectURI("html->htm $target","$ENV{'SERVER_URL'}$newFile",{'type' => 'quiet'});
  }
} elsif ($target =~ /.htm$/) {
  $newFile=$`.'.html';
  $locale=$DOC_ROOT.$newFile;
  if(-e $locale && -f $locale) {
    &RedirectURI("htm->html $target","$ENV{'SERVER_URL'}$newFile",{'type' => 'quiet'});
  }
}

#
# Next, if the request is for "favicon.ico", and one is configured in this
# script, and it exists on the server, deliver it to the user instead of an error
if (($target =~ /\/favicon.ico$/)&&($faviconFile ne '')) {
  if (open(FAVICON, $faviconFile)) {
    print "HTTP/1.0 200 Okay  Here it comes\n";
    print "Content-Type: application/octet-stream\n\n";
    print FAVICON;
    close FAVICON;
    logMsg("favicon $target (from $ENV{'HTTP_REFERER'})");
    exit(0);
  }
}

#
# Before we go any farther, get the APAS contact information out of the 
# database.  Rekey the contacts database so that it is based on the directory,
# then work our way through the $target string from the end to the beginning
# until we find a directory match.  If this fails, we still have the 
# $APAScontact information defined at the top of this script.
%APASdirs = ReKeyContacts('directory');
my(@splitTarget) = split /\//, $target;
APASsearch: for ($index = $#splitTarget; $index > 0; $index--) {
  my($attemptedTarget) = join('/', @splitTarget[0..$index]);
  foreach $key (keys %APASdirs) {
    if ($attemptedTarget eq "/$key") {
      $APAScontact = $APASdirs{$key};
      last APASsearch;
    }
  }
}

#
# Check for system-wide redirections, as specified by the Web administrator.
if (-e $SYS_RED_LIST && -f $SYS_RED_LIST) {
  &AttemptRedirect($target, $SYS_RED_LIST);
}

#
# Check for a 'redirection' file in the same directory as the missing 
# WebResource.  If one is found, pass it to the AttemptRedirect routine
# to see if anything in it matches $target.
$marker=rindex($target,'/');
$new1=join('/',substr($target,0,$marker),$USR_RED_LIST);
$userRedirectList=join('',$DOC_ROOT,$new1);

$target=substr($target,$marker+1);

if (-e $userRedirectList && -f $userRedirectList) {
  &AttemptRedirect($target, $userRedirectList);
}

#
# If the routine gets this far, there are no viable substitutions or 
# redirections. Give a standard error message.
&ErrorMessage("$ENV{'PATH_INFO'} (from $ENV{'HTTP_REFERER'})");
exit(0);

####
#### END OF MAIN PROGRAM
####

##
## Subroutine logMsg
##   Open the log file, write a time stamp and the message provided as
##   a parameter to this routine, and close the log file.
##   Parameters:
##      <$msg>     the message we wish to log
##   Returns:  <void>
sub logMsg {
  my($msg) = @_;
  return if !defined $RED_LOG;
  
# If $RED_LOG is defined, open and time-stamp this action.  Return if
# openning the log file causes an error.
  open(LOG,">>$RED_LOG") || return;  
  my($sec,$min,$hour,$mday,$mon,$year)=localtime(time());
  printf LOG "[%2.2d/%2.2d/%4.4d %2.2d:%2.2d:%2.2d] %s\n",$mday,($mon+1),($year+1900),$hour,$min,$sec,$msg;
  close LOG;
}

##
## Subroutine readDatabaseFile
##   Open and read the database file.  See the manual page for the format
##   of the file.
##   Parameters:
##      <$dbfile>     Name of the file in the filespace
##   Returns:  <%redirects>   Hash array of redirect informaton
sub readDatabaseFile {
  my ($dbfile) = @_;

  my $keyword, $value, $key, %dbase;
  my $contLine="";
  my $keywords = 'newtarget type message position';

  open(DBFILE,$dbfile) || die "Can't open Database File ($dbfile): $!\n";
  while (<DBFILE>) {
    s/([^#]*)#.*/$1/;                  # handle comment lines
    next if (/^\s*$/);                 # continue through file if line is blank
    if (/(.*)\\/) {                    # if this line is continued by the next
      $contLine .= $1;                 # hold this line ...
      next;                            # ... and go to the next
    }
    $_ = $contLine . $_; $contLine=""; # append prev lines, clear out prev lines

    if (!/^([^:]+):\s+(.*)/) {
      warn "Unknown line in database file: $_\n";
      next;
    }
    $keyword = lc($1); $value = $2;

    if ($keyword =~ /^oldtarget/i) {    # "database" directive marks the 
      $key = $value;                  # beginning of a new database definition
      warn "Encountered database value '$dbase' more than once\n"
         if (defined($redirects{$key}));
      next;
    }

    if ($keywords =~ /\b$keyword\b/) {
      $dbase{$key}{$keyword} = $value;
      next;
    } else {
      warn "Unknown keyword: $keyword ($value)\n";
      next;
    }
  }
  return %dbase;
}


##
## SUBROUTINE AttemptRedirect
##
## Create the new location information, and attempt a redirection.
##   Parameters:
##      <$location>    location where the statistics will be put
##      <$recordRef>   a reference to the hash of the log file entry
##   Returns:  void
sub AttemptRedirect {
  my($target,$redirectFile) = @_;

#
# First, read the redirects database file
  my(%redirects) = &readDatabaseFile($redirectFile);

#
# Next change %7E encoding to the more common tilde
  $target =~ s/%7E/~/i;

#
# Now, look for matching redirect paths.  The general idea is to split $target
# on slashes, and then use this array to build gradually smaller and smaller 
# paths until we find a match or give up.
  my(@splitTarget) = split /\//, $target;
  for ($index=$#splitTarget; $index > 0; $index--) {
    my($attemptedTarget) = join('/', @splitTarget[0..$index]);
    foreach $key (keys %redirects) {
      if ($attemptedTarget eq $key) {
# If we do find a match, then build a new target string based on the 'newtarget'
# info from the redirect file and the right-most parts of $target which did not
# match the 'oldtarget' string.  Then, for good measure, make sure there is
# an "http" handle on the front, and add one from this server if there is not.
# Finally, send a descriptive message, the new target, and the redirect type
# to the RedirectURI() routine and exit the program.
        my($newtarget)=$redirects{$key}{'newtarget'}.join('/',@splitTarget[($index+1)..$#splitTarget]);
        if ($newtarget =~ /^\//) {
          $newtarget = "$ENV{'SERVER_URL'}$newtarget";
        }
        &RedirectURI("redirect $target", $newtarget, %{$redirects{$key}});
        exit(0);   ## Shouldn't be needed, but just in case
      }
    }
  }
}


##
## SUBROUTINE RedirectURI
##
## Send new location information back to the remote browser = Redirection!!!
##   Parameters:
##      <$reason>      for the log file, the reason this redirection is happening
##      <$newtarget>   URL of the new target
##      <$type>        type of redirection:  gone/force/delay/quiet
##   Returns:  void  (THIS SUBROUTINE MUST EXIT THE PROGRAM)
sub RedirectURI {
  my($reason,$newtarget,%redirInfo) = @_;
  my($headers,$metaInfo,$goThereMsg,$isHTMLfile);

#
# First, split $APAScontact into useful information, $APASemail (the second
# element) and $APAStitle (the fourth element)
  my($APASemail,$APAStitle) = (split /\t/, $APAScontact)[1,3];
  
# Determine if what was asked for what a ".htm[l]" file or a directory
# index.  The regex for the directory index is somewhat tricky, but 
# basically we are looking for a $newtarget that doesn't have a file
# extension on the end of it.  NOTE: This will cause directories with
# periods in them in the last part of the URL will *FAIL* (it will quietly
# redirect the user to the new location, not taking into account the
# $redirInfo{'type'} parameter.
  if (($newtarget =~ /\.htm(l)?$/i) || ($newtarget !~ m#/[^/]+\.\w+$#)) {
    $isHTMLfile = 1;
  }

#
# Then build some strings, $header (HTTP headers), $metaInfo (for HTML <HEAD>
# placement), and $goThereMsg (as part of the <BODY> text), that will be used
# to build the response.
  if ($redirInfo{'type'} eq 'gone') {
    $headers="HTTP/1.0 404 Gone\n";
  } elsif (($redirInfo{'type'} eq 'force')&&($isHTMLfile)) {
    $headers="HTTP/1.0 404 Moved\n";
  } elsif (($redirInfo{'type'} eq 'delay')&&($isHTMLfile)) {
    $headers="HTTP/1.0 404 Data follows...\n";
    $metaInfo="<META HTTP-EQUIV=\"Refresh\" CONTENT=\"${SECONDS};URL=$newtarget\">\n";
    $goThereMsg="You will be taken to the new location automatically in $SECONDS seconds, if your browser supports this feature.\n";
  } else {
    $headers="HTTP/1.0 302 Moved\nLocation: $newtarget\n";
  }

#
# Create the Non-processed Headers response back to the user.
  print $headers;
  if ($isHTMLfile) {
    print "Content-type: text/html\n\n";
    print CWRUheader('Document not found...','',$metaInfo);
    if (($redirInfo{'type'} ne 'gone')&&($redirInfo{'position'} ne 'bottom')) {
      print "<P>The document you requested may have moved.  It is possible that the new location may be\n";
      print "<BIG><B><A HREF=\"$newtarget\">$newtarget</A></B></BIG>.</P>\n";
      print "<P>$goThereMsg</P>\n";
    }
    if ($redirInfo{'message'} ne '') {
      open (MSGFILE, $redirInfo{'message'}) && do {
        print <MSGFILE>;
        close MSGFILE;
      };
    }
    if (($redirInfo{'type'} ne 'gone') && ($redirInfo{'position'} eq 'bottom')) {
      print "<P>The document you requested may have moved.  It is possible that the new location may be\n";
      print "<BIG><B><A HREF=\"$newtarget\">$newtarget</A></B></BIG>.</P>\n";
      print "<P>$goThereMsg</P>\n";
    }
    if ($ENV{'HTTP_REFERER'} ne "") {
      print "<P>Please send a message to the authors of the page your came from,";
      print " <A HREF=\"$ENV{'HTTP_REFERER'}\">$ENV{'HTTP_REFERER'}</A>,";
      print " to notify them of the change.\n</P>\n";
    }
    print "<P>If you have further questions, please contact the maintainer of $APAStitle (<A HREF=\"mailto:$APASemail\">$APASemail</A>).\n</P>\n";
    print CWRUfooter($APASemail);
  } else {
    print "\n\n";
  }

#
# Log a message in the log file and quit the program.
  logMsg("$reason $newtarget $redirInfo{'type'} (from $ENV{'HTTP_REFERER'})");
  exit(0);
}


##
## SUBROUTINE ErrorMessage
##
## This is the error message to give if there were no viable substitutions 
## or redirections for the missing WebResource.
##   Parameters:
##      <$target>    failed target
##   Returns:  void
sub ErrorMessage {
  my($target) = @_;
  
#
# First, split $APAScontact into useful information, $APASemail (the second
# element) and $APAStitle (the fourth element)
  my($APASemail,$APAStitle) = (split /\t/, $APAScontact)[1,3];

#
# Next, build a response back to the user...this should be the typical 404 Document
# Not Found error-type.
  print "HTTP/1.0 404 Document Not Found\n";
  print "Content-type: text/html\r\n\r\n";
  print CWRUheader("File Not Found!");
  print "<p><b>The URL you have tried to access does not seem to exist.</b> ";
  print "For further assistance, you should contact the maintainer of $APAStitle ";
  print "(<A HREF=\"mailto:$APASemail\">$APASemail</A>)";
  open (MSGFILE, $errorMessageFile) && do {
    print ", or follow one of these suggestions:</P>";
    print <MSGFILE>;
    close MSGFILE;
  };
  print "\n</P>\n";
  print CWRUfooter($APASemail);

#
# Log a message in the log file and quit the program.
  logMsg("**Failed: $target");
  exit(0);
}

=head1 NAME

nph-redirect.pl - NPH Error Script to handle redirections based on 404 errors from Netscape Enterprise server

=head1 SYNOPSIS

  nph-redirect.pl

=head1 DESCRIPTION

This script will run as a CGI to the Netscape Commerce server to intercept
404-document-not-found errors and redirect them to other locations if we
can tell the user where to go; otherwise, return a standard error message
with the added benefit of using the APAS maintainer's e-mail address rather
than the main Aurora e-mail address.

=head1 SETUP

First, you need to configure this script's variables.

=over 4

=item $SYS_RED_LIST

Full pathname of the file that declares server-wide redirections

=item $USR_RED_LIST

Name of file, which would appear in web document directories that contains
redirections for files/directories in that directory (can be written by
the user, not necessarily by the web administrator).

=item $RED_LOG

Full pathname of the redirection logfile

=item $errorMessageFile

Location of the portion of the normal 404 error message that will be sent to
the user if the the redirection fails

=item $faviconFile

Location of the MSIE 5-specific favorites icon for the server

=item $SECONDS

Number of seconds in the META REFRESH delay for "delay"-type redirections
(see below for what a "delay"-type redirection is).

=item $APAScontact

Default APAS contact, with the format <TAB>E-mail<TAB><TAB>Title text<TAB>

=back

Also note, if you are not at CWRU, there are many customizations that you
will have to deal with.  These include "use CWRUWWW" (our PERL library),
CWRUheader()/CWRUfooter() (generating our standard headers and footers), 
and all APAS references (our database of information providers).

=head1 CONFIGURATION FILE FORMAT

The configuration file contains redirection specifications with the 
keyword/value format

=over 4

=item keyword: value

=back

One redirection tuple is made up of three to five parts:

=over 4

=item 1.

An 'oldtarget'...the old path of the file in the web server.  Note:
directories must *NOT* end in a slash.

=item 2.

A 'newtarget'...the full or partial URL of the new location.  Note:
directories *MUST* end in a slash.

=item 3.

A 'type'...which can be one of four types:

=over 6

=item gone

The target is gone and we don't wish to offer suggestions on
where to go.

=item force

Force the user to click on the URL anchor to take them to the
new location.

=item delay

Provide the opportunity for the user to click on the URL anchor to
go to the new location, but also use a <META> REFRESH tag to take 
the user to the new location after a system-defined delay.

=item quiet

Quietly send the user to the new location using the "Location:" 
HTTP header.  Also provide an HTML file with the URL anchor on 
the off chance that the "Location:" header didn't work or isn't
supported.

=back

=item 4.

A 'message', which is used as an alternative to the predefined 
redirection message.  This is a complete file name in the UNIX
file space where the message is.  The message is inserted between
the standard header and footer.

=item 5.

A 'position'...which specifies whether the 'message' is before
or after the bit of generated text that tells the user where
the guessed new location is (only applies to "force" types).

=back

For example:

 oldtarget: /lit/homes/sully
 newtarget: /dms/homes/sully/
 type: force

 oldtarget: /lit/homes/sully/resume.html
 newtarget: http://www.cwru.edu:80/dms/homes/sully/resume.html
 type: force

The file can contain comments by beginning the comment with a "#" character,
lasting until the end of the line.  Lines can be continued to the next line
but using the "\" character at the end of the line.

=head1 COPYRIGHT

Copyright 1997 by Case Western Reserve University.

=head1 AUTHOR

This program is based on RedMan Version 2.10 (7Nov95)
The main RedMan WebPage is http://sw.cse.bris.ac.uk/WebTools/redman.html.
Major revisions by jrp 5/96--cleaned up html for error message, 
made files/subdirectories under redirects work with ncsa 1.5.1

 Peter Murray
 Library Systems Manager
 Digital Media Services
 Case Western Reserve University
 pem@po.cwru.edu
 http://www.cwru.edu/home/pem.html

Updates available from http://www.cwru.edu/lit/homes/pem/projects/nph-redirect.html

=cut


