#!/usr/bin/perl -w
##############################################################################
# Simple Search Version 1.0 #
# Copyright 1996 Matt Wright mattw@worldwidemart.com #
# and erik sandblom
# ---------------------------------------------------------------------------
# Main program: set up logging and shared settings, emit the page header,
# then run the search pipeline (parse -> form -> search -> hits -> stats).
# ---------------------------------------------------------------------------

# Append diagnostics to the file "search_errors" (relative path).  The log is
# opened on its own handle first and STDERR is only redirected when that
# succeeds: re-opening STDERR directly closes it on failure, which would
# silently discard every later warning.
if (open my $errlog, '>>', 'search_errors') {
    open STDERR, '>&', $errlog;
}

# Settings shared with the subroutines below (package globals).
$archive = '/home/sandblom/ern_archive';    # file read by search()
$scriptname = $0;
$scriptname =~ s#.*/##;                     # strip everything up to the last "/"

# Alternation used on both sides of a search term in place of \b.
# NOTE(review): the duplicated `"` and the bare space (already covered by \s)
# suggest some alternatives were once HTML entities such as &quot; or &nbsp;
# -- confirm against the archive format before relying on this list.
$wordborder = '\s|<|>|,|\.|\?|!|-| |\(|\)|"|\'|"|“|”|‘|’';

print "Content-type: text/html\n\n";
# The included files are fixed paths, so no user input reaches the shell here.
print `cat /home/sandblom/.www/cgi-bin/searchheader`;

unless ($ENV{'QUERY_STRING'}) { # if no search information
    print `cat /home/sandblom/.www/cgi-bin/whatsearch`;
    exit;
}

parse_form();
print_form();   # printed before searching: speeds presentation
search();
print_hits();
print_stats();
# parse_form
#
# Decodes the CGI query string in $ENV{'QUERY_STRING'} into the global %FORM
# and normalises the search options.  Takes no arguments.
#
# Globals written:
#   %FORM        decoded name/value pairs; on return
#                  $FORM{'boolean'} is 'AND' or 'OR'            (default 'OR')
#                  $FORM{'case'}    is 'Sensitive' or 'Insensitive'
#                                                        (default 'Insensitive')
#                  $FORM{'startat'} is a non-negative integer   (default 0)
#                  $FORM{'terms'}   has special characters replaced by HTML
#                                   entities and "_", "[", "]" replaced by spaces
#   $weird       1 if any of "_", "[", "]" had to be replaced, otherwise 0
#   $savedterms  copy of the final $FORM{'terms'}
#   $buffer, @pairs  working copies of the query string (left as package
#                globals in case other parts of the script read them)
#
# If the query contains no search terms, the "whatsearch" page is printed and
# the script exits.
sub parse_form {
    # Print the "what do you want to search for" page and stop.
    my $show_help_and_exit = sub {
        print `cat /home/sandblom/.www/cgi-bin/whatsearch`;
        exit;
    };

    $buffer = $ENV{'QUERY_STRING'};
    $buffer = '' unless defined $buffer;
    $buffer =~ s/=\Z//; # if query string ends in =
    unless ($buffer =~ /=[^&]*?\Z/) { # OK if query string ends in "name=value"
        $buffer =~ s/&[^=]*?\Z//;     # otherwise drop the dangling last pair
    }

    # Cheap pre-check on the raw string: no "terms=value" at all.
    $show_help_and_exit->() unless $buffer =~ /terms=[^&=]+/;

    @pairs = split(/&/, $buffer);
    foreach my $pair (@pairs) {
        next if $pair eq '';                      # "a=1&&b=2"
        # Limit 2 so a value that itself contains "=" is not truncated.
        my ($name, $value) = split(/=/, $pair, 2);
        $value = '' unless defined $value;        # "name" without "=value"
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $FORM{$name} = $value;
    }

    # The pre-check also accepts names that merely end in "terms"
    # (e.g. "xterms=foo"), so make sure the real field is present.
    $show_help_and_exit->()
        unless defined $FORM{'terms'} and length $FORM{'terms'};

    unless (($FORM{'boolean'}) and ($FORM{'boolean'} eq 'AND')) {
        $FORM{'boolean'} = 'OR';
    }
    unless (($FORM{'case'}) and ($FORM{'case'} eq 'Sensitive')) {
        $FORM{'case'} = 'Insensitive';
    }
    # startat is used for arithmetic and as an array index later on, so
    # anything that is not a plain non-negative integer falls back to 0.
    if (defined $FORM{'startat'} and $FORM{'startat'} =~ /\A[0-9]+\z/) {
        $FORM{'startat'} += 0;
    }
    else {
        $FORM{'startat'} = 0;
    }

    # Replace special characters in the terms by HTML entities, applied in
    # this order.  The original substitutions had degraded into no-ops
    # (character replaced by itself); the one surviving entity replacement
    # shows they are meant to produce entities, and that line repeated the
    # "Ä" pattern with an invalid entity name where "Æ" => &AElig; belongs.
    # NOTE(review): presumably the archive stores these characters as
    # entities and the input arrives in a single-byte encoding -- confirm.
    my @entity_for = (
        [ '£',    '&pound;'  ],
        [ "\x80", '&euro;'   ],   # byte 0x80 (euro sign in Windows-1252)
        [ "\xA4", '&euro;'   ],   # byte 0xA4 (euro sign in ISO-8859-15)
        [ 'å',    '&aring;'  ],
        [ 'Å',    '&Aring;'  ],
        [ 'ä',    '&auml;'   ],
        [ 'Ä',    '&Auml;'   ],
        [ 'æ',    '&aelig;'  ],
        [ 'Æ',    '&AElig;'  ],
        [ 'á',    '&aacute;' ],
        [ 'à',    '&agrave;' ],
        [ 'â',    '&acirc;'  ],
        [ 'é',    '&eacute;' ],
        [ 'è',    '&egrave;' ],
        [ 'í',    '&iacute;' ],
        [ 'Í',    '&Iacute;' ],
        [ 'ñ',    '&ntilde;' ],
        [ 'Ñ',    '&Ntilde;' ],
        [ 'ö',    '&ouml;'   ],
        [ 'Ö',    '&Ouml;'   ],
        [ 'Ø',    '&Oslash;' ],
        [ 'ø',    '&oslash;' ],
        [ 'ó',    '&oacute;' ],
        [ 'Ó',    '&Oacute;' ],
        [ '®',    '&reg;'    ],
        [ 'ü',    '&uuml;'   ],
        [ 'Ü',    '&Uuml;'   ],
        [ 'û',    '&ucirc;'  ],
        [ 'Û',    '&Ucirc;'  ],
        [ 'ß',    '&szlig;'  ],
    );
    foreach my $mapping (@entity_for) {
        my ($char, $entity) = @$mapping;
        $FORM{'terms'} =~ s/\Q$char\E/$entity/g;
    }

    # "_", "[" and "]" are not supported in search terms: turn them into
    # spaces and remember that we did, so a hint can be shown to the user.
    $weird = ($FORM{'terms'} =~ tr/_[]/ /) ? 1 : 0;

    # Keep a copy of the query for re-display in the form and in links.
    # It preserves the order of the terms so the user is not confused; it has
    # no influence on the search itself.
    $savedterms = $FORM{'terms'};
}
# print_form
#
# Writes the search-form area to STDOUT.  As it stands the output is just
# three newline characters.
# NOTE(review): the string looks like it once contained the form markup
# (pre-filled from the saved terms) -- confirm against the deployed script.
sub print_form {
    my $form_area = "\n" . "\n\n";
    print $form_area;
}
# search
#
# Loads the bulletin archive and decides, for every bulletin, whether it
# matches the search terms in $FORM{'terms'}.  Takes no arguments.
#
# Globals read:
#   $archive      path of the archive file
#   $wordborder   regex alternation that must surround every matched term
#   %FORM         'terms', 'boolean' ('AND'/'OR'), 'case'
#                 ('Sensitive'/'Insensitive')
# Globals written:
#   @lines, $string, @bulletins   raw archive content and the split bulletins
#   $updated      last line of the archive with "--updated--" removed
#   @quotedterms, @unquotedterms, @terms
#                 the phrases / words found in the query ("*" rewritten to the
#                 wildcard pattern, as before)
#   %include      bulletin text => 'yes' or 'no'
#   $FORM{'terms'} is left with the quoted phrases removed and spaces squeezed
#
# Search terms are user input and are therefore matched literally (escaped
# with quotemeta); only "*" keeps its meaning as a wildcard for "any run of
# non-space characters".  Previously a term such as "c++" or "(" produced an
# invalid regex and aborted the script.
sub search {
    # Read the whole archive.  A missing/unreadable archive is logged and
    # treated as empty so the user still gets a "nothing found" page.
    @lines = ();
    if (open my $archive_fh, '<', $archive) {
        @lines = <$archive_fh>;
        close $archive_fh;
    }
    else {
        warn "search: cannot open archive '$archive': $!\n";
    }

    # The last line of the archive carries the "--updated--" stamp.
    $updated = pop @lines;
    $updated = '' unless defined $updated;
    $updated =~ s/--updated--//;

    $string = join '', @lines;
    @bulletins = split /--nextbulletin--\n/, $string;

    # Pull "quoted phrases" out of the query first ...
    while ($FORM{'terms'} =~ s/"(.*?)"//) {
        push @quotedterms, $1;
    }
    # ... tidy the spaces that are left behind ...
    $FORM{'terms'} =~ tr/ / /s;
    $FORM{'terms'} =~ s/^ //;
    $FORM{'terms'} =~ s/ $//;
    # ... and treat the rest as single words.
    @unquotedterms = grep { length } split /\s+/, $FORM{'terms'};

    # Compile one matcher per term, once, instead of once per bulletin.
    my $ignore_case = ($FORM{'case'} eq 'Insensitive');
    my @matchers;
    foreach my $term (@quotedterms, @unquotedterms) {
        next unless length $term;   # an empty term would match everywhere

        my $literal = quotemeta $term;
        $literal =~ s/\\\*/[^ ]*?/g;    # escaped "*" back to the wildcard
        push @matchers,
            $ignore_case
                ? qr/(?:$wordborder)$literal(?:$wordborder)/i
                : qr/(?:$wordborder)$literal(?:$wordborder)/;

        # Keep the globals in the form they always had (wildcard rewritten in
        # place) for any other code that reads them.
        $term =~ s/\*/[^ ]*?/g;
        push @terms, $term;
    }

    my $mode = $FORM{'boolean'};
    my $can_match = @matchers
        && ($mode eq 'AND' || $mode eq 'OR')
        && ($FORM{'case'} eq 'Insensitive' || $FORM{'case'} eq 'Sensitive');

    foreach my $bulletin (@bulletins) {
        # For finding "X 2000": non-breaking-space entities count as spaces.
        # (The substitution had degraded to replacing a space by a space.)
        $bulletin =~ s/&nbsp;/ /g;
        next unless $can_match;

        # AND: every term must match; OR: one match is enough.
        my $wanted = ($mode eq 'AND') ? 1 : 0;
        foreach my $matcher (@matchers) {
            my $found = ($bulletin =~ $matcher) ? 1 : 0;
            if ($mode eq 'AND' and !$found) { $wanted = 0; last; }
            if ($mode eq 'OR'  and  $found) { $wanted = 1; last; }
        }
        $include{$bulletin} = $wanted ? 'yes' : 'no';
    }
}
sub print_hits {
$showingfrom = ($FORM{'startat'} +1);
$endat = ($FORM{'startat'} + 10);
$continue = ($FORM{'startat'} + 11);
$savedterms =~ s/"/%22/g;
$savedterms =~ s/ /\+/g;
$i = 0;
foreach $key (keys %include) {
if ($include{$key} eq 'yes') {
push @hitlist, $key;
}
}
@hitlist = sort {$b cmp $a} @hitlist;
$numberofhits = @hitlist;
$interval = ($numberofhits - $FORM{'startat'});
if ($numberofhits == 0) {
print "Sorry, couldn't find any bulletins. Tips:\n";
if (($savedterms !~ /[a-z]/) and ($FORM{'case'} eq 'Sensitive')) {
print "- Unless you're looking for something that is normally written with ALL CAPITALS, try pressing the \"Case Insensitive\" button.
\n";
if ($FORM{'terms'} =~ /\./) {
print "- Erik's Rail News writes acronyms without dots (.).
\n";
}
}
if ($weird != 0) {
print "- Sorry, this search engine does not recognize weird symbols like \[, \], or _. Use a * to find variant of words; \"comput*\" finds computer, computing and computer.
\n";
}
if ($i > 1) {
print "- Try using fewer or more general search terms.
\n";
}
print "- Use an asterisk, *, to use wildcards. For example, \"comput*\" will find computer, computers and computing.
\n";
print "- Try the Erik's Rail News Search and Archive page.
\n";
print "- If you're pretty sure what you're looking for is not here, click here to continue your search on Google.
\n";
}
else {
$rest = "bulletins displayed first. Archive updated to $updated.\n";
if ($numberofhits <= 10) {
print "Found $numberofhits bulletins, newest $rest";
}
elsif ($FORM{'startat'} == 0) {
print "
Found $numberofhits bulletins, showing first 10. Newest $rest";
}
elsif ($interval > 10) {
print "
Found $numberofhits bulletins, showing $showingfrom to $endat. Newest $rest";
}
elsif ($interval < 10) {
print "
Found $numberofhits bulletins, showing last $interval. Newest $rest";
}
if (($savedterms !~ /[a-z]/) and ($FORM{'case'} eq 'Insensitive')) {
print "
- If you're looking for an acronym like ICE or EWS, try pressing the \"Case Sensitive\" button.\n";
}
if ($weird != 0) {
print "
- Sorry, this search engine does not recognize weird symbols like \[, \], or _. Use a * to find variant of words; \"comput*\" finds computer, computing and computer.
\n";
}
if (($FORM{'terms'} =~ /britain|canada|china|finland|france|germany|hungary|italy|sweden|norway|mexico|poland|portugal/i) and (!($FORM{'terms'} =~ /\*/))){
print"- Looking for information about a country? Use an asterisk to use the wildcard feature to find the noun as well as the adjective. For example, to find information about Britain, just type in \"brit*\", for Britain and British. For Sweden, type \"swed*\" and so on.
\n";
}
if ($FORM{'terms'} =~ /rail|train/i) {
print "- Unless words like \"rail\" or \"train\" form part of the name of an organisation you are interested in, try deleting these words from your search to get more accurate results.
\n";
}
if ($numberofhits > 20){
unless ($FORM{'terms'} =~ /\d\d\d\d|january|february|march|april|may|june|july|august|september|october|november|december/i){
print "- Try adding a month and/or year if you want to narrow the search results.
\n";
}
}
print "
";
foreach $hit (@hitlist) {
if ($FORM{'startat'} < $endat) {
unless ($FORM{'startat'} >= $numberofhits) {
print "
\n\n";
$hitlist[$FORM{'startat'}] =~ s/\d\d\d\d-\d\d-\d\d--\d\d\d\d\n//;
print "$hitlist[$FORM{'startat'}]\n"; #the dos command for cat is type
$FORM{'startat'}++;
}
}
elsif ($FORM{'startat'} = $continue) {
print "
\n\n";
print "Next 10<\/a><\/p>";
print "\n\n";
last;
}
}
print "
Back to Erik's Rail News front page
\n";
print "