#!/usr/bin/perl -w
##############################################################################
# Simple Search                 Version 1.0
# Copyright 1996 Matt Wright    mattw@worldwidemart.com
# and erik sandblom
#
# CGI script: reads the search request from QUERY_STRING, scans the bulletin
# archive for the requested terms and prints the result page.
#
# All variables are deliberately left as package globals: the subs below
# (and the ones further down the file) communicate through them.

# Send Perl's warnings and errors to a log file. Best effort: if this fails
# there is nowhere left to report it.
open STDERR, '>>', 'search_errors';

$archive    = '/home/sandblom/ern_archive';
$scriptname = $0;
$scriptname =~ s#.*/##;    # keep only the file name part of the path

# Alternation of everything that counts as a word border when matching a
# term inside a bulletin.
# NOTE(review): the value contains a plain space although \s is already
# listed, and '"' appears twice; some alternatives look like HTML entities
# that were decoded at some point. Confirm against a known-good copy.
$wordborder = '\s|<|>|,|\.|\?|!|-| |\(|\)|"|\'|"|“|”|‘|’';

print "Content-type: text/html\n\n";
print `cat /home/sandblom/.www/cgi-bin/searchheader`;

unless ($ENV{'QUERY_STRING'}) {    # if no search information
    print `cat /home/sandblom/.www/cgi-bin/whatsearch`;
    exit;
}

parse_form();
print_form();    # printed before searching: speeds presentation
search();
print_hits();
print_stats();

# parse_form
#
# Decodes $ENV{QUERY_STRING} into %FORM, applies the defaults for 'boolean'
# (OR), 'case' (Insensitive) and 'startat' (0), normalises the search terms
# and stores a copy of them in $savedterms. Sets $weird to 1 when the terms
# contained '_', '[' or ']' (those characters are turned into spaces).
# Prints the "whatsearch" page and exits when no terms were supplied.
sub parse_form {
    $buffer = $ENV{'QUERY_STRING'};
    $buffer =~ s/=\Z//;                  # if query string ends in =
    unless ($buffer =~ /=[^&]*?\Z/) {    # OK if it ends in "name=value"
        $buffer =~ s/&[^=]*?\Z//;        # otherwise drop the dangling name
    }
    unless ($buffer =~ /terms=[^&=]+/) {    # if no search terms
        print `cat /home/sandblom/.www/cgi-bin/whatsearch`;
        exit;
    }

    @pairs = split(/&/, $buffer);
    foreach $pair (@pairs) {
        ($name, $value) = split(/=/, $pair);
        next unless defined $name;              # empty pair, e.g. "a=b&&c=d"
        $value = '' unless defined $value;      # "name" or "name=" without value
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $FORM{$name} = $value;
    }

    unless (($FORM{'boolean'}) and ($FORM{'boolean'} eq 'AND')) {
        $FORM{'boolean'} = 'OR';
    }
    unless (($FORM{'case'}) and ($FORM{'case'} eq 'Sensitive')) {
        $FORM{'case'} = 'Insensitive';
    }
    unless ($FORM{'startat'}) {
        $FORM{'startat'} = 0;
    }

    # Rewrite special characters in the terms as named HTML entities.
    # As found, every one of these substitutions but one replaced a character
    # with itself, and the exception turned 'Ä' into '&Aelig;' (not a defined
    # entity; the name for 'Æ' is '&AElig;').
    # NOTE(review): this assumes the archive stores these characters as named
    # entities - confirm against the archive before deploying.
    my @entity_for = (
        [ '£'    => '&pound;'  ],
        [ "\x80" => '&euro;'   ],
        [ "\xA4" => '&euro;'   ],
        [ 'å'    => '&aring;'  ],
        [ 'Å'    => '&Aring;'  ],
        [ 'ä'    => '&auml;'   ],
        [ 'Ä'    => '&Auml;'   ],
        [ 'æ'    => '&aelig;'  ],
        [ 'Æ'    => '&AElig;'  ],
        [ 'á'    => '&aacute;' ],
        [ 'à'    => '&agrave;' ],
        [ 'â'    => '&acirc;'  ],
        [ 'é'    => '&eacute;' ],
        [ 'è'    => '&egrave;' ],
        [ 'í'    => '&iacute;' ],
        [ 'Í'    => '&Iacute;' ],
        [ 'ñ'    => '&ntilde;' ],
        [ 'Ñ'    => '&Ntilde;' ],
        [ 'ö'    => '&ouml;'   ],
        [ 'Ö'    => '&Ouml;'   ],
        [ 'Ø'    => '&Oslash;' ],
        [ 'ø'    => '&oslash;' ],
        [ 'ó'    => '&oacute;' ],
        [ 'Ó'    => '&Oacute;' ],
        [ '®'    => '&reg;'    ],
        [ 'ü'    => '&uuml;'   ],
        [ 'Ü'    => '&Uuml;'   ],
        [ 'û'    => '&ucirc;'  ],
        [ 'Û'    => '&Ucirc;'  ],
        [ 'ß'    => '&szlig;'  ],
    );
    foreach my $mapping (@entity_for) {
        my ($char, $entity) = @{$mapping};
        $FORM{'terms'} =~ s/\Q$char\E/$entity/g;
    }

    # '_', '[' and ']' become spaces; tr returns how many were replaced.
    $weird = ($FORM{'terms'} =~ tr/_[]/ /) ? 1 : 0;

    # Save the search query for putting in the form and links. This
    # preserves the order of terms, so the user is not confused. It has no
    # technical importance to the search itself.
    $savedterms = $FORM{'terms'};
}

# print_form
#
# Prints the search form, choosing the output by $FORM{'boolean'} and
# $FORM{'case'}.
# NOTE(review): the strings below contain no markup and both branches of
# each test print identical text; the form's HTML appears to be missing from
# this copy of the file. The strings are kept exactly as found - restore the
# markup from a known-good version.
sub print_form {
    print "\n";

    if ($FORM{'boolean'} eq 'AND') {
        print "AND\nOR\n\n";
    }
    elsif ($FORM{'boolean'} eq 'OR') {
        print "AND\nOR\n\n";
    }

    if ($FORM{'case'} eq 'Insensitive') {
        print "Case Insensitive\nCase Sensitive\n\n";
    }
    elsif ($FORM{'case'} eq 'Sensitive') {
        print "Case Insensitive\nCase Sensitive\n\n";
    }

    print "\n\n\n\n";
}

# search
#
# Loads the archive into @bulletins (split on "--nextbulletin--\n"; the last
# line of the file, minus "--updated--", goes to $updated), turns
# $FORM{'terms'} into the pattern list @terms and records in %include, keyed
# by bulletin text, 'yes' or 'no' for every bulletin according to
# $FORM{'boolean'} and $FORM{'case'}.
sub search {
    # open bulletins
    my $archive_fh;
    if (open $archive_fh, '<', $archive) {
        @lines = <$archive_fh>;
        close $archive_fh;
    }
    else {
        # Log the failure and carry on with an empty archive so the rest of
        # the page can still be produced.
        warn "$scriptname: cannot open $archive: $!\n";
        @lines = ();
    }
    $updated = pop @lines;
    $updated = '' unless defined $updated;    # empty or unreadable archive
    $updated =~ s/--updated--//;
    $string    = join '', @lines;
    @bulletins = (split /--nextbulletin--\n/, $string);

    # find quoted phrases
    while ($FORM{'terms'} =~ s/\"(.*?)\"//) {
        push @quotedterms, $1;
    }

    # no extra spaces after finding phrases
    $FORM{'terms'} =~ tr/ / /s;
    $FORM{'terms'} =~ s/^ //;
    $FORM{'terms'} =~ s/ $//;

    # get unquoted words
    @unquotedterms = split(/\s+/, $FORM{'terms'});

    # Put phrases and words in one list without making them a single long
    # string. Every term is escaped so user input is matched literally; the
    # only wildcard is '*', which stands for any run of non-space characters.
    foreach $term (@quotedterms, @unquotedterms) {
        next if $term eq '';          # e.g. an empty "" phrase
        $term = quotemeta $term;
        $term =~ s/\\\*/[^ ]*?/g;     # quotemeta turned '*' into '\*'
        push @terms, $term;
    }

    # \b is a word border but doesn't work with html entities, so the
    # script uses its own word border expression ($wordborder). A term only
    # matches when a border character precedes and follows it.
    my @matchers;
    foreach my $pattern (@terms) {
        push @matchers,
            $FORM{'case'} eq 'Insensitive'
            ? qr/(?:$wordborder)$pattern(?:$wordborder)/i
            : qr/(?:$wordborder)$pattern(?:$wordborder)/;
    }

    # actual search part
    my $need_all = ($FORM{'boolean'} eq 'AND');
    foreach $bulletin (@bulletins) {
        # NOTE(review): as written this replaces a space with a space; the
        # accompanying comment was: for finding "X 2000". Presumably the
        # pattern was a non-breaking-space entity - confirm.
        $bulletin =~ s/ / /g;

        next unless @matchers;    # no terms: nothing is recorded

        # AND: 'yes' unless some term is missing.
        # OR:  'no' unless some term is present.
        my $verdict = $need_all ? 'yes' : 'no';
        foreach my $matcher (@matchers) {
            my $found = ($bulletin =~ $matcher);
            if ($need_all and not $found) {
                $verdict = 'no';
                last;
            }
            if (not $need_all and $found) {
                $verdict = 'yes';
                last;
            }
        }
        $include{$bulletin} = $verdict;
    }
}

# print_hits: the statements below are unchanged; the sub continues on the
# following lines.
sub print_hits {
    $showingfrom = ($FORM{'startat'} + 1);
    $endat       = ($FORM{'startat'} + 10);
    $continue    = ($FORM{'startat'} + 11);
$savedterms =~ s/"/%22/g; $savedterms =~ s/ /\+/g; $i = 0; foreach $key (keys %include) { if ($include{$key} eq 'yes') { push @hitlist, $key; } } @hitlist = sort {$b cmp $a} @hitlist; $numberofhits = @hitlist; $interval = ($numberofhits - $FORM{'startat'}); if ($numberofhits == 0) { print "Sorry, couldn't find any bulletins. Tips: