" . $STR_RESULTS_FOR . " " . htmlspecialchars($query));
// When categories are enabled, append the searched category (or the
// "all categories" wording when $cat is -1) to the results heading.
if ($UseCats)
{
    $categoryLabel = ($cat == -1)
        ? $STR_RESULTS_IN_ALL_CATEGORIES
        : $STR_RESULTS_IN_CATEGORY . " \"" . rtrim($catnames[$cat]) . "\"";
    print(" " . $categoryLabel);
}
// Terminate the heading line.
print "
\n";
// Begin main search loop -----------------------------------------------------

// Number of search terms and number of indexed pages.
$numwords   = count($SearchWords);
$pagesCount = count($urls);

$outputline   = 0;
$UseWildCards = 1; // default as using wildcard

// Build $res_table: one row per page ($pagesCount rows), every slot zero.
// Row layout:
//   [0] score
//   [1] num of sw matched
//   [2] pagetext ptr #1
//   [3] pagetext ptr #2
//   [4] pagetext ptr #3
//   [5] 'and' user search terms matched
//$res_table = array_fill(0, $pagesCount, array_fill(0, 6, 0));
$res_table = array();
$i = 0;
while ($i < $pagesCount)
{
    $res_table[$i] = array(0, 0, 0, 0, 0, 0);
    $i++;
}

// Counters and flags updated while the search terms are processed.
$exclude_count      = 0; // number of exclusion ("-term") words seen
$SkippedWords       = 0; // number of search words skipped
$context_maxgoback  = 1;
$SkippedExactPhrase = 0;
$maxscore           = 0;

// Per-search-word hit counts, one zeroed slot per term.
//$sw_results = array_fill(0, $numwords, 0);
$sw_results = array();
$i = 0;
while ($i < $numwords)
{
    $sw_results[$i] = 0;
    $i++;
}
// Main search loop.
//
// For every search word this loop:
//   1. skips empty words and words shorter than $MinWordLen,
//   2. strips a leading "-" and flags the word as an exclusion term,
//   3. prepares exact-phrase matching (word contains a space and
//      $AllowExactPhrase is on) or wildcard matching (word contains * or ?),
//   4. scans the dictionary and, for every matching entry, folds that
//      entry's wordmap records (score, page number, pagetext pointer)
//      into $res_table.
//
// Writes: $res_table, $sw_results, $maxscore, $exclude_count,
//         $context_maxgoback, $SkippedExactPhrase.
// NOTE(review): $SkippedWords is read here but never written; presumably
// SkipSearchWord() maintains it -- confirm against its definition.
for ($sw = 0; $sw < $numwords; $sw++)
{
    if ($SearchWords[$sw] == "")
        continue;

    // check min length
    if (strlen($SearchWords[$sw]) < $MinWordLen) {
        SkipSearchWord($sw);
        continue;
    }

    $ExactPhrase = 0;
    $UseWildCards = 0;
    $ExcludeTerm = 0;

    // Check exclusion searches
    if ($SearchWords[$sw][0] == "-")
    {
        $SearchWords[$sw] = substr($SearchWords[$sw], 1);
        $ExcludeTerm = 1;
        $exclude_count++;
    }

    if ($AllowExactPhrase == 1 && strpos($SearchWords[$sw], " ") !== false)
    {
        // Initialise exact phrase matching for this search term
        $ExactPhrase = 1;
        // explode() instead of split(): split() was removed in PHP 7, and
        // for a literal single-space delimiter both return the same pieces.
        $phrase_terms = explode(" ", $SearchWords[$sw]);
        //$phrase_terms = preg_split("/\W+/", $SearchWords[$sw], -1, 0 /*PREG_SPLIT_DELIM_CAPTURE*/);
        $num_phrase_terms = count($phrase_terms);
        if ($num_phrase_terms > $context_maxgoback)
            $context_maxgoback = $num_phrase_terms;
        $phrase_terms_data = array();
        $tmpid = 0;
        $WordNotFound = 0;
        // Preload the wordmap records of every term in the phrase.
        for ($j = 0; $j < $num_phrase_terms; $j++)
        {
            $tmpid = GetDictID($phrase_terms[$j]);
            if ($tmpid == -1) // word is not in dictionary
            {
                $WordNotFound = 1;
                break;
            }
            $wordmap_row = $dict[$tmpid][1];
            if ($wordmap_row != -1)
            {
                fseek($fp_wordmap, $wordmap_row);
                // first 2 bytes (little-endian) is the record count
                $countbytes = fread($fp_wordmap, 2);
                $phrase_data_count[$j] = ord($countbytes[0]) | ord($countbytes[1])<<8;
                for ($xbi = 0; $xbi < $phrase_data_count[$j]; $xbi++) {
                    // each record is 8 bytes: 16-bit score, 16-bit page number, 32-bit pointer
                    $xbindata = fread($fp_wordmap, 8);
                    if (strlen($xbindata) == 0)
                        print "error in wordmap file: expected data not found";
                    $phrase_terms_data[$j][$xbi] = unpack("vscore/vpagenum/Vptr", $xbindata);
                }
            }
            else
            {
                // dictionary entry without wordmap data (not indexed / skipped word)
                $phrase_data_count[$j] = 0;
                $phrase_terms_data[$j] = 0;
            }
        }
        // A phrase containing a word that is not in the dictionary cannot match.
        if ($WordNotFound == 1)
            continue;
    }
    else if (strpos($SearchWords[$sw], "*") !== false || strpos($SearchWords[$sw], "?") !== false)
    {
        // Wildcard term: build a regular expression for it.
        $pattern = "/";
        // match entire word
        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\A";
        $SearchWords[$sw] = pattern2regexp($SearchWords[$sw]);
        $pattern = $pattern . $SearchWords[$sw];
        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\Z";
        if ($ToLowerSearchWords != 0)
            $pattern = $pattern . "/i";
        else
            $pattern = $pattern . "/";
        $UseWildCards = 1;
    }

    // Scan the dictionary for entries matching this search word.
    for ($i = 0; $i < $dict_count; $i++)
    {
        $dictline = $dict[$i];
        $word = $dict[$i][0];

        // if we're not using wildcards, direct match
        if ($ExactPhrase == 1)
        {
            // todo: move to next phrase term if first phrase term is skipped?
            // compare first term in exact phrase
            $result = strcasecmp($phrase_terms[0], $word);
        }
        else if ($UseWildCards == 0)
        {
            if ($SearchAsSubstring == 0)
                $result = strcasecmp($SearchWords[$sw], $word);
            else
            {
                // Strict comparison: stristr() returns the matched tail of
                // $word, which may itself be the falsy string "0".
                if (stristr($word, $SearchWords[$sw]) === FALSE)
                    $result = 1; // not matched
                else
                    $result = 0; // matched
            }
        }
        else
        {
            // if we have wildcards...
            $result = !(preg_match($pattern, $word));
        }

        // result = 0 if matched, result != 0 if not matched.

        // word found but indicated to be not indexed or skipped
        if ($result == 0 && $dictline[1] == -1)
        {
            if ($UseWildCards == 0 && $SearchAsSubstring == 0)
            {
                if ($ExactPhrase == 1)
                    $SkippedExactPhrase = 1;
                SkipSearchWord($sw);
                break;
            }
            else
                continue;
        }

        if ($result == 0)
        {
            // keyword found in the dictionary
            if ($ExactPhrase == 1)
            {
                // we'll use the wordmap data for the first term that we have worked out earlier
                $data = $phrase_terms_data[0];
                $data_count = $phrase_data_count[0];
                $ContextSeeks = 0;
            }
            else
            {
                // seek to position in wordmap file
                fseek($fp_wordmap, $dictline[1]);
                //print "seeking in wordmap: " . $dictline[1] . "\n";
                // first 2 bytes is data count
                $countbytes = fread($fp_wordmap, 2);
                $data_count = ord($countbytes[0]) | ord($countbytes[1])<<8;
                //print "data count: " . $data_count . "\n";
                for ($bi = 0; $bi < $data_count; $bi++)
                {
                    $bindata = fread($fp_wordmap, 8);
                    if (strlen($bindata) == 0)
                        print "Error in wordmap file: expected data not found";
                    $data[$bi] = unpack("vscore/vpagenum/Vptr", $bindata);
                }
            }
            $sw_results[$sw] = $data_count;

            // Go through wordmap for each page this word appears on
            for ($j = 0; $j < $data_count; $j++)
            {
                $score = $data[$j]["score"];
                $txtptr = $data[$j]["ptr"];

                if ($ExactPhrase == 1)
                {
                    $maxptr = $data[$j]["ptr"];
                    $maxptr_term = 0;
                    $GotoNextPage = 0;

                    // Check if all of the other words in the phrase appears on this page.
                    for ($xi = 1; $xi < $num_phrase_terms; $xi++)
                    {
                        // see if this word appears at all on this page, if not, we stop scanning page.
                        // do not check for skipped words (data count value of zero)
                        if ($phrase_data_count[$xi] != 0)
                        {
                            // check wordmap for this search phrase to see if it appears on the current page.
                            for ($xbi = 0; $xbi < $phrase_data_count[$xi]; $xbi++) {
                                if ($phrase_terms_data[$xi][$xbi]["pagenum"] == $data[$j]["pagenum"])
                                {
                                    // intersection, this term appears on both pages, goto next term
                                    // remember biggest pointer.
                                    if ($phrase_terms_data[$xi][$xbi]["ptr"] > $maxptr)
                                    {
                                        $maxptr = $phrase_terms_data[$xi][$xbi]["ptr"];
                                        $maxptr_term = $xi;
                                    }
                                    $score += $phrase_terms_data[$xi][$xbi]["score"];
                                    break;
                                }
                            }
                            if ($xbi == $phrase_data_count[$xi]) // if not found
                            {
                                $GotoNextPage = 1;
                                break; // goto next page
                            }
                        }
                    } // end phrase term for loop

                    if ($GotoNextPage == 1)
                        continue;

                    // Check how many context seeks we have made.
                    $ContextSeeks++;
                    if ($ContextSeeks > $MaxContextSeeks)
                    {
                        print "
" . $STR_PHRASE_CONTAINS_COMMON_WORDS . " \"" . $SearchWords[$sw] . "\"";
                        break;
                    }

                    // ok, so this page contains all of the words in the phrase
                    $FoundPhrase = 0;
                    $FoundFirstWord = 0;

                    // we goto the first occurance of the first word in pagetext
                    $pos = $maxptr - (($maxptr_term+3) * $MaxDictIDLen); // assume 3 possible punctuations.
                    // do not seek further back than the occurance of the first word (avoid wrong page)
                    if ($pos < $data[$j]["ptr"])
                        $pos = $data[$j]["ptr"];
                    fseek($fp_pagetext, $pos);

                    // now we look for the phrase within the context of this page
                    do
                    {
                        for ($xi = 0; $xi < $num_phrase_terms; $xi++)
                        {
                            // do...while loop to ignore punctuation marks in context phrase
                            do
                            {
                                $xword_id = 0;
                                $bytesread = 0;
                                // A word ID is the sum of consecutive 2-byte
                                // values; a value of 65535 means "more follows".
                                do
                                {
                                    $bytes_buffer = fread($fp_pagetext, 2); // grab 2 bytes
                                    $dict_id = ord($bytes_buffer[0]) | ord($bytes_buffer[1])<<8;
                                    $xword_id += $dict_id;
                                    $bytesread += 2;
                                } while ($dict_id >= 65535);
                                $pos += $bytesread;
                                // check if we are at the end of page (wordid = 0) or invalid $xword_id
                                if ($xword_id == 0 || $xword_id >= $dict_count)
                                    break;
                            } while ($xword_id <= $DictReservedLimit && !feof($fp_pagetext));

                            // if the words are NOT the same, we break out
                            if (strcasecmp($dict[$xword_id][0], $phrase_terms[$xi]) != 0)
                                break;

                            // remember how many times we find the first word on this page
                            if ($xi == 0)
                            {
                                $FoundFirstWord++;
                                // remember the position of the 'start' of this phrase
                                //$txtptr = $pos - $MaxDictIDLen;
                                $txtptr = $pos - $bytesread;
                            }
                        }
                        if ($xi == $num_phrase_terms)
                        {
                            // exact phrase found!
                            $FoundPhrase = 1;
                        }
                    } while ($xword_id != 0 && $FoundPhrase == 0 &&
                             $FoundFirstWord <= $data[$j]["score"]);

                    if ($FoundPhrase != 1)
                        continue; // goto next page.
                }

                //Check if page is already in output list
                $pageexists = 0;
                $ipage = $data[$j]["pagenum"];

                if ($ExcludeTerm == 1)
                {
                    // we clear out the score entry so that it'll be excluded in the filtering stage
                    $res_table[$ipage][0] = 0;
                }
                elseif ($res_table[$ipage][0] == 0)
                {
                    // not in list, count this page as a unique match
                    $res_table[$ipage][0] += $score;
                    $res_table[$ipage][2] = $txtptr;
                }
                else
                {
                    // already in list
                    if ($res_table[$ipage][0] > 10000)
                    {
                        // take it easy if its too big (to prevent huge scores)
                        $res_table[$ipage][0] += 1;
                    }
                    else
                    {
                        $res_table[$ipage][0] += $score; //Add in score
                        $res_table[$ipage][0] *= 2; //Double Score as we have two words matching
                    }

                    // store the next two searchword matches
                    if ($res_table[$ipage][1] > 0 && $res_table[$ipage][1] < $MaxContextKeywords)
                    {
                        if ($res_table[$ipage][3] == 0)
                            $res_table[$ipage][3] = $txtptr;
                        elseif ($res_table[$ipage][4] == 0)
                            $res_table[$ipage][4] = $txtptr;
                    }
                }

                $res_table[$ipage][1] += 1;
                if ($res_table[$ipage][0] > $maxscore)
                    $maxscore = $res_table[$ipage][0];

                // store the 'and' user search terms matched' value
                if ($res_table[$ipage][5] == $sw || $res_table[$ipage][5] == $sw-$SkippedWords-$exclude_count)
                    $res_table[$ipage][5] += 1;
            }

            if ($UseWildCards == 0 && $SearchAsSubstring == 0)
                break; //This search word was found, so skip to next
        }
    }
}
// The wordmap file is no longer needed once all terms have been looked up.
fclose($fp_wordmap);

// Tell the user which search words were skipped, and whether an exact
// phrase was dropped as a result.
if ($SkippedWords > 0)
{
    $skippedNotice = "
" . $STR_SKIPPED_FOLLOWING_WORDS . " " . $SkippedOutputStr . "
\n";
    if ($SkippedExactPhrase == 1)
    {
        $skippedNotice .= $STR_SKIPPED_PHRASE . ".
\n";
    }
    print $skippedNotice . "\n";
}
// Counters: rows copied to $output, and how many of those match ALL terms.
$oline       = 0;
$fullmatches = 0;
$matches     = 0;

// Second pass, results filtering.
// A page counts as a full match once its 'and' counter (slot 5) reaches the
// number of terms left after removing skipped words and exclusion terms.
$full_numwords = $numwords - $SkippedWords - $exclude_count;
for ($i = 0; $i < $pagesCount; $i++)
{
    $IsFiltered = false;

    // Pages without a positive score are never listed.
    if (!($res_table[$i][0] > 0))
        continue;

    // Using cats and not doing an "all cats" search: drop other categories.
    if ($UseCats && $cat != -1 && rtrim($catpages[$i]) != $cat)
        $IsFiltered = true;

    if (!$IsFiltered)
    {
        //if ($res_table[$i][1] >= $full_numwords)
        if ($res_table[$i][5] >= $full_numwords)
            $fullmatches++;
        elseif ($and == 1)
            $IsFiltered = true; // if AND search, only copy AND results
    }

    if ($IsFiltered)
        continue;

    // copy if not filtered out
    $output[$oline] = array(
        0 => $i,                  // page index
        1 => $res_table[$i][0],   // score
        2 => $res_table[$i][1],   // num of sw matched
        3 => $res_table[$i][2],   // pagetext ptr #1
        4 => $res_table[$i][3],   // pagetext ptr #2
        5 => $res_table[$i][4]    // pagetext ptr #3
    );
    $oline++;
}
$matches = $oline;
// Order the result rows. Sorting by date happens only when it was requested
// ($sort == 1), date support is enabled and $datetime is set; in every other
// case the default relevance comparison "SortCompare" is used.
if ($matches > 1)
{
    $sortCallback = ($sort == 1 && $UseDateTime == 1 && isset($datetime))
        ? "SortByDate"
        : "SortCompare";
    usort($output, $sortCallback);
}
// query_out is the query prepared to be passed in a URL.
//$query_out = htmlspecialchars($query_out);
$query_out = urlencode($query);

// Display search result information: a one-line summary whose wording
// depends on whether anything matched and on the AND/OR mode.
print("
\n");
$multiTerm = ($numwords > 1);
if ($matches == 0)
{
    print $STR_SUMMARY_NO_RESULTS_FOUND;
}
elseif ($multiTerm && $and == 0)
{
    // OR search: report full matches first, then partial matches if any.
    $SomeTermMatches = $matches - $fullmatches;
    print PrintNumResults($fullmatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_ALL_TERMS . " ";
    if ($SomeTermMatches > 0)
    {
        print PrintNumResults($SomeTermMatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_SOME_TERMS;
    }
}
elseif ($multiTerm && $and == 1)
{
    // AND search: only full matches are listed.
    print PrintNumResults($fullmatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_ALL_TERMS;
}
else
{
    print PrintNumResults($matches) . " " . $STR_SUMMARY_FOUND;
}
print "
";

// With fewer than three results, suggest how the search could be widened:
// first by switching from "all terms" to "any term", otherwise by searching
// every category.
if ($matches < 3 && $and == 1 && $numwords > 1)
{
    print "
" . $STR_POSSIBLY_GET_MORE_RESULTS . "
". $STR_ANY_OF_TERMS . ".
";
}
elseif ($matches < 3 && $UseCats && $cat != -1)
{
    print "
" . $STR_POSSIBLY_GET_MORE_RESULTS . "
" . $STR_ALL_CATS . ".
";
}
print "
\n";
// Spelling suggestions ("did you mean").
// Loads the spelling file, then for every search word that returned fewer
// than $SpellingWhenLessThan results looks for a dictionary word with the
// same metaphone code. Words with enough results are kept as typed.
if ($Spelling == 1)
{
    // load in spellings file
    // Each non-empty line is split on spaces into at most four fields:
    // field 0 is compared to the metaphone code below, fields 1..3 are
    // used as dictionary IDs.
    $i = 0;
    $fp_spell = fopen("zoom_spelling.zdat", "rt");
    // Guard the handle: looping on feof() with a failed fopen() never
    // terminates (or throws on PHP 8). Without the file there are simply
    // no suggestions.
    if ($fp_spell !== false)
    {
        // fgets() returns false at end of file and on read errors, so this
        // loop cannot spin on a handle that never reports EOF.
        while (($spline = fgets($fp_spell, $MaxKeyWordLineLen)) !== false)
        {
            if (strlen($spline) > 0)
            {
                $spell[$i] = explode(" ", $spline, 4);
                $i++;
            }
        }
        fclose($fp_spell);
    }
    $spell_count = $i;

    $SuggestStr = "";
    $SuggestionFound = 0;
    $SuggestionCount = 0;
    $word = "";
    $word2 = "";
    $word3 = "";

    for ($sw = 0; $sw < $numwords; $sw++)
    {
        if ($sw_results[$sw] >= $SpellingWhenLessThan)
        {
            // this word has enough results
            if ($sw > 0)
                $SuggestStr = $SuggestStr . " ";
            $SuggestStr = $SuggestStr . $SearchWords[$sw];
        }
        else
        {
            // this word returned less results than threshold, and requires spelling suggestions
            //$sw_spcode = GetSPCode($SearchWords[$sw]);
            $sw_spcode = metaphone($SearchWords[$sw],4);
            if (strlen($sw_spcode) > 0)
            {
                $SuggestionFound = 0;
                for ($i = 0; $i < $spell_count; $i++)
                {
                    $spcode = $spell[$i][0];
                    // Strict string comparison: with ==, numeric-looking
                    // codes such as "0" and "00" would compare as equal.
                    if ($spcode === $sw_spcode)
                    {
                        // Take the first listed word that differs from the search word.
                        $j = 0;
                        while ($SuggestionFound == 0 && $j < 3 && isset($spell[$i][1+$j]))
                        {
                            $dictid = intval($spell[$i][1+$j]);
                            $word = $dict[$dictid][0];
                            if (strcasecmp($word, $SearchWords[$sw]) == 0)
                            {
                                // Check that it is not a skipped word or the same word
                                $SuggestionFound = 0;
                            }
                            else
                            {
                                $SuggestionFound = 1;
                                $SuggestionCount++;
                                if ($numwords == 1) // if single word search
                                {
                                    // For a single-word search also pick up
                                    // the next two alternatives, if listed.
                                    if ($j < 1 && isset($spell[$i][1+$j+1]))
                                    {
                                        $dictid = intval($spell[$i][1+$j+1]);
                                        $word2 = $dict[$dictid][0];
                                        if (strcasecmp($word2, $SearchWords[$sw]) == 0)
                                            $word2 = "";
                                    }
                                    if ($j < 2 && isset($spell[$i][1+$j+2]))
                                    {
                                        $dictid = intval($spell[$i][1+$j+2]);
                                        $word3 = $dict[$dictid][0];
                                        if (strcasecmp($word3, $SearchWords[$sw]) == 0)
                                            $word3 = "";
                                    }
                                }
                            }
                            $j++;
                        }
                    }
                    elseif (strcmp($spcode, $sw_spcode) > 0)
                    {
                        // Stop once a code sorts after the one being looked up.
                        // NOTE(review): this assumes the spelling file is
                        // sorted by code -- confirm against the indexer.
                        break;
                    }
                    if ($SuggestionFound == 1)
                        break;
                }
                if ($SuggestionFound == 1)
                {
                    if ($sw > 0)
                        $SuggestStr = $SuggestStr . " ";
                    $SuggestStr = $SuggestStr . $word; // add string AFTER so we can preserve order of words
                }
                //else
                //    $SuggestStr = $SuggestStr . $SearchWords[$sw];
            }
        }
    }

    if ($SuggestionCount > 0)
    {
        print "
" . $STR_DIDYOUMEAN . "
". $SuggestStr . "";
        if (strlen($word2) > 0)
            print " or
". $word2 . "";
        if (strlen($word3) > 0)
            print " or
". $word3 . "";
        print "?
";
    }
}
// Work out how many result pages there are and announce the count when the
// results span more than one page.
$num_pages = ceil($matches / $per_page);
if ($num_pages > 1)
{
    print "
" . $num_pages . " " . $STR_PAGES_OF_RESULTS . "
\n";
}

// Show sorting options (only with more than one match and date support on).
if ($matches > 1 && $UseDateTime == 1)
{
    print("
");
}
// Index into $output of the first result shown on the requested page.
$arrayline = ($page == 1) ? 0 : (($page - 1) * $per_page);

// One past the index of the last result to show on this page.
$result_limit = $arrayline + $per_page;
// Display the results
// Renders the current page of hits: walks $output from $arrayline up to (but
// not including) $result_limit, stopping early once $matches rows are shown.
// For each hit it prints the optional meta description, the optional context
// extracts read from the pagetext file, and an info line (terms matched,
// score, date, URL) according to the $Display* switches.
while ($arrayline < $matches && $arrayline < $result_limit)
{
// Column 0 of an $output row is the page index, column 1 its score.
$ipage = $output[$arrayline][0];
$score = $output[$arrayline][1];
// NOTE(review): the next two prints emit only line breaks; presumably the
// result number/title markup originally lived in these strings -- confirm.
print "
\n";
print "
\n";
if ($DisplayMetaDesc == 1)
{
// Print meta description
// Descriptions of two characters or fewer are not printed.
if (strlen($descriptions[$ipage]) > 2) {
print("
");
if ($Highlighting == 1)
PrintHighlightDescription(rtrim($descriptions[$ipage]));
else
print rtrim($descriptions[$ipage]);
print "
\n";
}
}
if ($DisplayContext == 1)
{
// Extract contextual page content
$context_keywords = $output[$arrayline][2]; // # of terms matched
if ($context_keywords > $MaxContextKeywords)
$context_keywords = $MaxContextKeywords;
// Share the $ContextSize word budget between the keyword extracts.
$context_word_count = ceil($ContextSize / $context_keywords);
// Aim to start about half an extract before the matched word; the byte
// distance is computed with $MaxDictIDLen bytes per word.
$goback = floor($context_word_count / 2);
$gobackbytes = $goback * $MaxDictIDLen;
if (($gobackbytes / 2) > ($context_word_count - $context_maxgoback - $goback)) // 2 is MinDictIDLen
{
// go back less if potential for matched word to be outside the context range
// determine most bytes we should go back to fit the word in in case of all dict ID's were min. len.
$gobackbytes = 2 * ($context_word_count - $context_maxgoback - $goback);
// determine number of words available with this number of bytes, if all dict ID's were max. len.
// thus avoiding jumping into the middle of a multi-pair dictID value
$goback = floor($gobackbytes / $MaxDictIDLen);
// redetermine max bytes to jump back for this number of words
$gobackbytes = $goback * $MaxDictIDLen;
}
// Byte range of the previous extract, used to avoid overlapping extracts.
$last_startpos = 0;
$last_endpos = 0;
$FoundContext = 0;
print "
\n";
// One extract per matched keyword; columns 3 and up of the $output row hold
// the pagetext pointers recorded for this page.
for ($j = 0; $j < $context_keywords && !feof($fp_pagetext); $j++)
{
$origpos = $output[$arrayline][3 + $j];
$startpos = $origpos;
// Step back $gobackbytes from the matched word when the pointer is far
// enough into the file; otherwise start at the pointer itself and remember
// that no step back was made.
if ($gobackbytes < $startpos)
{
$startpos = $startpos - $gobackbytes;
$noGoBack = false;
}
else
$noGoBack = true;
//if ($startpos < 0)
// $startpos = 0;
// Check that this will not overlap with previous extract
if ($startpos > $last_startpos && $startpos < $last_endpos)
$startpos = $last_endpos; // we will just continue last extract if so.
// find the pagetext pointed to
fseek($fp_pagetext, $startpos);
// remember the last start position
$last_startpos = $startpos;
// NOTE(review): GetNextDictWord() is defined elsewhere; presumably it reads
// the next dictionary word ID at the current file position -- confirm.
$word_id = GetNextDictWord($fp_pagetext);
//print "wordid: " . $word_id;
$context_str = "";
$noSpaceForNextChar = false;
// Append up to $context_word_count words to the extract.
for ($i = 0; $i < $context_word_count && !feof($fp_pagetext); $i++)
{
// Decide whether a space goes in front of this word, based on which
// reserved ID range (if any) the word ID falls into.
if ($noSpaceForNextChar == false)
{
//if ($word_id > $DictReservedLimit) // no space for reserved words (punctuation, etc)
// No space for reserved words (punctuation, etc)
if ($word_id > $DictReservedNoSpaces)
$context_str .= " ";
elseif ($word_id > $DictReservedSuffixes && $word_id <= $DictReservedPrefixes)
{
// This is a Prefix character
$context_str .= " ";
$noSpaceForNextChar = true;
}
elseif ($word_id > $DictReservedPrefixes) // this is a nospace character
$noSpaceForNextChar = true;
}
else
$noSpaceForNextChar = false;
if ($word_id == 0 || $word_id == 1 || $word_id >= $dict_count) // check if end of page or section
{
// if end of page occurs AFTER word pointer (ie: reached next page)
if ($noGoBack || ftell($fp_pagetext) > $origpos)
break; // then we stop.
else // if end of page occurs BEFORE word pointer (ie: reached previous page)
{
$context_str = "";// then we clear the existing context buffer we've created.
// Restart the word count; the loop increment makes it 1 for the next word.
$i = 0;
}
}
else
$context_str .= $dict[$word_id][0];
$word_id = GetNextDictWord($fp_pagetext);
}
// remember the last end position
$last_endpos = ftell($fp_pagetext);
if (strcmp(trim($context_str), trim($titles[$ipage])) == 0)
{
$context_str = ""; // clear the string if its identical to the title
}
// Print the extract, preceded by an ellipsis, when there is one.
if ($context_str != "")
{
print " ... ";
$FoundContext = 1;
if ($Highlighting == 1)
PrintHighlightDescription($context_str);
else
print $context_str;
}
}
// Close the extracts with a trailing ellipsis if any was printed.
if ($FoundContext == 1)
print " ...";
print "
\n";
}
// Build the info line from the enabled parts, joined with " - ".
$info_str = "";
if ($DisplayTerms == 1)
{
$info_str .= $STR_RESULT_TERMS_MATCHED . " ". $output[$arrayline][2];
}
if ($DisplayScore == 1)
{
if (strlen($info_str) > 0)
$info_str .= " - ";
$info_str .= $STR_RESULT_SCORE . " " . $score;
}
if ($DisplayDate == 1)
{
if (strlen($info_str) > 0)
$info_str .= " - ";
//$info_str .= date("j M Y @ g:i A", $datetime[$ipage]) ." - ";
$info_str .= date("j M Y", $datetime[$ipage]);
}
if ($DisplayURL == 1)
{
if (strlen($info_str) > 0)
$info_str .= " - ";
$info_str .= $STR_RESULT_URL . " ".rtrim($urls[$ipage]);
}
print "
";
print $info_str;
print "
\n";
// Move on to the next result row.
$arrayline++;
}
// Close the pagetext handle when exact-phrase search or context display is
// enabled.
if ($AllowExactPhrase == 1 || $DisplayContext == 1)
{
    fclose($fp_pagetext);
}

// Show links to other result pages
if ($num_pages > 1)
{
    // Window of page numbers: up to 10 on either side of the current page,
    // clamped to the range 1 .. $num_pages.
    $start_range = ($page - 10 < 1) ? 1 : $page - 10;
    $end_range   = ($page + 10 > $num_pages) ? $num_pages : $page + 10;

    print "
\n" . $STR_RESULT_PAGES . " ";

    // "Previous" is offered on every page except the first.
    if ($page > 1)
    {
        print "
<< " . $STR_RESULT_PAGES_PREVIOUS . " ";
    }

    for ($i = $start_range; $i <= $end_range; $i++)
    {
        if ($i != $page)
        {
            print "
".$i." ";
        }
        else
        {
            // the current page number
            print $page." ";
        }
    }

    // "Next" is offered on every page except the last.
    if ($page != $num_pages)
    {
        print "
" . $STR_RESULT_PAGES_NEXT . " >> ";
    }
}
print "
"; // end of results style tag
if ($Timing == 1 || $Logging == 1) {
$mtime = explode(" ", microtime());
$endtime = doubleval($mtime[1]) + doubleval($mtime[0]);
$difference = abs($starttime - $endtime);
$timetaken = number_format($difference, 3, '.', '');
if ($Timing == 1)
print "