|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T s
Length: 3593 (0xe09)
Types: TextFile
Names: »searchsbr.c«
└─⟦9ae75bfbd⟧ Bits:30007242 EUUGD3: Starter Kit
└─⟦e7f64e0c0⟧ »EurOpenD3/mail/vmh.tar.Z«
└─⟦dcb95597f⟧
└─⟦this⟧ »searchsbr.c«
#ifndef lint
static char rcsid[] =
"$Header: searchsbr.c,v 1.3 86/03/04 00:06:54 deboor Locked $";
static char notice[] =
"This program is in the public domain and is available for unlimited \
distribution as long as this notice is enclosed.";
#endif
/*
* routines for implementing a modified version of the Boyer-Moore search
* algorithm. The modification consists of allowing lower case letters in the
* key to match either upper or lowercase letters in file. Upper case letters
* match only upper case letters.
*
* $Source: /c/support/deboor/usr/src/old/vmh/RCS/searchsbr.c,v $
* $Revision: 1.3 $
* $Author: deboor $
*
* FUNCTIONS:
* setoffsets sets up an array of offsets to pass to searchline
* searchline search a line for a given key using the modifed
* Boyer-Moore algorithm mentioned above.
*/
#include "vmh.h"
#include "pick.h"
/****** The Search Routines ********/
setoffsets (key, offs)
Reg3 char *key;
Reg4 offsetsP offs;
{
Reg2 u_char *cp;
Reg1 int idx;
Reg5 int len;
len = strlen (key);
for (idx=0; idx < 256; idx++) /* start it out with all == length of key */
offs[idx] = len;
/* now form the offsets for the characters in the key */
for (cp = (u_char *) key, idx = 1; idx < len; cp++, idx++) {
offs[*cp] = len - idx;
if (isalpha(*cp) && islower (*cp))
offs[toupper(*cp)] = len - idx;
}
}
/*
* search_line (line, key, klen, offs, slen) char *line, *key; int klen;
* register offsetsP offs; int slen;
* This function is the real heart of the searching routines. It is
* an algorithm which was taken from the November '84 issue of
* Scientific American. It's almost as fast as egrep. It consists of
* the following:
* A line to search is passed in line with its length in slen .
* A key for which to search is in key with its length in klen .
* 'offs' is a 256 byte array of integers set up using the setoffsets()
* function, above. See its header for a description. The function begins
* with one pointer at the end of the key and another pointer at a similar
* distance from the start of the line. If the characters at these posi-
* tions match, the pointers are decremented and thesse values compared,
* and so on until the key pointer is pointing before the start of the
* key. In this case a match is declared and a 1 is returned. If the
* characters at the two positions do *not* match, the offs array is
* consulted at the position equal to the ascii value of the character
* in the *line*. The line pointer is then advanced that many places
* beyond where it originally started, the key pointer is reset and the
* matching starts all over again.
* If at some point the line pointer becomes greater than line+slen ,
* no match is declared and a 0 is returned.
*/
search_line (line, key, klen, offs, slen)
char *line,
*key;
int klen;
Reg2 offsetsP offs;
int slen;
{
char *eokey = key - 1, /* "end of key" in loop */
*eol; /* "end of line" in loop */
Reg1 char *cp; /* current pointer */
Reg6 char *ec; /* end of current section (cp + klen) */
Reg5 char *ek; /* actual end of key */
Reg3 char *kp;
Reg4 int next;
klen--;
ek = kp = key + klen; ec = cp = line + klen; eol = line + slen;
while (kp != eokey && cp < eol) {
if (*cp != *kp) {
/*
* lower case matches both lower case *and* upper case
*/
if (islower (*kp) && toupper(*kp) == *cp) {
cp--, kp--;
continue;
}
next = offs[(u_int) *cp] - (ek - kp);
cp = (ec += next);
kp = ek;
} else {
cp--, kp--;
}
}
if (kp == eokey)
return (1);
else
return (0);
}