⟦aef85808f⟧

TextFile

#ifndef lint
static char rcsid[] =
	"$Header: searchsbr.c,v 1.3 86/03/04 00:06:54 deboor Locked $";

static char notice[] =
	"This program is in the public domain and is available for unlimited \
distribution as long as this notice is enclosed.";
#endif

/*
 * routines for implementing a modified version of the Boyer-Moore search
 * algorithm. The modification consists of allowing lower case letters in the
 * key to match either upper or lowercase letters in file. Upper case letters
 * match only upper case letters.
 *
 * $Source: /c/support/deboor/usr/src/old/vmh/RCS/searchsbr.c,v $
 * $Revision: 1.3 $
 * $Author: deboor $
 *
 * FUNCTIONS:
 *	setoffsets	sets up an array of offsets to pass to searchline
 *	searchline	search a line for a given key using the modifed
 *			Boyer-Moore algorithm mentioned above.
 */
#include "vmh.h"
#include "pick.h"


		/****** The Search Routines ********/
setoffsets (key, offs)
	Reg3	char		*key;
	Reg4	offsetsP	offs;
{
	Reg2	u_char	*cp;
	Reg1	int	idx;
	Reg5	int	len;

	len = strlen (key);
	
	for (idx=0; idx < 256; idx++)	/* start it out with all == length of key */
		offs[idx] = len;

	/* now form the offsets for the characters in the key */

	for (cp = (u_char *) key, idx = 1; idx < len; cp++, idx++) {
		offs[*cp] = len - idx;
		if (isalpha(*cp) && islower (*cp))
			offs[toupper(*cp)] = len - idx;
	}

}

/*
 * search_line (line, key, klen, offs, slen) char *line, *key; int klen;
 *   register offsetsP offs; int slen;
 *	This function is the real heart of the searching routines. It is
 *	an algorithm which was taken from the November '84 issue of
 *	Scientific American. It's almost as fast as egrep. It consists of
 *	the following:
 *	A line to search is passed in  line  with its length in  slen .
 *	A key for which to search is in  key  with  its length in  klen .
 *	'offs' is a 256 byte array of integers set up using the setoffsets()
 *	function, above. See its header for a description. The function begins
 *	with one pointer at the end of the key and another pointer at a similar
 *	distance from the start of the line. If the characters at these posi-
 *	tions match, the pointers are decremented and thesse values compared,
 *	and so on until the key pointer is pointing before the start of the
 *	key. In this case a match is declared and a 1 is returned. If the
 *	characters at the two positions do *not* match, the  offs  array is
 *	consulted at the position equal to the ascii value of the character
 *	in the *line*. The line pointer is then advanced that many places
 *	beyond where it originally started, the key pointer is reset and the
 *	matching starts all over again.
 *	If at some point the line pointer becomes greater than  line+slen ,
 *	no match is declared and a 0 is returned.
 */
search_line (line, key, klen, offs, slen)
	char		*line,
			*key;
	int		klen;
	Reg2   offsetsP	offs;
	int		slen;
{
	char		*eokey = key - 1,	/* "end of key" in loop */
			*eol;			/* "end of line" in loop */
	
	Reg1	char	*cp;	/* current pointer */
	Reg6	char	*ec;	/* end of current section (cp + klen) */
	Reg5	char	*ek;	/* actual end of key */
	Reg3	char	*kp;
	Reg4	int	next;

	klen--;
	ek = kp = key + klen; ec = cp = line + klen; eol = line + slen;
	while (kp != eokey && cp < eol) {
		if (*cp != *kp) {
			/*
			 * lower case matches both lower case *and* upper case
			 */
			if (islower (*kp) && toupper(*kp) == *cp) {
				cp--, kp--;
				continue;
			}
			next = offs[(u_int) *cp] - (ek - kp);
			cp = (ec += next);
			kp = ek;
		}  else  {
			cp--, kp--;
		}
	}
	if (kp == eokey) 
		return (1);
	else
		return (0);
}
DataMuseum.dk

DKUUG/EUUG Conference tapes

⟦aef85808f⟧ TextFile

Derivation

TextFile