|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T s
Length: 3593 (0xe09) Types: TextFile Names: »searchsbr.c«
└─⟦9ae75bfbd⟧ Bits:30007242 EUUGD3: Starter Kit └─⟦e7f64e0c0⟧ »EurOpenD3/mail/vmh.tar.Z« └─⟦dcb95597f⟧ └─⟦this⟧ »searchsbr.c«
#ifndef lint static char rcsid[] = "$Header: searchsbr.c,v 1.3 86/03/04 00:06:54 deboor Locked $"; static char notice[] = "This program is in the public domain and is available for unlimited \ distribution as long as this notice is enclosed."; #endif /* * routines for implementing a modified version of the Boyer-Moore search * algorithm. The modification consists of allowing lower case letters in the * key to match either upper or lowercase letters in file. Upper case letters * match only upper case letters. * * $Source: /c/support/deboor/usr/src/old/vmh/RCS/searchsbr.c,v $ * $Revision: 1.3 $ * $Author: deboor $ * * FUNCTIONS: * setoffsets sets up an array of offsets to pass to searchline * searchline search a line for a given key using the modifed * Boyer-Moore algorithm mentioned above. */ #include "vmh.h" #include "pick.h" /****** The Search Routines ********/ setoffsets (key, offs) Reg3 char *key; Reg4 offsetsP offs; { Reg2 u_char *cp; Reg1 int idx; Reg5 int len; len = strlen (key); for (idx=0; idx < 256; idx++) /* start it out with all == length of key */ offs[idx] = len; /* now form the offsets for the characters in the key */ for (cp = (u_char *) key, idx = 1; idx < len; cp++, idx++) { offs[*cp] = len - idx; if (isalpha(*cp) && islower (*cp)) offs[toupper(*cp)] = len - idx; } } /* * search_line (line, key, klen, offs, slen) char *line, *key; int klen; * register offsetsP offs; int slen; * This function is the real heart of the searching routines. It is * an algorithm which was taken from the November '84 issue of * Scientific American. It's almost as fast as egrep. It consists of * the following: * A line to search is passed in line with its length in slen . * A key for which to search is in key with its length in klen . * 'offs' is a 256 byte array of integers set up using the setoffsets() * function, above. See its header for a description. The function begins * with one pointer at the end of the key and another pointer at a similar * distance from the start of the line. If the characters at these posi- * tions match, the pointers are decremented and thesse values compared, * and so on until the key pointer is pointing before the start of the * key. In this case a match is declared and a 1 is returned. If the * characters at the two positions do *not* match, the offs array is * consulted at the position equal to the ascii value of the character * in the *line*. The line pointer is then advanced that many places * beyond where it originally started, the key pointer is reset and the * matching starts all over again. * If at some point the line pointer becomes greater than line+slen , * no match is declared and a 0 is returned. */ search_line (line, key, klen, offs, slen) char *line, *key; int klen; Reg2 offsetsP offs; int slen; { char *eokey = key - 1, /* "end of key" in loop */ *eol; /* "end of line" in loop */ Reg1 char *cp; /* current pointer */ Reg6 char *ec; /* end of current section (cp + klen) */ Reg5 char *ek; /* actual end of key */ Reg3 char *kp; Reg4 int next; klen--; ek = kp = key + klen; ec = cp = line + klen; eol = line + slen; while (kp != eokey && cp < eol) { if (*cp != *kp) { /* * lower case matches both lower case *and* upper case */ if (islower (*kp) && toupper(*kp) == *cp) { cp--, kp--; continue; } next = offs[(u_int) *cp] - (ek - kp); cp = (ec += next); kp = ek; } else { cp--, kp--; } } if (kp == eokey) return (1); else return (0); }