|
DataMuseum.dk Presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - downloadIndex: ┃ T s ┃
Length: 9128 (0x23a8) Types: TextFile Names: »sp.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki └─ ⟦this⟧ »EUUGD11/euug-87hel/sec1/sp/sp.c«
/* vi: set tabstop=4 : */ /* * Version 1.3 December 1986 * * sp - spell word * * Usage: sp [-f dictionary-list] [-eavc] [word ...] * * Compute the Soundex code for each word on the command line * (or each word on the standard input) and compare against a * dictionary * * The soundex dictionary list may be specified on the command line * The environment variable SPPATH may be set to a list of colon * separated pathnames of soundex dictionaries. * If a command line dictionary-list (a colon separated list of pathnames) is * given in addition to the SPPATH variable, all dictionaries are used. * * To reduce the size of the word list, certain heuristics are used: * the -a option causes all words matched to be printed * The output is alphabetically sorted and indicators are printed * beside each word: * X == exact match * ! == close match * * == near match * ' ' == matched * * Note that the maximum number of colliding words is MAXCOUNT due to the * data structure used. * * Permission is given to copy or distribute this program provided you * do not remove this header or make money off of the program. * * Please send comments and suggestions to: * * Barry Brachman * Dept. of Computer Science * Univ. of British Columbia * Vancouver, B.C. V6T 1W5 * * .. 
{ihnp4!alberta, uw-beaver}!ubc-vision!ubc-cs!brachman * brachman@cs.ubc.cdn * brachman%ubc.csnet@csnet-relay.arpa * brachman@ubc.csnet */ #include <sys/types.h> #include <sys/file.h> #include <ctype.h> #include <stdio.h> #ifdef NEWDBM #include <ndbm.h> #else !NEWDBM #include <dbm.h> #endif NEWDBM #include "sp.h" #define streq(X, Y) (!strcmp(X, Y)) #define range(S) ((strlen(S) + 4) / 5) #define USAGE "Usage: sp [-f dictionary-list] [-eavc] [word ...]" char word[MAXWORDLEN + 2]; datum FETCH(); char *fileptr[MAXDICT + 1]; /* Up to MAXDICT dictionaries + sentinel */ int dict_ptr = 0; char *wordptr[MAXWORDS], *wordlistptr; char wordlist[WORDSPACE]; int nmatched; /* * Soundex codes * The program depends upon the numbers zero through six being used * but this can easily be changed */ char soundex_code_map[26] = { /*** A B C D E F G H I J K L M N O P ***/ 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, /*** Q R S T U V W X Y Z ***/ 2, 6, 2, 3, 0, 1, 0, 2, 0, 2 }; int aflag, cflag, eflag, vflag; main(argc, argv) int argc; char **argv; { register int fflag, i; register char *p; char *getenv(); argc--; argv++; fileptr[0] = (char *) NULL; while (argc > 0 && argv[0][0] == '-') { fflag = 0; /* to break out of following loop... 
*/ for (i = 1; argv[0][i] != '\0' && fflag == 0; i++) { switch (argv[0][i]) { case 'a': aflag = 1; break; case 'c': cflag = 1; break; case 'e': eflag = 1; break; case 'f': if (argc == 1) { fprintf(stderr, "%s\n", USAGE); exit(1); } mkfilelist(argv[1]); argc--; argv++; fflag = 1; /* break out of loop */ break; case 'v': vflag = 1; break; default: fprintf(stderr, "%s\n", USAGE); exit(1); } } argc--, argv++; } if ((p = getenv("SPPATH")) != (char *) NULL) mkfilelist(p); if (fileptr[0] == (char *) NULL) mkfilelist(DEFAULT_SPPATH); if (vflag) { printf("Using dictionaries:\n"); for (i = 0; fileptr[i] != (char *) NULL; i++) if (strlen(fileptr[i]) > 0) printf("\t%s\n", fileptr[i]); } if (argc) { for (i = 0; i < argc; i++) { if (!eflag) printf("%s:\n", argv[i]); apply(argv[i]); if (!eflag) printf("\n"); } } else { int ch, len; while (1) { printf("Word? "); if (fgets(word, sizeof(word), stdin) == (char *) NULL) { printf("\n"); break; } len = strlen(word); if (word[len - 1] != '\n') { fprintf(stderr, "sp: Word too long: %s", word); while ((ch = getchar()) != '\n') /* flush rest of line */ putc(ch, stderr); putc('\n', stderr); continue; } word[--len] = '\0'; if (len > MAXWORDLEN) { fprintf(stderr, "sp: Word too long: %s\n", word); continue; } apply(word); if (!eflag) printf("\n"); } } } /* * Apply the Soundex search for a word to each dictionary in turn * Note that 'DBMINIT' opens both the '.dir' and the '.pag' files * and we must close them to avoid running out of file descriptors * * This routine gets called each time a word is looked up and therefore * the dbm files may be repeatedly opened and closed. Since the vast majority * of the time this program is invoked for just a single word it doesn't seem * worthwhile to do the right thing by saving file descriptors/DBM pointers. * There probably won't be more than two dictionaries in use anyway. 
*/ apply(word) char *word; { register int code, i, nodicts; nmatched = 0; wordlistptr = wordlist; if ((code = soundex(word, 3)) == BAD_WORD) return; nodicts = 1; for (i = 0; fileptr[i] != (char *) NULL; i++) { if (strlen(fileptr[i]) == 0) continue; if (DBMINIT(fileptr[i], O_RDONLY) != -1) { proc(code); nodicts = 0; } DBMCLOSE(); } if (nodicts) { fprintf(stderr, "sp: Can't open any dictionaries\n"); exit(1); } if (vflag && !eflag && nmatched == 0) printf("%s: no match\n", word); else choose(word); } /* * Look the word up in the current dictionary * and save all the matches * Note that only three digits are of the Soundex code are stored * in a dictionary */ proc(soundex) int soundex; { register int c, len; datum dbm_key, dbm_content; key_t *key, keyvec[KEYSIZE]; char *mk_word(), *p; key = keyvec; dbm_key.dptr = (char *) key; dbm_key.dsize = KEYSIZE; c = 0; while (1) { mk_key(key, soundex, c); dbm_content = FETCH(dbm_key); if (dbm_content.dptr == 0) break; if (IS_DELETED(dbm_content)) { if (++c > MAXCOUNT) { fprintf(stderr, "sp: entry count overflow\n"); exit(1); } continue; } if (nmatched == MAXWORDS) { fprintf(stderr, "sp: Too many matches\n"); exit(1); } p = mk_word(dbm_content.dptr, dbm_content.dsize, soundex); len = strlen(p); if (wordlistptr + len >= &wordlist[WORDSPACE]) { fprintf(stderr, "sp: Out of space for words\n"); exit(1); } strncpy(wordlistptr, p, len); wordlistptr[len] = '\0'; wordptr[nmatched++] = wordlistptr; wordlistptr += len + 1; if (++c > MAXCOUNT) { fprintf(stderr, "sp: entry count overflow\n"); exit(1); } } } /* * Select and print those words which we consider * to have matched 'word' */ choose(word) register char *word; { register int c, code, i, len, mcount, wordlen; register char *p; int compar(); code = soundex(word, 4); qsort(wordptr, nmatched, sizeof(char *), compar); c = range(word); wordlen = strlen(word); mcount = 0; for (i = 0; i < nmatched; i++) { p = wordptr[i]; if (strmatch(word, p) == 0) { printf("X"); if (eflag) { printf(" 
%s\n", word); return; } } else if (eflag) continue; else if (soundex(p, 4) == code) printf("!"); else if (aflag && (wordlen < (len = strlen(p)) - c || len > wordlen + c)) printf(" "); else if (!cflag) printf("*"); else continue; printf("%3d. %s\n", mcount + 1, p); mcount++; } if (vflag) printf("(%d total matches)\n", nmatched); } /* * Compute an 'n' digit Soundex code for 'word' * See mksp.c */ soundex(word, n) register char *word; int n; { register int c, digit_part, previous_code, soundex_length; register char *p, *w; char wcopy[MAXWORDLEN + 2]; strcpy(wcopy, word); p = w = wcopy; while (*p != '\0') { if (isupper(*p)) *p = tolower(*p); p++; } if (!isalpha(*w)) { fprintf(stderr, "sp: Improper word: %s\n", word); return(BAD_WORD); } digit_part = 0; soundex_length = 0; previous_code = soundex_code_map[*w - 'a']; for (p = w + 1; *p != '\0' && soundex_length < n; p++) { if (!isalpha(*p)) continue; c = soundex_code_map[*p - 'a']; if (c == 0 || previous_code == c) { previous_code = c; continue; } digit_part = digit_part * 7 + c; previous_code = c; soundex_length++; } while (soundex_length++ < n) digit_part *= 7; return((digit_part << 5) + *w - 'a'); } /* * Process a path string (environment variable SPPATH, DEFAULT_SPPATH, or an * arg) by separating the pathnames into strings pointed to by elements * of 'fileptr' * End of list indicated by fileptr entry of NULL * * No attempt made to ignore duplicate pathnames */ mkfilelist(p) register char *p; { register int len; register char *path, *start; char *malloc(); while (*p != '\0' && dict_ptr < MAXDICT) { start = p; while (*p != ':' && *p != '\0') p++; if (start == p && *p == ':') { /* colon with nothing else */ p++; continue; } len = p - start; path = (char *) malloc((unsigned) (len + 1)); if (path == (char *) NULL) { fprintf(stderr, "sp: Out of dictionary space\n"); exit(1); } strncpy(path, start, len); path[len] = '\0'; fileptr[dict_ptr++] = path; } fileptr[dict_ptr] = (char *) NULL; } compar(p, q) char **p, **q; { 
return(strmatch(*p, *q)); /* return(strcmp(*p, *q)); */ /* use if you prefer case sensitive */ }