DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - download
Index: ┃ T s

⟦d3aa77410⟧ TextFile

    Length: 9128 (0x23a8)
    Types: TextFile
    Names: »sp.c«

Derivation

└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
    └─ ⟦this⟧ »EUUGD11/euug-87hel/sec1/sp/sp.c« 

TextFile

/* vi: set tabstop=4 : */

/*
 * Version 1.3 December 1986
 *
 * sp - spell word
 *
 * Usage:	sp [-f dictionary-list] [-eavc] [word ...]
 *
 * Compute the Soundex code for each word on the command line
 * (or each word on the standard input) and compare against a
 * dictionary
 *
 * The soundex dictionary list may be specified on the command line
 * The environment variable SPPATH may be set to a list of colon
 * separated pathnames of soundex dictionaries.
 * If a command line dictionary-list (a colon separated list of pathnames) is
 * given in addition to the SPPATH variable, all dictionaries are used.
 *
 * To reduce the size of the word list, certain heuristics are used;
 * the -a option causes all words matched to be printed.
 * The output is alphabetically sorted and indicators are printed
 * beside each word:
 *	X   == exact match
 *	!   == close match
 *	*   == near match
 * ' '  == matched
 *
 * Note that the maximum number of colliding words is MAXCOUNT due to the
 * data structure used.
 *
 * Permission is given to copy or distribute this program provided you
 * do not remove this header or make money off of the program.
 *
 * Please send comments and suggestions to:
 *
 * Barry Brachman
 * Dept. of Computer Science
 * Univ. of British Columbia
 * Vancouver, B.C. V6T 1W5
 *
 * .. {ihnp4!alberta, uw-beaver}!ubc-vision!ubc-cs!brachman
 * brachman@cs.ubc.cdn
 * brachman%ubc.csnet@csnet-relay.arpa
 * brachman@ubc.csnet
 */

#include <sys/types.h>
#include <sys/file.h>
#include <ctype.h>
#include <stdio.h>

/*
 * Select the dbm interface header: <ndbm.h> when NEWDBM is defined,
 * otherwise <dbm.h>.  The labels after #else and #endif are written as
 * comments because bare tokens following those directives are not
 * permitted by standard C preprocessors.
 */
#ifdef NEWDBM
#include <ndbm.h>
#else /* !NEWDBM */
#include <dbm.h>
#endif /* NEWDBM */

#include "sp.h"

/* Non-zero when strcmp() reports the strings X and Y as equal. */
#define streq(X, Y)	(strcmp(X, Y) == 0)

/* One unit for every five characters of S, rounded up. */
#define range(S)	((4 + strlen(S)) / 5)

/* Synopsis written to stderr when the command line cannot be parsed. */
#define USAGE		"Usage: sp [-f dictionary-list] [-eavc] [word ...]"

/* Line buffer that main() fills when words are read from standard input. */
char word[MAXWORDLEN + 2];

/* Record lookup used by proc(); declared here as returning a datum. */
datum FETCH();

/*
 * Dictionary path names stored by mkfilelist(): at most MAXDICT entries
 * followed by a NULL sentinel.
 */
char *fileptr[MAXDICT + 1];

/* Number of path names currently held in fileptr. */
int dict_ptr = 0;

/* wordptr[0 .. nmatched-1] point at the matches saved in wordlist. */
char *wordptr[MAXWORDS];

/* Next unused position in wordlist. */
char *wordlistptr;

/* Storage for the NUL terminated matches of the word being looked up. */
char wordlist[WORDSPACE];

/* Count of matches saved for the word being looked up. */
int nmatched;

/*
 * Soundex code of every letter, indexed by (lower case letter - 'a').
 * The program depends upon the codes zero through six being used
 * but this can easily be changed
 */
char soundex_code_map[26] = {
	/* A  B  C  D  E  F  G  H  I  J  K  L  M */
	   0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5,

	/* N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
	   5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2
};

/* Option flags; main() sets each one to 1 when its letter is given. */
int aflag;	/* -a: choose() marks words outside the length range with ' ' */
int cflag;	/* -c: choose() omits the words it would mark with '*' */
int eflag;	/* -e: report an exact match only */
int vflag;	/* -v: list the dictionaries and report match totals */

/*
 * sp entry point.
 *
 * Parses the option letters, builds the dictionary list (the -f list
 * first, then SPPATH, falling back to DEFAULT_SPPATH when neither one
 * supplied a dictionary) and then looks up every word given as an
 * argument or, when there are none, every line read from standard input.
 *
 * Returns 0 once all words have been processed; a malformed command line
 * prints USAGE and exits with status 1.
 */
main(argc, argv)
int argc;
char **argv;
{
	register int fflag, i;
	register char *p;
	char *getenv();

	argc--; argv++;
	fileptr[0] = (char *) NULL;
	while (argc > 0 && argv[0][0] == '-') {
		fflag = 0;		/* set by -f to stop scanning this argument */
		for (i = 1; argv[0][i] != '\0' && fflag == 0; i++) {
			switch (argv[0][i]) {
			case 'a':
				aflag = 1;
				break;
			case 'c':
				cflag = 1;
				break;
			case 'e':
				eflag = 1;
				break;
			case 'f':
				/* -f consumes the following argument as its list */
				if (argc == 1) {
					fprintf(stderr, "%s\n", USAGE);
					exit(1);
				}
				mkfilelist(argv[1]);
				argc--;
				argv++;
				fflag = 1;		/* break out of loop */
				break;
			case 'v':
				vflag = 1;
				break;
			default:
				fprintf(stderr, "%s\n", USAGE);
				exit(1);
			}
		}
		argc--, argv++;
	}

	if ((p = getenv("SPPATH")) != (char *) NULL)
		mkfilelist(p);
	if (fileptr[0] == (char *) NULL)
		mkfilelist(DEFAULT_SPPATH);
	if (vflag) {
		printf("Using dictionaries:\n");
		for (i = 0; fileptr[i] != (char *) NULL; i++)
			if (strlen(fileptr[i]) > 0)
				printf("\t%s\n", fileptr[i]);
	}
	if (argc) {
		for (i = 0; i < argc; i++) {
			/*
			 * soundex() copies the word into a MAXWORDLEN + 2 byte
			 * buffer, so reject arguments that would not fit, using
			 * the same limit as for words read from standard input.
			 */
			if (strlen(argv[i]) > MAXWORDLEN) {
				fprintf(stderr, "sp: Word too long: %s\n", argv[i]);
				continue;
			}
			if (!eflag)
				printf("%s:\n", argv[i]);
			apply(argv[i]);
			if (!eflag)
				printf("\n");
		}
	}
	else {
		int ch, len;

		while (1) {
			printf("Word? ");
			if (fgets(word, sizeof(word), stdin) == (char *) NULL) {
				printf("\n");
				break;
			}
			len = strlen(word);
			if (len > 0 && word[len - 1] == '\n')
				word[--len] = '\0';
			else if (len > 0 && !feof(stdin)) {
				/*
				 * The buffer filled up before the end of the line.
				 * Echo and discard the remainder, stopping at end of
				 * file as well as at the newline so that a truncated
				 * input cannot loop forever.
				 */
				fprintf(stderr, "sp: Word too long: %s", word);
				while ((ch = getchar()) != '\n' && ch != EOF)
					putc(ch, stderr);
				putc('\n', stderr);
				continue;
			}
			/*
			 * Otherwise the line ended at end of file without a newline
			 * (or was empty); it is looked up as it stands.
			 */
			if (len > MAXWORDLEN) {
				fprintf(stderr, "sp: Word too long: %s\n", word);
				continue;
			}

			apply(word);
			if (!eflag)
				printf("\n");
		}
	}
	return(0);
}

/*
 * Run the Soundex search for 'word' against every dictionary in
 * 'fileptr' and then report the outcome.
 *
 * 'DBMINIT' opens both the '.dir' and the '.pag' files, so each
 * dictionary is closed again before the next one is tried to avoid
 * running out of file descriptors.
 *
 * Because this routine runs once per word, the dbm files may be opened
 * and closed repeatedly.  The program is nearly always invoked for a
 * single word, so keeping file descriptors/DBM pointers around did not
 * seem worthwhile; there probably won't be more than two dictionaries in
 * use anyway.
 *
 * A word that soundex() rejects is skipped.  The program exits with
 * status 1 when not a single dictionary could be opened.
 */
apply(word)
char *word;
{
	register int code, opened;
	register char **dp;

	/* start with an empty match list */
	wordlistptr = wordlist;
	nmatched = 0;

	code = soundex(word, 3);
	if (code == BAD_WORD)
		return;

	opened = 0;
	for (dp = fileptr; *dp != (char *) NULL; dp++) {
		if (**dp == '\0')		/* empty path name */
			continue;
		if (DBMINIT(*dp, O_RDONLY) != -1) {
			proc(code);
			opened++;
		}
		DBMCLOSE();
	}
	if (opened == 0) {
		fprintf(stderr, "sp: Can't open any dictionaries\n");
		exit(1);
	}

	if (nmatched != 0 || eflag || !vflag)
		choose(word);
	else
		printf("%s: no match\n", word);
}

/*
 * Collect from the current dictionary every word stored under the
 * Soundex code 'soundex' and append it to the match list
 * ('wordlist' / 'wordptr' / 'nmatched').
 *
 * Entries are fetched with keys built by mk_key() from the code and a
 * counter that starts at zero; the first key with no record ends the
 * search.  Deleted records are skipped but still use up a counter value.
 * Note that only three digits of the Soundex code are stored in a
 * dictionary.
 *
 * Exits with status 1 if the counter passes MAXCOUNT, if MAXWORDS
 * matches are already held, or if 'wordlist' is full.
 */
proc(soundex)
int soundex;
{
	register int c, len;
	datum dbm_key, dbm_content;
	key_t *kp, keyvec[KEYSIZE];
	char *mk_word(), *p;

	kp = keyvec;
	dbm_key.dsize = KEYSIZE;
	dbm_key.dptr = (char *) kp;

	for (c = 0; ; ) {
		mk_key(kp, soundex, c);
		dbm_content = FETCH(dbm_key);
		if (dbm_content.dptr == 0)
			break;

		if (!IS_DELETED(dbm_content)) {
			if (nmatched == MAXWORDS) {
				fprintf(stderr, "sp: Too many matches\n");
				exit(1);
			}

			p = mk_word(dbm_content.dptr, dbm_content.dsize, soundex);
			len = strlen(p);
			/* the word and its terminating NUL must both fit */
			if (wordlistptr + len >= &wordlist[WORDSPACE]) {
				fprintf(stderr, "sp: Out of space for words\n");
				exit(1);
			}
			strncpy(wordlistptr, p, len);
			wordlistptr[len] = '\0';
			wordptr[nmatched] = wordlistptr;
			nmatched++;
			wordlistptr += len + 1;
		}

		/* live and deleted entries alike advance the counter */
		if (++c > MAXCOUNT) {
			fprintf(stderr, "sp: entry count overflow\n");
			exit(1);
		}
	}
}

/*
 * Sort the collected matches and print those which we consider
 * to have matched 'word', each with an indicator and a running number:
 *	X	strmatch() reports the candidate equal to 'word'
 *	!	the candidate has the same four digit Soundex code
 *	' '	-a given and the candidate is longer than 'word' by more
 *		than range(word)
 *	*	anything else, unless -c was given
 * With -e only an exact match is reported, as "X word", and the routine
 * returns as soon as it is found.  With -v the number of collected
 * matches is printed at the end.
 */
choose(word)
register char *word;
{
	register int code, i, len, shown, slack, wordlen;
	register char *cand, *mark;
	int compar();

	code = soundex(word, 4);
	qsort(wordptr, nmatched, sizeof(char *), compar);
	slack = range(word);
	wordlen = strlen(word);
	shown = 0;
	for (i = 0; i < nmatched; i++) {
		cand = wordptr[i];
		if (strmatch(word, cand) == 0) {
			if (eflag) {
				printf("X");
				printf(" %s\n", word);
				return;
			}
			mark = "X";
		}
		else {
			if (eflag)
				continue;
			if (soundex(cand, 4) == code)
				mark = "!";
			else if (aflag) {
				/*
				 * NOTE(review): the original tested
				 * "wordlen < len - slack || len > wordlen + slack";
				 * both halves are the same inequality, so only one is
				 * kept.  A lower bound (len < wordlen - slack) may
				 * have been intended -- confirm before changing.
				 */
				len = strlen(cand);
				if (len > wordlen + slack)
					mark = " ";
				else if (!cflag)
					mark = "*";
				else
					continue;
			}
			else if (!cflag)
				mark = "*";
			else
				continue;
		}
		fputs(mark, stdout);
		printf("%3d. %s\n", ++shown, cand);
	}
	if (vflag)
		printf("(%d total matches)\n", nmatched);
}

/*
 * Compute an 'n' digit Soundex code for 'word'
 * See mksp.c
 *
 * The word is lower-cased in a private copy.  Each letter after the first
 * is mapped through 'soundex_code_map'; letters with code zero and
 * letters repeating the previous code add no digit, and characters
 * outside a-z are ignored.  The digits are accumulated as a base 7
 * number, padded with zero digits up to 'n', and the result is
 *	(digits << 5) + (first letter - 'a')
 *
 * Returns BAD_WORD, after a message on stderr, when the word does not
 * begin with a letter or is too long for the private copy.
 */
soundex(word, n)
register char *word;
int n;
{
	register int c, digit_part, previous_code, soundex_length;
	register char *p, *w;
	char wcopy[MAXWORDLEN + 2];

	/* never let strcpy() run past the end of the private copy */
	if (strlen(word) >= sizeof(wcopy)) {
		fprintf(stderr, "sp: Word too long: %s\n", word);
		return(BAD_WORD);
	}
	strcpy(wcopy, word);
	w = wcopy;
	for (p = w; *p != '\0'; p++) {
		/* plain char may be negative; the ctype routines need 0..UCHAR_MAX */
		if (isupper((unsigned char) *p))
			*p = tolower((unsigned char) *p);
	}
	/*
	 * After lower-casing, only a-z may index the 26 entry code map, so
	 * test the range directly instead of relying on isalpha().
	 */
	if (*w < 'a' || *w > 'z') {
		fprintf(stderr, "sp: Improper word: %s\n", word);
		return(BAD_WORD);
	}
	digit_part = 0;
	soundex_length = 0;
	previous_code = soundex_code_map[*w - 'a'];
	for (p = w + 1; *p != '\0' && soundex_length < n; p++) {
		if (*p < 'a' || *p > 'z')
			continue;
		c = soundex_code_map[*p - 'a'];
		if (c == 0 || previous_code == c) {
			/* no digit, but remember the code for the next letter */
			previous_code = c;
			continue;
		}
		digit_part = digit_part * 7 + c;
		previous_code = c;
		soundex_length++;
	}
	/* pad short codes with trailing zero digits */
	while (soundex_length++ < n)
		digit_part *= 7;
	return((digit_part << 5) + *w - 'a');
}

/*
 * Split a colon separated path string (environment variable SPPATH,
 * DEFAULT_SPPATH, or a command line argument) and append a malloc'ed copy
 * of each non-empty pathname to 'fileptr'.
 * The end of the list is marked by a NULL entry in 'fileptr'.
 *
 * Pathnames are appended after those of earlier calls; once MAXDICT
 * entries are stored the remainder of the string is ignored.
 * No attempt made to ignore duplicate pathnames
 *
 * Exits with status 1 if memory for a pathname cannot be allocated.
 */
mkfilelist(p)
register char *p;
{
	register int n;
	register char *copy, *end;
	char *malloc();

	while (*p != '\0' && dict_ptr < MAXDICT) {
		if (*p == ':') {		/* separator, or an empty entry */
			p++;
			continue;
		}
		/* find the end of this pathname */
		for (end = p; *end != ':' && *end != '\0'; end++)
			;
		n = end - p;
		copy = (char *) malloc((unsigned) (n + 1));
		if (copy == (char *) NULL) {
			fprintf(stderr, "sp: Out of dictionary space\n");
			exit(1);
		}
		strncpy(copy, p, n);
		copy[n] = '\0';
		fileptr[dict_ptr] = copy;
		dict_ptr++;
		p = end;
	}
	fileptr[dict_ptr] = (char *) NULL;
}

/*
 * qsort() comparison routine for the 'wordptr' array: orders two entries
 * by the result of strmatch() on the strings they point to.
 * The original author's note: substitute strcmp(*p, *q) if you prefer a
 * case sensitive ordering.
 */
compar(p, q)
char **p, **q;
{
	register char *left, *right;

	left = *p;
	right = *q;
	return(strmatch(left, right));
}