|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T f
Length: 6334 (0x18be)
Types: TextFile
Names: »funiq.c«
└─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89
└─⟦this⟧ »./babel/swedish/SLaTeX/spell/funiq.c«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
└─⟦23cd347d5⟧ »unix3.0/babel.tar.Z«
└─⟦2fb9f645a⟧
└─⟦this⟧ »babel/swedish/SLaTeX/spell/funiq.c«
/* funiq.c
*
* Written by Bill Vaughn,
* CVS, Univ. of Rochester, Rochester, NY
* {seismo,allegra,decvax}!rochester!ur-cvsvax!bill
*
* This program reads one line from the standard input, searches for
* the line in an evolving binary tree sorted alphabetically.
* If the line is NOT found, it is inserted in the tree and put on
* the standard output. No action is taken if the line IS found.
*
* The program is intended to work with one-word-per-line input,
* but doesn't enforce that rule. The input to 'funiq' should be
* random, NOT sorted. Sorted input gives worst case running time.
* The program dynamically allocates all of its array space and uses
* a binary tree storage method which, according to theory, has an
* average search time proportional to log(N), where N is the number
* of nodes in the tree at the time the search is performed.
*
* Principal uses:
* 1) 'Funiq' can replace the 'sort -u' in the pipeline in /usr/bin/spell.
* 'Funiq' will not block the pipe and it will run faster than sort.
* The input to /usr/lib/spell doesn't have to be sorted anyway; the
* final sort in the pipe does that. The reason for the 'sort -u' at
* that point in the pipeline is to limit the calls to the hashing
* function in /usr/lib/spell. 'Funiq' accomplishes this task much
* more efficiently.
*
* 2) Also, 'funiq' can replace the default mode of 'uniq' in cases where
* the output need not be sorted. The input should not be sorted
* (as opposed to 'uniq', where is must be sorted.)
*
* Possible improvements:
* Use a balancing method on the binary tree to improve
* performance. See Knuth, 'Searching and Sorting', pp. 451-463
* for an algorithm to balance binary trees during insertion.
* It will require an extra byte of data per node however.
* (Further analysis reveals that this might not be worth the effort.)
*
* Compilation:
* cc -O funiq.c -o funiq
* or cc -O -DSTATS funiq.c -o funiq (produces statistics)
*/
#include <stdio.h>
/* The following defines the binary tree node structure
*/
struct btreenode {
char *str;
struct btreenode *left, *right;
};
struct btreenode *head; /* binary tree head*/
struct btreenode *alloc, *hend; /* allocator pointers */
char *cstore, *cend; /* character array begining and end */
char *nextstr(), *allocstr();
struct btreenode *nextnode(), *allocbtree();
char *calloc(), *malloc(), *gets();
/* Feel free to change the following allocation parameters as desired/needed.
* One can spend as much or as little time in 'malloc/calloc' as one wants.
*/
#define NODES 4096 /* Initial allocation for btree nodes */
#define STR 8192 /* Initial allocation for strings */
#define ADDNOD 1024 /* Additional allocation for btree nodes */
#define ADDSTR 2048 /* Additional allocation for strings */
#ifdef STATS
int charsused = 0;
char statfile[] = "/usr/dict/bstats";
#endif
main(argc,argv)
char *argv[];
{
register int n;
register char *q;
register struct btreenode *r;
#ifdef STATS
register ncmps = 0;
register maxdepth = 0;
register nodesused = 0;
register depth;
register total = 0;
FILE *f;
#endif
if (argc > 1) {
if (freopen(argv[1],"r",stdin) == NULL) {
fprintf(stderr,"funiq: %s not accessible\n",argv[1]);
exit(1);
}
}
/* Initialize the binary tree with first string of the input.
* Being a little more selective here might help
* balance the tree, but who knows? Random input is
* supposed to produce a reasonably balanced tree.
*/
q = cstore = allocstr(STR); /* allocate string space */
cend = cstore + STR;
if (gets(q)==NULL) /* get first line */
exit(0);
alloc = head = allocbtree(NODES);/* allocate tree nodes */
hend = head + NODES; /* we're depending upon 'calloc' */
alloc->str = q; /* to set all the pointers to NULL */
puts(q); /* output the sucker */
#ifdef STATS
total++;
nodesused++;
#endif
q = nextstr(q); /* get new string pointer. */
/* Traverse tree with each input line, inserting and printing
* it if it's NOT found and discarding it if it is.
*/
while(gets(q)!=NULL) {
r = head;
#ifdef STATS
total++;
depth=0;
while ((ncmps++ , (n = strcmp(q,r->str))) != 0) {
#else
while ((n = strcmp(q,r->str)) != 0) {
#endif
if (n < 0) {
if (r->left != NULL) {
r = r->left;
#ifdef STATS
depth++;
#endif
continue; /* traversing left */
}
else {
/* Creating a left subtree */
r->left = nextnode();
#ifdef STATS
nodesused++;
#endif
puts( (r->left)->str = q );
q = nextstr(q);
break; /* get next line */
}
}
else {
if (r->right != NULL) {
r = r->right;
#ifdef STATS
depth++;
#endif
continue; /* traversing right */
}
else {
/* Creating a right subtree */
r->right = nextnode();
#ifdef STATS
nodesused++;
#endif
puts( (r->right)->str = q );
q = nextstr(q);
break; /* get next line */
}
}
}
#ifdef STATS
if (++depth > maxdepth)
maxdepth = depth;
}
f = fopen(statfile,"a");
fprintf(f,"%d\t%d\t%d\t%d\t%d\n",
charsused,nodesused,total,ncmps,maxdepth);
fclose(f);
#else
}
#endif
exit(0); /* That was fairly simple wasn't it? */
}
/*
* Returns pointer to an array of 'n' btree nodes.
*/
struct btreenode *allocbtree(n)
{
register struct btreenode *x;
x = (struct btreenode *)
calloc((unsigned)n,(unsigned)sizeof(struct btreenode));
if (x == NULL) {
fprintf(stderr,"funiq: not enough memory for tree nodes\n");
exit(1);
}
return(x);
}
/*
* Returns a pointer to the next available btree node.
* Gets more space if necessary.
*/
struct btreenode *nextnode()
{
if (++alloc >= hend ) {
alloc = allocbtree(ADDNOD);
hend = alloc + ADDNOD;
}
return(alloc);
}
/*
* Returns pointer to 'n' bytes for character string storage.
*/
char *allocstr(n)
{
register char *x;
x = malloc((unsigned)n);
if (x == NULL) {
fprintf(stderr,"funiq: not enough memory for strings\n");
exit(1);
}
return(x);
}
/*
* Returns a pointer to the next available string space.
* Gets more space if necessary.
*/
char *nextstr(s)
register char *s;
{
register char *x;
register int i;
#ifdef STATS
charsused += (i = strlen(s) + 1);
#else
i = strlen(s) + 1;
#endif
if ((x = s + i) >= cend) {
x = allocstr(ADDSTR);
cend = x + ADDSTR;
}
return(x);
}