|
|
DataMuseum.dkPresents historical artifacts from the history of: Commodore CBM-900 |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about Commodore CBM-900 Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download
Length: 8445 (0x20fd)
Types: TextFile
Notes: UNIX file
Names: »diff1.c«
└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code
└─⟦f4b8d8c84⟧ UNIX Filesystem
└─⟦this⟧ »cmd/diff/diff1.c«
/*
* Parts of diff that depend on the minimal common
* subsequence algorithm of Hirschberg.
*/
#include "diff.h"
#include <mdata.h>
#define U unsigned /* Short form */
#define NSBRK 512 /* Size of each sbrk -- allocator */
typedef unsigned int hash_t;
#if NBPCHAR==16 /* Short address machines */
#define LAST ((vaddr_t)MININT)
typedef unsigned short vaddr_t;
#else
#define LAST ((vaddr_t)MINLONG)
typedef unsigned long vaddr_t;
#endif
/* Assume MININT is a single bit */
static char partial[] = "diff: partial line omitted from %s\n";
static char jackpot[] = "Jackpot %d %d\n";
/*
* Tables for the table-driven CRC16 algorithm.
* This is used as the hash of the input lines
* and should be relatively uniform statistically.
*/
static hash_t crctab1[] = {
0000000, 0140301, 0140601, 0000500,
0141401, 0001700, 0001200, 0141101,
0143001, 0003300, 0003600, 0143501,
0002400, 0142701, 0142201, 0002100,
};
static hash_t crctab2[] = {
0000000, 0146001, 0154001, 0012000,
0170001, 0036000, 0024000, 0162001,
0120001, 0066000, 0074000, 0132001,
0050000, 0116001, 0104001, 0043000,
};
typedef struct LINES {
vaddr_t l_num;
hash_t l_hash;
} LINES;
/*
* A K-candidate entry
*/
typedef struct CAND {
vaddr_t a;
vaddr_t b;
struct CAND *prev;
} CAND;
int compar();
hash_t inhash();
CAND *candidate();
char line1[LSIZE]; /* First file's input line buffer */
char line2[LSIZE];
char nomem[] = "Out of memory";
int *allrp;
int *allep;
/*
* Called to invoke heurist version of
* diff.
*/
diffh(args)
char **args;
{
execv("/usr/lib/diffh", args);
cerr("-h doesn't work");
}
/*
* Routine is given two file streams and
* it produces the minimal list of
* differences by the Hirschberg algorithm
*/
diff(fp1, fp2)
FILE *fp1, *fp2;
{
register hash_t hash;
register vaddr_t ln1;
register vaddr_t ln2;
register LINES *V;
register vaddr_t *P;
register vaddr_t *E;
register vaddr_t *K;
register vaddr_t *J;
/*
* Calculate the hash tables for
* the second file
* and sort with hash as primary key,
▶15◀ * line-number secondary.
*/
V = (LINES *)alloc(sizeof (LINES));
V->l_num = V->l_hash = 0; /* unneeded? */
ln2 = 0;
while ((hash = inhash(fp2, fn2)) != 0) {
register LINES *Vp;
Vp = (LINES *)alloc(sizeof (LINES));
Vp->l_num = ++ln2;
Vp->l_hash = hash;
}
qsort(V+1, (int)ln2, sizeof (LINES), compar);
/*
* Read first file, building
* a table per line of
* pointers to the first elements
* of the hash-equivalent list in file2.
*/
P = (vaddr_t *)alloc(sizeof (vaddr_t));
for (ln1=0; (hash = inhash(fp1, fn1))!=0; ln1++) {
register vaddr_t *Pp;
register vaddr_t lo, mid, hi;
Pp = (vaddr_t *)alloc(sizeof (vaddr_t));
lo = 1;
hi = ln2;
while (lo <= hi) {
mid = (lo+hi)/2;
if (hash <= V[mid].l_hash)
hi = mid-1; else
lo = mid+1;
}
if (hi!=0 && V[hi].l_hash==V[hi+1].l_hash)
cerr("fatal search botch");
if (hash == V[++hi].l_hash)
*Pp = hi; else
*Pp = 0;
}
/*
* Throw away the hash values.
* Mark the last line of each hash-equivalent
* class of lines and fake line 0.
*/
{
register vaddr_t *Ep, *eEp;
register LINES *Vp;
E = V;
E[0] = LAST;
for (Ep=E+1, Vp=V+1, eEp=&E[ln2]; Ep <= eEp; Vp++, Ep++) {
*Ep = Vp->l_num;
if (Ep==eEp || Vp->l_hash!=(Vp+1)->l_hash)
*Ep |= LAST;
}
}
{
register int i;
register vaddr_t *Pp;
Pp = &E[ln2+3];
for (i=1; i<=ln1; i++)
Pp[i] = P[i];
P = Pp;
ralloc(&Pp[ln1+1]);
}
/*
* Set up to build list of K-candidates for
* all k. The K table overwrites P (when
* we add K[k+2] to the table we no longer
* need P[k].
*/
{
register vaddr_t k;
register vaddr_t i;
register CAND *cp;
K = &E[ln2+1];
K[0] = candidate((vaddr_t)0, (vaddr_t)0, NULL);
K[1] = candidate(ln1+1, ln2+1, NULL);
k = 0;
for (i=1; i<=ln1; i++) {
if (P[i] != 0)
k = merge(K, k, i, E, P[i]);
}
/*
* `k' is the length of the maximal
* common subsequence, thus K[k] points
* to the list of lines in the subsequnce.
* Store the list in `J' in a handier
* format. E, K (all but K[k]) and P
* are redundant so J overlays them.
*/
cp = K[k];
J = E;
for (i=0; i<=ln1; i++)
J[i] = 0;
J[ln1+1] = ln2+1; /* fence */
while (cp != NULL) {
J[cp->a] = cp->b;
cp = cp->prev;
}
}
{
register vaddr_t i, j, k, l;
/*
* Matching lines are now stored as pairs
* (i, J[i]), J[i]!=0. Produce output
* scripts and look for jackpots.
*/
rewind(fp1);
rewind(fp2);
fgets(line1, LSIZE, fp1);
fgets(line2, LSIZE, fp2);
for (i=j=1; i<=ln1 || j<=ln2; ) {
if (j == J[i]) {
if (!(*equal)(line1, line2)) {
/*
* This can be avoided at
* the expense of another
* pass. So we settle for this
* potentially non-minimal result.
* Possibly this printout could
* disappear.
*/
fprintf(stderr, jackpot, i, j);
change( (U)i, (U)i, (U)j, (U)j );
text1(line1);
prsep();
text2(line2);
prend();
} else
text(line1);
i++;
fgets(line1, LSIZE, fp1);
j++;
fgets(line2, LSIZE, fp2);
continue;
}
for (k=i; J[k]==0; k++)
;
l = J[k];
if (i == k)
append( (U)i, (U)j, (U)(l-1) );
else if (j == l)
delete( (U)i, (U)(k-1), (U)j );
else
change( (U)i, (U)(k-1), (U)j, (U)(l-1) );
while (i != k) {
text1(line1);
i++;
fgets(line1, LSIZE, fp1);
}
prsep();
while (j != l) {
text2(line2);
j++;
fgets(line2, LSIZE, fp2);
}
prend();
}
}
}
/*
* Read input characters and hash for each
* line. Return the hash value computed
* using CRC-16 methods. A zero value
* means EOF or error and thus will not
* be returned as a hash value.
*/
hash_t
inhash(fp, fn)
register FILE *fp;
char *fn;
{
register int c;
register int tmp;
register int hash;
register int space;
space = hash = 0;
while ((c = getc(fp)) != '\n') {
if (c == EOF) {
if (hash != 0)
fprintf(stderr, partial, fn);
return (0);
}
if (bflag && (c==' ' || c=='\t')) {
space++;
continue;
}
compute:
tmp = c^hash;
hash = (hash>>8) ^ crctab1[tmp&017] ^ crctab2[(tmp&0360)>>4];
if (space) {
c = ' ';
space = 0;
goto compute;
}
}
if (hash == 0)
hash++;
return (hash);
}
/*
* Sort comparison routine.
* Hash value is primary key,
* line number secondary.
*/
compar(a, b)
register LINES *a, *b;
{
if (a->l_hash < b->l_hash)
return (-1);
if (a->l_hash > b->l_hash)
return (1);
if (a->l_num < b->l_num)
return (-1);
if (a->l_num > b->l_num)
return (1);
return (0);
}
/*
* Build an entry in the K-candidates
* table
*/
CAND *
candidate(a, b, prev)
vaddr_t a;
vaddr_t b;
CAND *prev;
{
register CAND *v;
v = (CAND *)alloc(sizeof(CAND));
v->a = a;
v->b = b;
v->prev = prev;
return (v);
}
/*
* The merge step, called for
* each index in file 1.
* `k' is index of last filled element
* of `K', `i' is current index in
* file 1, and `p' is first element
* of class of lines in file 2
* equivalent to line `i' by hash value.
*/
merge(K, k, i, E, p)
CAND **K;
vaddr_t k, i;
vaddr_t *E;
vaddr_t p;
{
register vaddr_t r;
register CAND *c;
r = 0;
c = K[0];
do {
register vaddr_t lo, mid, hi;
register vaddr_t j;
j = E[p]&~LAST;
lo = r;
hi = k;
if (K[lo]->b >= j)
continue;
while (lo <= hi) {
mid = (lo+hi)/2;
if (j <= K[mid]->b)
hi = mid-1; else
lo = mid+1;
}
if (K[hi]->b<j && j<=K[hi+1]->b) {
if (j < K[hi+1]->b) {
#if SIMUL
register CAND *prev;
prev = K[hi];
#endif
K[r] = c;
r = hi+1;
#if SIMUL
c = candidate(i, j, prev);
#else
c = candidate(i, j, K[hi]);
#endif
}
if (hi == k) {
K[k+2] = K[k+1];
k++;
break;
}
}
} while (!(E[p++] & LAST));
K[r] = c;
return (k);
}
/*
* This allocator simply does sbrk calls so that
* it can grow (easily) the space as it needs without
* threading it. It also checks and prints and error
* if out of space.
*/
char *
alloc(size)
register unsigned size;
{
register int *cp;
if ((size = (size+sizeof(int)-1)/sizeof(int)) == 0)
cerr(nomem);
if (allrp==NULL || allrp+size>=allep) {
if ((allep = (int *)sbrk(NSBRK)) == NULL)
cerr(nomem);
if (allrp == NULL)
allrp = allep;
allep += NSBRK/sizeof(int);
}
cp = allrp;
allrp += size;
return (cp);
}
/*
* Like the brk system call, reset
* current end of memory to `ep'.
*/
ralloc(ep)
register int *ep;
{
allrp = ep;
}