DataMuseum.dk

Presents historical artifacts from the history of:

Commodore CBM-900

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about Commodore CBM-900

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - download

⟦e1ec5dab0⟧ TextFile

    Length: 8933 (0x22e5)
    Types: TextFile
    Notes: UNIX file
    Names: »hyphen.c«

Derivation

└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code
    └─⟦f4b8d8c84⟧ UNIX V7 Filesystem
        └─ ⟦this⟧ »cmd/nroff/hyphen.c« 

TextFile

/*
 * Nroff/Troff.
 * Hyphenation.
 */
#include <stdio.h>
#include "roff.h"
#include "code.h"
#include "hyphen.h"
#include "char.h"

/*
 * Try to hyphenate the word found in the word buffer.
 *
 * cp1 points at the first CODE of the word and cp2 one past the last.
 * The result is left in the global hyphbuf[], indexed relative to the
 * original cp1: every entry for the word is cleared first, and the
 * lookup stages (except, suffix, prefix, middle) set entries to 1 where
 * they find a hyphenation point.  Nothing is returned.
 *
 * NOTE(review): the locals `new' and `con' are only referenced by the
 * commented-out automate() call below.
 */
hyphen(cp1, cp2)
CODE *cp1;
CODE *cp2;
{
	register CODE *cpl;
	register int n;
	int wi1, wi2, len, new, con;

	/* Remember the start of the word; all indices are relative to it. */
	cpl = cp1;
	/* Clear one hyphbuf entry for every code in [cp1, cp2). */
	n = cp2 - cp1;
	while (n--)
		hyphbuf[n] = 0;
	/* Drop trailing codes that fall outside the range CUA..CLZ. */
	while (cp1 < cp2) {
		if ((n=cp2[-1].c_code)>=CUA && n<=CLZ)
			break;
		--cp2;
	}
	/* cp1 still equals cpl here, so len is also the end index. */
	len = cp2 - cp1;
	/*
	 * Step cp1 over leading codes outside CUA..CLZ.
	 * NOTE(review): cp1 is not used again after this loop (only by
	 * the commented-out call below), so wi1 still starts at 0 and
	 * this loop has no effect on the result -- confirm the intent.
	 */
	while (cp1 < cp2) {
		if ((n=cp1->c_code)>=CUA && n<=CLZ)
			break;
		cp1++;
	}
	/* Words of four codes or fewer are never hyphenated. */
	if (len <= 4)
		return;
	wi1 = 0;
	wi2 = len;
	/* An exception-table hit supplies the complete answer. */
	if (except(cpl, hyphbuf, wi1, wi2))
		return;
	/*
	 * suffix() walks backwards from the last index down to (but not
	 * including) index -1 and returns the new last index; prefix()
	 * walks forwards and returns the new first index.  middle() then
	 * works on what is left between them.
	 */
	wi2 = 1 + suffix(cpl, hyphbuf, wi2-1, wi1-1);
	wi1 = prefix(cpl, hyphbuf, wi1, wi2);
	middle(cpl, hyphbuf, wi1, wi2);
	/* Never keep a mark on the first entry or on the last three. */
	n = len;
	hyphbuf[0] = 0;
	hyphbuf[n-3] = 0;
	hyphbuf[n-2] = 0;
	hyphbuf[n-1] = 0;
	/*
	 * If at most two codes remain between prefix and suffix, clear
	 * the marks at both ends of that remainder.
	 */
	if (wi2-wi1 <= 2) {
		hyphbuf[wi1] = 0;
		hyphbuf[wi2-1] = 0;
	}
/*
	automate(remcode, &new, &con, -1, cp1, hyphbuf, 0, wi2);
*/
	/*
	 * If the code just before wi2 is LEEE, clear up to three marks
	 * ending at that index (the loop stops before reaching index 0).
	 */
	n = wi2;
	if (--n>=0 && cpl[n].c_code==LEEE) {
		int m;
		m = 3;
		while (n && m--)
			hyphbuf[n--] = 0;
	}
	/*
	 * If the two codes before wi2 are LEEE, LDDD and they are not
	 * preceded by LIII, LZZZ, clear the marks at wi2-4 and wi2-5
	 * (each only when the index is non-negative).  Note that n is
	 * decremented inside the conditions, so their order matters.
	 */
	n = wi2;
	if (n>=2 && cpl[--n].c_code==LDDD && cpl[--n].c_code==LEEE) {
		if (--n<1 || cpl[n].c_code!=LZZZ || cpl[n-1].c_code!=LIII) {
			if (--n >= 0)
				hyphbuf[n] = 0;
			if (--n >= 0)
				hyphbuf[n] = 0;
		}
	}
}

/*
 * Look for exception words.
 *
 * Binary-search exctab[] (EXCSIZE entries) for the word held in
 * wbuf[wi1..wi2).  Each table entry is a string of letter codes in
 * which LHYP marks a hyphenation point and LEOK ends the entry.
 * While an entry is being compared, every LHYP sets hbuf[wi-1] for the
 * current word index wi; the marks of a rejected entry are cleared
 * again before the next probe.
 *
 * Returns 1 if an entry matches the whole word, or the whole word less
 * a final LSSS; the marks of that entry are left in hbuf.  Returns 0 if
 * nothing matches, with hbuf[wi1..wi2) cleared.
 */
except(wbuf, hbuf, wi1, wi2)
CODE *wbuf;
char *hbuf;
{
	unsigned ti, ti0, ti1, ti2, wih, c1, c2;
	register int wi, n;
	register char *bp;

	/*
	 * ti holds the previous probe so that the loop can stop once the
	 * midpoint no longer moves.  It must start at a value no midpoint
	 * can take; it used to be read uninitialised on the first pass,
	 * which could end the search before anything was probed.
	 */
	ti = -1;
	ti1 = 0;
	ti2 = EXCSIZE;
	for (;;) {
		ti0 = ti;
		if ((ti=(ti1+ti2)/2) == ti0)
			goto fail;
		wi = wi1;
		wih = wi1;
		bp = exctab[ti];
		for (;;) {
			if (*bp == LEOK) {
				/* Entry used up: accept an exact match ... */
				if (wi == wi2)
					return (1);
				/* ... or a match followed by one LSSS. */
				if (wi==wi2-1 && wbuf[wi].c_code==LSSS)
					return (1);
				ti1 = ti;
				break;
			}
			if (*bp == LHYP) {
				bp++;
				/* Remember how far marks have been set. */
				wih = wi;
				hbuf[wi-1] = 1;
				continue;
			}
			/*
			 * NOTE(review): the word ran out before the entry
			 * did, yet the search moves upwards (ti1) as if the
			 * word sorted after the entry -- confirm against
			 * the ordering of exctab.
			 */
			if (wi >= wi2) {
				ti1 = ti;
				break;
			}
			if ((c1=wbuf[wi++].c_code) != (c2=*bp++)) {
				if (c1 > c2)
					ti1 = ti;
				else
					ti2 = ti;
				break;
			}
		}
		/* Undo the marks set while comparing the rejected entry. */
		for (wi=wi1; wi<wih; wi++)
			hbuf[wi] = 0;
	}
fail:
	for (n=wi1; n<wi2; n++)
		hbuf[n] = 0;
	return (0);
}

/*
 * Look for prefixes.
 *
 * Binary-search pretab[] (PRESIZE entries) for an entry that matches
 * the word in wbuf starting at index wi1 and going forwards, never
 * reaching wi2.  An entry is a string of letter codes in which LHYP
 * sets a mark at hbuf[wi-1]; it ends with LEOK, and the bytes after
 * LEOK are a pattern program that is handed to automate().
 *
 * When automate() succeeds it stores the new start index in wi1, and if
 * it also sets `con' the whole search is repeated from that new start.
 * Returns the (possibly advanced) start index of the unprocessed part
 * of the word.
 */
prefix(wbuf, hbuf, wi1, wi2)
CODE *wbuf;
char *hbuf;
register int wi2;
{
	unsigned ti, ti0, ti1, ti2, c1, c2;
	int con;		/* int, because automate() takes an int * */
	register int wi, wih;
	register char *bp;

	do {
		/*
		 * ti is the previous probe; the search ends once the
		 * midpoint stops moving.  Reset it for every round to a
		 * value no midpoint can take.  The old code assigned -1 to
		 * ti0 instead, which was overwritten at once, so ti was
		 * read uninitialised on the first round and stale on
		 * later ones.
		 */
		ti = -1;
		ti1 = 0;
		ti2 = PRESIZE;
		for (;;) {
			ti0 = ti;
			if ((ti=(ti1+ti2)/2) == ti0)
				return (wi1);
			wi = wi1;
			wih = wi1;
			bp = pretab[ti];
			for (;;) {
				if (*bp == LEOK) {
					/* Letters matched: run the pattern. */
					bp++;
					goto patn;
				}
				if (*bp == LHYP) {
					bp++;
					wih = wi;
					hbuf[wi-1] = 1;
					continue;
				}
				/* Word exhausted before the entry was. */
				if (wi >= wi2)
					return (wi1);
				if ((c1=wbuf[wi++].c_code) != (c2=*bp++)) {
					if (c1 > c2)
						ti1 = ti;
					else
						ti2 = ti;
					break;
				}
			}
			/*
			 * Undo the marks of the rejected entry.  Use the
			 * buffer we were given rather than the global.
			 */
			for (wi=wi1; wi<wih; wi++)
				hbuf[wi] = 0;
		}
	patn:
		if (automate(bp, &wi1, &con, 1, wbuf, hbuf, wi, wi2) == 0) {
			/* Pattern rejected the prefix: clear its marks. */
			while (wi > wi1)
				hbuf[--wi] = 0;
			return (wi1);
		}
	} while (con != 0);
	return (wi1);
}

/*
 * Look for suffixes.
 *
 * Mirror image of prefix(): binary-search suftab[] (SUFSIZE entries)
 * for an entry that matches the word in wbuf starting at index wi1 and
 * going backwards, never reaching wi2 (so wi1 is the last index of the
 * word and wi2 lies below the first).  Entry bytes are compared against
 * successively lower word indices.  LHYP sets a mark at hbuf[wi]; the
 * entry ends with LEOK, and the bytes after LEOK are a pattern program
 * that is handed to automate() with direction -1.
 *
 * When automate() succeeds it stores the new end index in wi1, and if
 * it also sets `con' the whole search is repeated from that new end.
 * Returns the (possibly lowered) last index of the unprocessed part of
 * the word.
 */
suffix(wbuf, hbuf, wi1, wi2)
CODE *wbuf;
char *hbuf;
register int wi2;
{
	unsigned ti, ti0, ti1, ti2, c1, c2;
	int con;		/* int, because automate() takes an int * */
	register int wi, wih;
	register char *bp;

	do {
		/*
		 * ti is the previous probe; the search ends once the
		 * midpoint stops moving.  Reset it for every round to a
		 * value no midpoint can take.  The old code assigned -1 to
		 * ti0 instead, which was overwritten at once, so ti was
		 * read uninitialised on the first round and stale on
		 * later ones.
		 */
		ti = -1;
		ti1 = 0;
		ti2 = SUFSIZE;
		for (;;) {
			ti0 = ti;
			if ((ti=(ti1+ti2)/2) == ti0)
				return (wi1);
			wi = wi1;
			wih = wi1;
			bp = suftab[ti];
			for (;;) {
				if (*bp == LEOK) {
					/* Letters matched: run the pattern. */
					bp++;
					goto patn;
				}
				if (*bp == LHYP) {
					bp++;
					wih = wi;
					hbuf[wi] = 1;
					continue;
				}
				/* Word exhausted before the entry was. */
				if (wi <= wi2)
					return (wi1);
				if ((c1=wbuf[wi--].c_code) != (c2=*bp++)) {
					if (c1 > c2)
						ti1 = ti;
					else
						ti2 = ti;
					break;
				}
			}
			/*
			 * Undo the marks of the rejected entry.  Use the
			 * buffer we were given rather than the global.
			 */
			for (wi=wi1; wi>wih; wi--)
				hbuf[wi] = 0;
		}
	patn:
		if (automate(bp, &wi1, &con, -1, wbuf, hbuf, wi, wi2) == 0) {
			/* Pattern rejected the suffix: clear its marks. */
			while (wi < wi1)
				hbuf[++wi] = 0;
			return (wi1);
		}
	} while (con != 0);
	return (wi1);
}

/*
 * Try to hyphenate the middle of a word.
 *
 * Works on wbuf[wi1..wi2).  The letters are first copied into the
 * global hletbuf[], folding a C, G, P, S or T that is followed by an H
 * into a single digraph code; hindbuf[] records, for every slot, the
 * index of the last word code that went into it.  The slots are then
 * scanned for a vowel followed by two consonants, and marks are set in
 * hbuf[] through hindbuf[].
 */
middle(wbuf, hbuf, wi1, wi2)
CODE *wbuf;
char *hbuf;
{
	int newpos, cont, last, second, third, kind;
	register int pos, slot, first;

	/* Pass 1: build the folded letter list. */
	slot = 0;
	for (pos = wi1; pos < wi2; slot++) {
		first = wbuf[pos].c_code;
		pos++;
		if (pos < wi2 && wbuf[pos].c_code == LHHH) {
			/* Swallow the H only when it forms a digraph. */
			if (first == LCCC) {
				first = LDCH;
				pos++;
			} else if (first == LGGG) {
				first = LDGH;
				pos++;
			} else if (first == LPPP) {
				first = LDPH;
				pos++;
			} else if (first == LSSS) {
				first = LDSH;
				pos++;
			} else if (first == LTTT) {
				first = LDTH;
				pos++;
			}
		}
		hletbuf[slot] = first;
		hindbuf[slot] = pos - 1;
	}

	/* Pass 2: look at every vowel that has two letters after it. */
	last = slot - 2;
	for (slot = 0; slot < last; slot++) {
		if (vowel(hletbuf[slot]) == 0)
			continue;
		first = hletbuf[slot+1];
		second = hletbuf[slot+2];
		if (first == second && consn(first)) {
			/* Doubled consonant: never split LL. */
			if (first == LLLL)
				continue;
			if (first == LSSS) {
				/* SS needs a vowel after it and mm0code's consent. */
				if (slot >= last-1 || !vowel(hletbuf[slot+3]))
					continue;
				if (automate(mm0code, &newpos, &cont, 1,
					wbuf, hbuf, hindbuf[slot+3], wi2) == 0)
					continue;
			}
			/* Mark the first of the pair and resume after it. */
			slot++;
			hbuf[hindbuf[slot]] = 1;
		} else if (first == LCCC && second == LKKK) {
			/* CK: mark the K. */
			slot += 2;
			hbuf[hindbuf[slot]] = 1;
		} else if (first == LQQQ && second == LUUU) {
			/* QU: mark the vowel itself. */
			hbuf[hindbuf[slot]] = 1;
		} else if (slot < last-1) {
			/* General case: vowel, consonant, consonant, vowel. */
			third = hletbuf[slot+3];
			if (!(consn(first) && consn(second) && vowel(third)))
				continue;
			kind = matpair(first, second);
			if (kind == 2)
				continue;
			if (kind == 1 && automate(mm1code, &newpos, &cont, 1,
				wbuf, hbuf, hindbuf[slot+3], wi2) == 0)
				continue;
			slot++;
			hbuf[hindbuf[slot]] = 1;
		}
	}
}

/*
 * Look the consonant pair (c1, c2) up in dbctab.
 *
 * dbctab is a list of three-byte rows (first code, second code, value)
 * ended by LNUL.  The scan gives up at the first row whose first code
 * is greater than c1.  Returns the value of the matching row, or 0 if
 * there is none.
 */
matpair(c1, c2)
register int c1;
{
	register int key;
	register char *row;

	for (row = dbctab; (key = row[0]) != LNUL; row += 3) {
		if (c1 < key)
			break;
		if (c1 == key && c2 == row[1])
			return (row[2]);
	}
	return (0);
}

/*
 * Given a pattern string, execute it on the given word
 * buffer.
 *
 * patp is a small byte-coded program.  It is run against wbuf starting
 * at index wi1 and moving by dirn (+1 or -1 in the callers visible
 * here) per letter; wi2 is the index at which the word is considered
 * exhausted.  `c' holds the most recently fetched letter code.
 *
 * Operations, as implemented by the switch below:
 *	LNUL		end of pattern: succeed
 *	LHYP		remember the current index as the result, clear
 *			*conp and set the mark at hbuf[wi-1] (dirn > 0)
 *			or hbuf[wi] (otherwise)
 *	LRHP		clear that same mark
 *	LCON		set *conp to 1
 *	LNEW n		if letters remain, fetch wbuf[wi] into c and
 *			step wi by dirn; otherwise take branch n
 *	LOLD		step wi back by dirn and reload c from the code
 *			before it
 *	LBRF		fail
 *	LBRS		succeed
 *	LCBT ch n	take branch n if c == ch
 *	LCBF ch n	take branch n if c != ch
 *
 * A branch operand n of 1 means fail and 2 means succeed; any other
 * value moves the pattern pointer by n-3 from the byte following the
 * operand.
 *
 * Returns 1 on success, after storing the remembered index (wi1 if no
 * LHYP was executed) in *newp; returns 0 on failure and leaves *newp
 * alone.  *conp is cleared on entry in either case.
 *
 * NOTE(review): c is read by LCBT/LCBF without having been set if a
 * pattern tests a letter before its first LNEW/LOLD; the default case
 * relies on panic() -- presumably it does not return, confirm.
 */
automate(patp, newp, conp, dirn, wbuf, hbuf, wi1, wi2)
char *patp;
int *newp;
int *conp;
CODE *wbuf;
char *hbuf;
{
	int wi, wis;
	register int c, n;
	register char *bp;

	bp = patp;
	*conp = 0;
	wi = wi1;
	/* wis is the index reported through *newp on success. */
	wis = wi1;
	for (;;) {
		switch (*bp++) {
		case LNUL:
			goto succ;
		case LHYP:
			wis = wi;
			*conp = 0;
			hbuf[wi-(dirn>0?1:0)] = 1;
			continue;
		case LRHP:
			hbuf[wi-(dirn>0?1:0)] = 0;
			continue;
		case LCON:
			*conp = 1;
			continue;
		case LNEW:
			/* The operand is consumed even when it is not used. */
			n = *bp++;
			if (wi != wi2) {
				c = wbuf[wi].c_code;
				wi += dirn;
				continue;
			}
			/* Word exhausted: act on the branch operand. */
			if (n == 1)
				goto fail;
			if (n == 2)
				goto succ;
			bp += n-3;
			continue;
		case LOLD:
			wi -= dirn;
			c = wbuf[wi-dirn].c_code;
			continue;
		case LBRF:
			goto fail;
		case LBRS:
			goto succ;
		case LCBT:
			/* No match: skip the branch operand and go on. */
			if (*bp++ != c) {
				bp++;
				continue;
			}
			if ((n=*bp++) == 1)
				goto fail;
			if (n == 2)
				goto succ;
			bp += n-3;
			continue;
		case LCBF:
			/* Match: skip the branch operand and go on. */
			if (*bp++ == c) {
				bp++;
				continue;
			}
			if ((n=*bp++) == 1)
				goto fail;
			if (n == 2)
				goto succ;
			bp += n-3;
			continue;
		default:
			panic("Bad pattern");
		}
	}
succ:
	*newp = wis;
	return (1);
fail:
	return (0);
}

/*
 * Return 1 if the code c lies in LAAA..LYYY and its contab entry
 * is 0 (a vowel), otherwise 0.
 */
vowel(c)
register int c;
{
	if (c < LAAA || c > LYYY)
		return (0);
	return (contab[c-LAAA] == 0);
}

/*
 * Return 1 if the code c lies in LAAA..LDTH and its contab entry
 * is 1 (a consonant), otherwise 0.
 */
consn(c)
register int c;
{
	if (c < LAAA || c > LDTH)
		return (0);
	return (contab[c-LAAA] == 1);
}

/*
 * Code to fail if we match ((er|ers)$).
 *
 * This is a pattern program for automate(); middle() runs it forwards
 * from the letter that follows a doubled S.
 *
 * Reading of the bytes, assuming 005=LNEW, 010=LBRS, 012=LCBF and
 * 051/066/067 = e/r/s (the values live in hyphen.h -- TODO confirm):
 *	 0: LNEW 2	next letter, succeed if the word has ended
 *	 2: LCBF e 2	succeed unless it is `e'
 *	 5: LNEW 2
 *	 7: LCBF r 2	succeed unless it is `r'
 *	10: LNEW 1	fail if the word has ended	("er")
 *	12: LCBF s 2	succeed unless it is `s'
 *	15: LNEW 1	fail if the word has ended	("ers")
 *	17: LBRS	anything longer succeeds
 *	18: LNUL
 */
char mm0code[] ={
	 0005, 0002, 0012, 0051, 0002, 0005, 0002, 0012,
	 0066, 0002, 0005, 0001, 0012, 0067, 0002, 0005,
	 0001, 0010, 0000
};

/*
 * Code to fail if we match ((er|ers|age|ages|est)$).
 */
char mm1code[] ={
	 0005, 0002, 0011, 0051, 0030, 0012, 0045, 0002,
	 0005, 0002, 0012, 0053, 0002, 0005, 0002, 0012,
	 0051, 0002, 0005, 0001, 0012, 0067, 0002, 0005,
	 0001, 0010, 0005, 0002, 0011, 0066, 0016, 0012,
	 0067, 0002, 0005, 0002, 0012, 0070, 0002, 0005,
	 0001, 0010, 0005, 0001, 0012, 0067, 0002, 0005,
	 0001, 0010, 0000
};

/*
 * Table to determine whether a letter is a consonant or a vowel.
 *
 * Indexed by (code - LAAA): vowel() accepts an entry of 0 and consn()
 * an entry of 1.  There are 31 entries; consn() indexes as far as LDTH,
 * so the last five presumably belong to the digraph codes that follow
 * the 26 letters (assumes the letter codes are consecutive from LAAA --
 * confirm against hyphen.h).  The zero entries sit at offsets 0, 4, 8,
 * 14, 20 and 24.
 */
char contab[] ={
	0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
	1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1
};

/*
 * Pairs of consonants which aren't allowed somewhere.
 *
 * Each row is (first code, second code, value) and the list ends with
 * LNUL.  matpair() returns the value of the matching row; middle()
 * never splits a pair whose value is 2 and consults mm1code before
 * splitting a pair whose value is 1.
 *
 * The rows MUST be grouped by first code in ascending numeric order,
 * because matpair() stops at the first row whose first code is greater
 * than the one it is looking for.  The two LDDD rows used to sit among
 * the digraph rows at the end (where they sort by name, not by value),
 * which made them unreachable; they now follow the LCCC rows.  This
 * relies on the letter codes being consecutive from LAAA, as the
 * indexing of contab[] implies, with the digraph codes LDCH..LDTH
 * after LZZZ.
 */
char dbctab[] ={
	LBBB, LLLL, 2,
	LBBB, LRRR, 2,
	LCCC, LLLL, 2,
	LCCC, LRRR, 2,
	LDDD, LGGG, 2,
	LDDD, LRRR, 2,
	LFFF, LLLL, 2,
	LFFF, LRRR, 2,
	LFFF, LTTT, 1,
	LGGG, LLLL, 2,
	LGGG, LRRR, 2,
	LKKK, LNNN, 2,
	LLLL, LDDD, 1,
	LLLL, LKKK, 2,
	LLLL, LQQQ, 2,
	LMMM, LPPP, 1,
	LNNN, LDDD, 1,
	LNNN, LGGG, 1,
	LNNN, LKKK, 2,
	LNNN, LSSS, 1,
	LNNN, LTTT, 1,
	LNNN, LXXX, 2,
	LNNN, LDCH, 2,
	LPPP, LLLL, 2,
	LPPP, LRRR, 2,
	LRRR, LGGG, 1,
	LRRR, LKKK, 2,
	LRRR, LMMM, 1,
	LRRR, LNNN, 1,
	LRRR, LTTT, 1,
	LSSS, LPPP, 2,
	LSSS, LQQQ, 2,
	LSSS, LTTT, 1,
	LTTT, LRRR, 2,
	LTTT, LDCH, 2,
	LWWW, LHHH, 2,
	LWWW, LLLL, 2,
	LWWW, LNNN, 2,
	LWWW, LRRR, 2,
	LDCH, LLLL, 2,
	LDCH, LRRR, 2,
	LDGH, LTTT, 2,
	LDPH, LRRR, 2,
	LDTH, LRRR, 2,
	LNUL
};