|
DataMuseum.dk presents historical artifacts from the history of: Commodore CBM-900 |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about Commodore CBM-900 Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - download
Length: 8933 (0x22e5) Types: TextFile Notes: UNIX file Names: »hyphen.c«
└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code └─⟦f4b8d8c84⟧ UNIX Filesystem └─ ⟦this⟧ »cmd/nroff/hyphen.c«
/* * Nroff/Troff. * Hyphenation. */ #include <stdio.h> #include "roff.h" #include "code.h" #include "hyphen.h" #include "char.h" /* * Try to hyphenate the word found in the word buffer. */ hyphen(cp1, cp2) CODE *cp1; CODE *cp2; { register CODE *cpl; register int n; int wi1, wi2, len, new, con; cpl = cp1; n = cp2 - cp1; while (n--) hyphbuf[n] = 0; while (cp1 < cp2) { if ((n=cp2[-1].c_code)>=CUA && n<=CLZ) break; --cp2; } len = cp2 - cp1; while (cp1 < cp2) { if ((n=cp1->c_code)>=CUA && n<=CLZ) break; cp1++; } if (len <= 4) return; wi1 = 0; wi2 = len; if (except(cpl, hyphbuf, wi1, wi2)) return; wi2 = 1 + suffix(cpl, hyphbuf, wi2-1, wi1-1); wi1 = prefix(cpl, hyphbuf, wi1, wi2); middle(cpl, hyphbuf, wi1, wi2); n = len; hyphbuf[0] = 0; hyphbuf[n-3] = 0; hyphbuf[n-2] = 0; hyphbuf[n-1] = 0; if (wi2-wi1 <= 2) { hyphbuf[wi1] = 0; hyphbuf[wi2-1] = 0; } /* automate(remcode, &new, &con, -1, cp1, hyphbuf, 0, wi2); */ n = wi2; if (--n>=0 && cpl[n].c_code==LEEE) { int m; m = 3; while (n && m--) hyphbuf[n--] = 0; } n = wi2; if (n>=2 && cpl[--n].c_code==LDDD && cpl[--n].c_code==LEEE) { if (--n<1 || cpl[n].c_code!=LZZZ || cpl[n-1].c_code!=LIII) { if (--n >= 0) hyphbuf[n] = 0; if (--n >= 0) hyphbuf[n] = 0; } } } /* * Look for exception words. */ except(wbuf, hbuf, wi1, wi2) CODE *wbuf; char *hbuf; { unsigned ti, ti0, ti1, ti2, wih, c1, c2; register int wi, n; register char *bp; ti1 = 0; ti2 = EXCSIZE; for (;;) { ti0 = ti; if ((ti=(ti1+ti2)/2) == ti0) goto fail; wi = wi1; wih = wi1; bp = exctab[ti]; for (;;) { if (*bp == LEOK) { if (wi == wi2) return (1); if (wi==wi2-1 && wbuf[wi].c_code==LSSS) return (1); ti1 = ti; break; } if (*bp == LHYP) { bp++; wih = wi; hbuf[wi-1] = 1; continue; } if (wi >= wi2) { ti1 = ti; break; } if ((c1=wbuf[wi++].c_code) != (c2=*bp++)) { if (c1 > c2) ti1 = ti; else ti2 = ti; break; } } for (wi=wi1; wi<wih; wi++) hbuf[wi] = 0; } fail: for (n=wi1; n<wi2; n++) hbuf[n] = 0; return (0); } /* * Look for prefixes. 
*/ prefix(wbuf, hbuf, wi1, wi2) CODE *wbuf; char *hbuf; register int wi2; { unsigned ti, ti0, ti1, ti2, c1, c2, con; register int wi, wih; register char *bp; do { ti0 = -1; ti1 = 0; ti2 = PRESIZE; for (;;) { ti0 = ti; if ((ti=(ti1+ti2)/2) == ti0) return (wi1); wi = wi1; wih = wi1; bp = pretab[ti]; for (;;) { if (*bp == LEOK) { bp++; goto patn; } if (*bp == LHYP) { bp++; wih = wi; hbuf[wi-1] = 1; continue; } if (wi >= wi2) return (wi1); if ((c1=wbuf[wi++].c_code) != (c2=*bp++)) { if (c1 > c2) ti1 = ti; else ti2 = ti; break; } } for (wi=wi1; wi<wih; wi++) hyphbuf[wi] = 0; } patn: if (automate(bp, &wi1, &con, 1, wbuf, hbuf, wi, wi2) == 0) { while (wi > wi1) hbuf[--wi] = 0; return (wi1); } } while (con != 0); return (wi1); } /* * Look for suffixes. */ suffix(wbuf, hbuf, wi1, wi2) CODE *wbuf; char *hbuf; register int wi2; { unsigned ti, ti0, ti1, ti2, c1, c2, con; register int wi, wih; register char *bp; do { ti0 = -1; ti1 = 0; ti2 = SUFSIZE; for (;;) { ti0 = ti; if ((ti=(ti1+ti2)/2) == ti0) return (wi1); wi = wi1; wih = wi1; bp = suftab[ti]; for (;;) { if (*bp == LEOK) { bp++; goto patn; } if (*bp == LHYP) { bp++; wih = wi; hbuf[wi] = 1; continue; } if (wi <= wi2) return (wi1); if ((c1=wbuf[wi--].c_code) != (c2=*bp++)) { if (c1 > c2) ti1 = ti; else ti2 = ti; break; } } for (wi=wi1; wi>wih; wi--) hyphbuf[wi] = 0; } patn: if (automate(bp, &wi1, &con, -1, wbuf, hbuf, wi, wi2) == 0) { while (wi < wi1) hbuf[++wi] = 0; return (wi1); } } while (con != 0); return (wi1); } /* * Try to hyphenate the middle of a word. 
*/ middle(wbuf, hbuf, wi1, wi2) CODE *wbuf; char *hbuf; { int new, con, bil, c2, c3, n; register int wi, bi, c1; wi = wi1; bi = 0; while (wi < wi2) { c1 = wbuf[wi++].c_code; if (wi<wi2 && wbuf[wi].c_code==LHHH) { wi++; switch (c1) { case LCCC: c1 = LDCH; break; case LGGG: c1 = LDGH; break; case LPPP: c1 = LDPH; break; case LSSS: c1 = LDSH; break; case LTTT: c1 = LDTH; break; default: --wi; break; } } hletbuf[bi] = c1; hindbuf[bi++] = wi-1; } bil = bi-2; for (bi=0; bi<bil; bi++) { if (!vowel(hletbuf[bi])) continue; c1 = hletbuf[bi+1]; c2 = hletbuf[bi+2]; if (c1==c2 && consn(c1)) { if (c1 == LLLL) continue; if (c1 == LSSS) { if (bi>=bil-1 || !vowel(hletbuf[bi+3])) continue; if (automate(mm0code, &new, &con, 1, wbuf, hbuf, hindbuf[bi+3], wi2)==0) continue; } hbuf[hindbuf[++bi]] = 1; continue; } if (c1==LCCC && c2==LKKK) { hbuf[hindbuf[bi+=2]] = 1; continue; } if (c1==LQQQ && c2==LUUU) { hbuf[hindbuf[bi]] = 1; continue; } if (bi < bil-1) { c3 = hletbuf[bi+3]; if (!consn(c1) || !consn(c2) || !vowel(c3)) continue; if ((n=matpair(c1, c2)) == 2) continue; if (n==1 && automate(mm1code, &new, &con, 1, wbuf, hbuf, hindbuf[bi+3], wi2)==0) continue; hbuf[hindbuf[++bi]] = 1; continue; } } } /* * See if we match a set of double consonants. If we do, * return the associated number in the table. */ matpair(c1, c2) register int c1; { register int c; register char *cp; cp = dbctab; while ((c=*cp++) != LNUL) { if (c1 < c) return (0); if (c1 > c) { cp += 2; continue; } if (c2 != *cp++) { cp++; continue; } return (*cp); } return (0); } /* * Given a pattern string, execute it on the given word * buffer. 
*/ automate(patp, newp, conp, dirn, wbuf, hbuf, wi1, wi2) char *patp; int *newp; int *conp; CODE *wbuf; char *hbuf; { int wi, wis; register int c, n; register char *bp; bp = patp; *conp = 0; wi = wi1; wis = wi1; for (;;) { switch (*bp++) { case LNUL: goto succ; case LHYP: wis = wi; *conp = 0; hbuf[wi-(dirn>0?1:0)] = 1; continue; case LRHP: hbuf[wi-(dirn>0?1:0)] = 0; continue; case LCON: *conp = 1; continue; case LNEW: n = *bp++; if (wi != wi2) { c = wbuf[wi].c_code; wi += dirn; continue; } if (n == 1) goto fail; if (n == 2) goto succ; bp += n-3; continue; case LOLD: wi -= dirn; c = wbuf[wi-dirn].c_code; continue; case LBRF: goto fail; case LBRS: goto succ; case LCBT: if (*bp++ != c) { bp++; continue; } if ((n=*bp++) == 1) goto fail; if (n == 2) goto succ; bp += n-3; continue; case LCBF: if (*bp++ == c) { bp++; continue; } if ((n=*bp++) == 1) goto fail; if (n == 2) goto succ; bp += n-3; continue; default: panic("Bad pattern"); } } succ: *newp = wis; return (1); fail: return (0); } /* * See if the given code is a vowel. */ vowel(c) register int c; { if (c>=LAAA && c<=LYYY && contab[c-LAAA]==0) return (1); return (0); } /* * See if the given code is a consonant. */ consn(c) register int c; { if (c>=LAAA && c<=LDTH && contab[c-LAAA]==1) return (1); return (0); } /* * Code to fail if we match ((er|ers)$). */ char mm0code[] ={ 0005, 0002, 0012, 0051, 0002, 0005, 0002, 0012, 0066, 0002, 0005, 0001, 0012, 0067, 0002, 0005, 0001, 0010, 0000 }; /* * Code to fail if we match ((er|ers|age|ages|est)$). */ char mm1code[] ={ 0005, 0002, 0011, 0051, 0030, 0012, 0045, 0002, 0005, 0002, 0012, 0053, 0002, 0005, 0002, 0012, 0051, 0002, 0005, 0001, 0012, 0067, 0002, 0005, 0001, 0010, 0005, 0002, 0011, 0066, 0016, 0012, 0067, 0002, 0005, 0002, 0012, 0070, 0002, 0005, 0001, 0010, 0005, 0001, 0012, 0067, 0002, 0005, 0001, 0010, 0000 }; /* * Table to determine whether a letter is a constant or a vowel. 
*/ char contab[] ={ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 }; /* * Pairs of consonants which aren't allowed somewhere. */ char dbctab[] ={ LBBB, LLLL, 2, LBBB, LRRR, 2, LCCC, LLLL, 2, LCCC, LRRR, 2, LFFF, LLLL, 2, LFFF, LRRR, 2, LFFF, LTTT, 1, LGGG, LLLL, 2, LGGG, LRRR, 2, LKKK, LNNN, 2, LLLL, LDDD, 1, LLLL, LKKK, 2, LLLL, LQQQ, 2, LMMM, LPPP, 1, LNNN, LDDD, 1, LNNN, LGGG, 1, LNNN, LKKK, 2, LNNN, LSSS, 1, LNNN, LTTT, 1, LNNN, LXXX, 2, LNNN, LDCH, 2, LPPP, LLLL, 2, LPPP, LRRR, 2, LRRR, LGGG, 1, LRRR, LKKK, 2, LRRR, LMMM, 1, LRRR, LNNN, 1, LRRR, LTTT, 1, LSSS, LPPP, 2, LSSS, LQQQ, 2, LSSS, LTTT, 1, LTTT, LRRR, 2, LTTT, LDCH, 2, LWWW, LHHH, 2, LWWW, LLLL, 2, LWWW, LNNN, 2, LWWW, LRRR, 2, LDCH, LLLL, 2, LDCH, LRRR, 2, LDDD, LGGG, 2, LDDD, LRRR, 2, LDGH, LTTT, 2, LDPH, LRRR, 2, LDTH, LRRR, 2, LNUL };