|
|
DataMuseum.dkPresents historical artifacts from the history of: Commodore CBM-900 |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about Commodore CBM-900 Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download
Length: 7922 (0x1ef2)
Types: TextFile
Notes: UNIX file
Names: »grep2.c«
└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code
└─⟦f4b8d8c84⟧ UNIX Filesystem
└─⟦this⟧ »cmd/grep/grep2.c«
/*
* Regular expression routines.
* These routines are currently used by grep but
* they are somewhat general and thus could be
* used elsewhere.
*/
#include <stdio.h>
#include <ctype.h>
#include "grep.h"
char *newcc();
char *e_exec();
static char repri[] = {
0 /* REEND */, 2 /* STEND */,
3 /* OR */, 6 /* LPAR */,
1 /* RPAR */, 5 /* CLOS */,
5 /* NECLOS */, 5 /* ZORO */,
4 /* CONC */
};
RE **rebuild();
RE *renode();
static union rebit relval;
static char cc[NCLASS];
static (*reinf)();
static (*reunf)();
static char *rein;
static char resyn[] = "Regular expression syntax error";
static char reoflo[] = "Regular expression overflow";
static char nospace[] = "Out of space";
char *reerror; /* Erro code */
int redual; /* Set for dual-case comparisons */
int refull; /* Full expressions accepted */
/*
* Lexical token reader for
* regular expressions.
*/
static
relex(ec)
{
register c;
if ((c = (*reinf)()) == ec)
return (REEND);
switch (c) {
case EOF:
case '\n':
reerror = "Non-terminated regular expression";
return (REEND);
case '[':
return (cclass(ec));
case '^':
return (BOL);
case '$':
return (EOL);
case '(':
if (!refull)
goto def;
return (LPAR);
case ')':
if (!refull)
goto def;
return (RPAR);
case '.':
return (ANY);
case '|':
if (!refull)
goto def;
return (OR);
case '*':
return (CLOS);
case '?':
if (!refull)
goto def;
return (ZORO);
case '+':
if (!refull)
goto def;
return (NECLOS);
case '\\':
c = (*reinf)();
default:
def:
relval.u_ival = c;
return (redual ? DCONC : CONC);
}
}
/*
* Read in a character class from
* an RE (called from relex).
*/
static
cclass(ec)
int ec;
{
register c, i, pc;
int comp;
for (i=0; i<sizeof cc; i++)
cc[i] = 0;
if ((c = (*reinf)()) != '^') {
comp = 0;
(*reunf)(c);
} else
comp = 1;
pc = ec;
while ((c = (*reinf)()) != ']') {
if (c == ec) {
reerror = "Non-terminated character class";
return (REEND);
}
if (c=='-' && pc!=ec) {
if ((c = (*reinf)()) == ']')
break;
for (i=pc; i<=c; i++)
cc[i/NBPC] |= 1<<(i%NBPC);
pc = ec;
} else {
cc[c/NBPC] |= 1<<(c%NBPC);
pc = c;
}
}
if (comp)
for (i=0; i<sizeof cc; i++)
cc[i] ^= -1;
if ((relval.u_cptr = newcc(cc)) == NULL)
return (REEND);
return (redual ? DCCLASS : CCLASS);
}
/*
* Allocate space for a new character class
* and copy into it.
*/
static char *
newcc(occ)
register char *occ;
{
register char *ncc;
register unsigned n;
char *rcc;
n = NCLASS;
if ((rcc = ncc = malloc(n)) == NULL) {
reerror = nospace;
return (NULL);
}
do {
*ncc++ = *occ++;
} while (--n);
return (rcc);
}
/*
* Parse a regular expression.
* The arguments are an input string to parse (`in'),
* the end character to terminated the expression (`ec'),
* and (if `in' is NULL) functions to get and unget characters,
* getf and ungetf, respectively.
*/
RE *
reparse(in, ec, getf, ungetf)
char *in;
int (*getf)(), (*ungetf)();
{
RE *restk[NRE];
struct opstk {
char o_op;
char o_pri;
} opstk[NRE];
register struct opstk *osp;
register op;
register RE **rsp;
int concflg, op1, pri;
if ((rein = in) != NULL) {
reinf = reget;
reunf = reunget;
} else {
reinf = getf;
reunf = ungetf;
}
reerror = NULL;
concflg = 0;
osp = opstk;
osp->o_op = STEND;
osp->o_pri = repri[STEND];
rsp = restk;
*rsp++ = NULL;
for (;;) {
if (termop(op = relex(ec))) {
if ((*rsp++ = renode(op, relval.u_cptr, NULL)) == NULL)
return (NULL);
if (rsp >= &restk[NRE-1]) {
reerror = reoflo;
return (NULL);
}
if (!concflg) {
concflg++;
continue;
} else
op = CONC;
} else if (op == OR) {
if (!concflg) {
reerror = resyn;
return (NULL);
}
concflg = 0;
} else if (op==CLOS || op==NECLOS || op==ZORO) {
if (!concflg) {
reerror = resyn;
return (NULL);
}
} else if (op == LPAR) {
if (concflg) {
(++osp)->o_op = CONC;
osp->o_pri = repri[CONC];
concflg = 0;
}
}
pri = repri[op];
for (;;) {
if (reerror != NULL)
return (NULL);
if (pri>osp->o_pri || (op==CONC && osp->o_op==CONC)) {
if (op == LPAR)
pri = repri[RPAR];
if (osp >= &opstk[NRE-1]) {
reerror = reoflo;
return (NULL);
}
(++osp)->o_op = op;
osp->o_pri = pri;
if (!postop(op))
break;
else
pri = repri[REEND];
}
switch (op1 = (osp--)->o_op) {
case STEND:
if (op == REEND)
return (*--rsp);
osp++;
break;
case LPAR:
if (op != RPAR) {
reerror = "Unbalanced parentheses";
return (NULL);
}
break;
default:
rsp = rebuild(rsp, op1);
continue;
}
break;
}
}
}
static RE **
rebuild(rsp, op)
register RE **rsp;
{
register RE *left, *right;
switch (op) {
case OR:
right = *--rsp;
left = *--rsp;
break;
case CLOS:
case NECLOS:
case ZORO:
left = *--rsp;
right = NULL;
break;
case CONC:
right = *--rsp;
left = *--rsp;
if (left->r_next == NULL)
left->r_next = right;
else {
for (; left->r_next->r_next!=NULL; left = left->r_next)
;
left->r_next->r_next = right;
}
return (++rsp);
default:
reerror = "RE botch in rebuild";
}
*rsp++ = renode(op, left, right);
return (rsp);
}
/*
* Build regular expression node.
*/
static RE *
renode(op, left, right)
RE *left, *right;
{
register RE *rep;
if ((rep = (RE *)malloc(sizeof (RE))) == NULL) {
reerror = nospace;
return (NULL);
}
rep->r_next = (RE*)NULL;
rep->r_op = op;
rep->r_left.u_re = left;
rep->r_right.u_re = right;
return (rep);
}
static char *sb; /* string beginning */
/*
* User-callable driver for regular expression
* execution. Called with regular expression code and
* text string.
*/
reinterp(rep, s)
RE *rep;
register char *s;
{
sb = s;
if (rep!=NULL && rep->r_op==BOL)
return(e_exec(rep, s) != NULL);
for (; *s!='\0'; s++)
if (e_exec(rep, s) != NULL)
return (1);
return (0);
}
/*
* Internal regular expression
* execution routines
*/
static char *
e_exec(rep, s)
register RE *rep;
register char *s;
{
register c;
char *ss, *es;
for ( ; rep!=NULL; rep = rep->r_next)
switch (rep->r_op) {
case BOL:
if (s != sb)
return (NULL);
break;
case EOL:
if (*s != '\0')
return (NULL);
break;
case ANY:
if (*s++ == '\0')
return (NULL);
break;
case CONC:
if (*s++ != rep->r_left.u_ival)
return (NULL);
break;
case DCONC:
if (islower(rep->r_left.u_ival)) {
if (isupper(c = *s++))
c = tolower(c);
if (c != rep->r_left.u_ival)
return (NULL);
} else
if (*s++ != rep->r_left.u_ival)
return (NULL);
break;
case CCLASS:
c = *s++;
if ((rep->r_left.u_cptr[c/NBPC] & (1<<(c%NBPC))) == 0)
return (NULL);
break;
case DCCLASS:
if ((c = *s++)>='A' && c<='Z')
c |= 'a'-'A';
if ((rep->r_left.u_cptr[c/NBPC] & (1<<(c%NBPC))) == 0)
return (NULL);
break;
case OR:
ss = s;
if ((s = e_exec(rep->r_left.u_re, s)) == NULL)
if ((s = e_exec(rep->r_right.u_re, ss))==NULL)
return (NULL);
break;
case CLOS:
ss = s;
while ((es = e_exec(rep->r_left.u_re, s)) != NULL)
s = es;
while (s >= ss)
if ((es = e_exec(rep->r_next, s--))!=NULL)
return(es);
return (NULL);
case NECLOS:
ss = s;
while ((es = e_exec(rep->r_left.u_re, s)) != NULL)
s = es;
while (s > ss)
if ((es = e_exec(rep->r_next, s--))!=NULL)
return (es);
return (NULL);
case ZORO:
ss = s;
if ((es = e_exec(rep->r_left.u_re, s)) != NULL)
s = es;
while (s >= ss)
if ((es = e_exec(rep->r_next, s--))!=NULL)
return (es);
return (NULL);
default:
fprintf(stderr, "Regular expression botch\n");
exit(2);
}
return (s);
}
/*
* Default functions to get and unget
* characters from the regular expression.
*/
static
reget()
{
return (*rein++);
}
static
reunget(c)
{
--rein;
}