|
DataMuseum.dkPresents historical artifacts from the history of: Commodore CBM-900 |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about Commodore CBM-900 Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - download
Length: 6593 (0x19c1) Types: TextFile Notes: UNIX file Names: »nfa.c«
└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code └─⟦f4b8d8c84⟧ UNIX Filesystem └─ ⟦this⟧ »cmd/egrep/nfa.c«
/* * creation of the NFA */ #include <ctype.h> #include "egrep.h" #include "newt.h" /* only one is set by init( ) */ char *regexp; /* user's regular expression */ FILE *rexf; /* file containing user's rex */ static curc; /* current char from rex */ struct newt *getrex( ), *getterm( ), *getfac( ), *getatom( ), *newnewt( ); /* * create NFA from rex * A pointer to the NFA is returned. */ struct newt * makenfa( ) { register struct newt *p; advance( ); p = getrex( ); if (curc) misplaced( curc); if (p == NULL) fatal( "empty pattern"); return (p); } /* * check NFA semantics * Ensure correct usage of '^' and '$'. */ ncheck( npp) register struct newt **npp; { register struct newt *np; ++uniq; while (np = *npp++) if (np->n_cp && np->n_c!=EPSILON) { if (np->n_flags & N_EOL) nbeeline( np); nwalk( np->n_cp); } } /* * add finishing touch to NFA * Prepend a sort of ".*" to the front of the NFA. */ struct newt * npolish( np) register struct newt *np; { register struct newt *p; p = newnewt( ); p->n_b = newbits( TRUE); p->n_cp = newnewt( ); p->n_cp->n_c = EPSILON; p->n_cp->n_cp = p; p->n_ep = np; p->n_fp = np->n_fp; return (p); } /* * get regular expression * A trailing '\n' is tolerated, to accommodate the "-f" option. */ static struct newt * getrex( ) { register struct newt *p, *q, *start, *final; start = getterm( ); if (start == NULL) return (start); final = newnewt( ); start->n_fp->n_c = EPSILON; start->n_fp->n_cp = final; start->n_fp = final; for (; ; ) { switch (curc) { case '|': advance( ); if (p = getterm( )) break; misplaced( '|'); case '\n': advance( ); if (p = getterm( )) break; if (curc != '\0') misplaced( '\n'); default: return (start); } p->n_fp->n_c = EPSILON; p->n_fp->n_cp = final; q = newnewt( ); q->n_c = EPSILON; q->n_cp = p; q->n_ep = start; q->n_fp = final; start = q; } } /* * get term */ static struct newt * getterm( ) { register struct newt *start, *p; start = getfac( ); if (start == NULL) return (start); while (p = getfac( )) { start->n_fp->n_c = EPSILON; start->n_fp->n_cp = p; start->n_fp = p->n_fp; } return (start); } /* * get factor */ static struct newt * getfac( ) { register struct newt *p, *q, *start; start = getatom( ); if (start == NULL) return (start); p = start; if (curc == '*') { advance( ); start = newnewt( ); q = newnewt( ); start->n_c = EPSILON; start->n_cp = p; start->n_ep = q; p->n_fp->n_c = EPSILON; p->n_fp->n_cp = q; p->n_fp->n_ep = p; start->n_fp = q; } else if (curc == '+') { advance( ); q = newnewt( ); p->n_fp->n_c = EPSILON; p->n_fp->n_cp = q; p->n_fp->n_ep = p; start->n_fp = q; } else if (curc == '?') { advance( ); start = newnewt( ); start->n_c = EPSILON; start->n_cp = p; start->n_ep = p->n_fp; start->n_fp = p->n_fp; } return (start); } /* * get atom * The interpretation of the "-y" option is given by this example: * egrep -y 'R\i\c\o H[a-z]* Tudor' * means * egrep 'Rico H[A-Za-z]* T[Uu][Dd][Oo][Rr]' */ static struct newt * getatom( ) { register struct newt *start; char *charclass( ); start = newnewt( ); start->n_cp = newnewt( ); start->n_fp = start->n_cp; switch (curc) { case '\0': case '\n': case '|': case '*': case ')': case ']': case '+': case '?': nonewts( start); return (0); case '.': start->n_b = newbits( TRUE); bitclr( '\n', start->n_b); equiv( start); break; case '^': start->n_flags |= N_BOL; start->n_c = '\n'; equiv( start); break; case '$': start->n_flags |= N_EOL; start->n_c = '\n'; equiv( start); break; case '[': advance( ); start->n_b = charclass( ); equiv( start); break; case '(': advance( ); nonewts( start); start = getrex( ); if (curc == '\0') fatal( "missing ')'"); if ((start == NULL) || curc!=')') misplaced( curc); break; case '\\': advance( ); if (curc=='\0' || curc=='\n') misplaced( '\\'); start->n_c = curc; equiv( start); break; default: if (yflag && islower( curc)) { start->n_b = newbits( FALSE); bitset( curc, start->n_b); bitset( toupper( curc), start->n_b); } else start->n_c = curc; equiv( start); } advance( ); return (start); } static char * charclass( ) { register char *p; register c; register bool compl; compl = FALSE; p = newbits( FALSE); if (curc == '^') { compl = TRUE; advance( ); } if (curc == '\0') badclass( ); do { c = curc; advance( ); if (c == '\0') badclass( ); if (curc == '-') { advance( ); if (curc == '\0') badclass( ); if (curc == ']') { bitset( '-', p); bitset( c, p); } else if (c > curc) fatal( "bad char class range %c-%c", c, curc); else { do { bitset( c, p); } while (++c <= curc); advance( ); } } else bitset( c, p); } while (curc != ']'); if (bittst( '\n', p)) misplaced( '\n'); if (yflag) for (c='a'; c<='z'; ++c) if (bittst( c, p)) bitset( toupper( c), p); if (compl) { for (c=0; c<NCHARS; ++c) bitcom( c, p); bitclr( '\n', p); } return (p); } /* * allocate new newt struct */ static struct newt * newnewt( ) { register struct newt *p; p = malloc( sizeof *p); if (p == NULL) nomem( ); p->n_c = 0; p->n_flags = 0; p->n_b = NULL; p->n_cp = NULL; p->n_ep = NULL; p->n_fp = NULL; p->n_uniq = 0; p->n_id = n_id++; return (p); } /* * free newt structs * Used to free unused atoms. */ static nonewts( np) struct newt *np; { free( (char *)np->n_cp); free( (char *)np); n_id -= 2; } /* * get next char from rex * The next char from the file or the string is placed in `curc'. */ static advance( ) { register c; if (regexp) c = *regexp++; else { c = getc( rexf); if (c == EOF) c = '\0'; } curc = c & 0177; } /* * report misplaced '^' */ static nwalk( np) register struct newt *np; { while (np->n_cp) { if (np->n_uniq == uniq) return; np->n_uniq = uniq; if (np->n_flags & N_EOL) nbeeline( np); if (np->n_flags & N_BOL) misplaced( '^'); if (np->n_ep) nwalk( np->n_ep); np = np->n_cp; } } /* * report misplaced '$' */ static nbeeline( np) register struct newt *np; { while (np = np->n_cp) { if ((np->n_cp && np->n_c!=EPSILON) or (np->n_ep)) misplaced( '$'); } } static misplaced( c) { static char s[] = "`c'"; s[1] = c; fatal( "misplaced %s in pattern", c=='\n'? "newline": s); } static badclass( ) { fatal( "non-terminated char class"); }