|
DataMuseum.dk Presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download Index: T c
Length: 15805 (0x3dbd) Types: TextFile Names: »clex.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki └─⟦this⟧ »EUUGD11/euug-87hel/sec1/clex/clex.c«
#ifndef INCLUDED_STREAM #include <stream.h> #endif #ifndef INCLUDED_STRING #include <string.h> #endif #ifndef INCLUDED_STDLIB #include <stdlib.h> #endif #ifndef INCLUDED_ASSERT #include <assert.h> #endif #ifndef INCLUDED_CTYPE #include <ctype.h> #endif #include "clex.h" // get string value tables, sym_str[] and keyword[] : #define CLEX_IMPLEMENTATION 1 #include "clex_sym.h" /****************************************************************************** * * * KWTABLE -- keyword hash table (internal use only) * * KWtable implements a collision-free hash table of C++ keywords. The * * table size and hash function are computed by use of a standalone C * * program, kwhash.c, included in this directory. * * * ******************************************************************************/ #define U_short unsigned short #define U_char unsigned char struct KWtable { enum { HASHSIZE = 131 }; // as computed by kwhash.c, for a=9,b=2,c=2 struct { char* kwp; Clex_sym sym; } kwhash[HASHSIZE]; KWtable(char**); U_short hash(const U_char*, U_short len); void insert(char*, Clex_sym); Clex_sym lookup(char*, short len); }; static KWtable kwt = KWtable(keywords); // keywords[] defined in Clex_sym.h KWtable:: KWtable (char** kwl) { short int i; for (i = 0; i < HASHSIZE; ++i) kwhash[i].kwp = NULL; for (i = 0; i < CLEX_NUMKEYS; ++i) insert(kwl[i], KEYWORD_S + i); // rely on assert() to prevent hash collisions -- may need // a new hash function or table size when keyword added. } // the values used in the following hash function, and HASHSIZE, were // determined by use of the standalone C program kwhash.c, to // ensure that no collisions occur. inline U_short KWtable:: hash (const U_char* cp, U_short len) { return (((U_short)cp[0] ) ^ ((U_short)cp[1] << 9) ^ ((U_short)cp[len-1] << 2) ^ (len << 2) ) % HASHSIZE; } void KWtable:: insert (char* cp, Clex_sym s) { U_short h = hash(cp, strlen(cp)); assert(kwt.kwhash[h].kwp == NULL); // collisions not permitted. 
kwt.kwhash[h].kwp = cp; kwt.kwhash[h].sym = s; } Clex_sym KWtable:: lookup (char* cp, short len) { if (len < 2 || len > 9) return (IDENT_S); short h = hash(cp, len); if (kwt.kwhash[h].kwp == NULL) return (IDENT_S); if (strcmp(kwt.kwhash[h].kwp, cp)) return (IDENT_S); return (kwt.kwhash[h].sym); } /****************************************************************************** * * * CLEX -- c++ lexical scanner * * * ******************************************************************************/ // CONSTRUCTOR Clex: // The argument block_brack, if TRUE, dictates that the contents // of square brackets "[]" be returned as a string in the string // buffer. If false, square brackets are treated as simple tokens. Clex:: Clex (FILE* f, Boolean b) { fp = f; block_brack = b; filename[0] = '\0'; bufsiz = 0; buf[0] = '\0'; // prime the pipeline: line_num = 0; look = '\n'; // be prepared to handle '#' as first char } Clex_sym Clex:: num (char c) { Clex_sym s = NUM_S; bufsiz = 0; put_in_buf(c); while (isdigit(look)) buf_one(); // hexadecimal if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X')) { do { buf_one(); } while (isxdigit(look)); if (look == 'L' || look == 'l' || look == 'U' || look == 'u') buf_one(); return terminate(s); } // long or unsigned if (look == 'L' || look == 'l' || look == 'U' || look == 'u') { buf_one(); return terminate(NUM_S); } // floating point else if (look == '.') { s = FLOATNUM_S; do { buf_one(); } while (isdigit(look)); } // scientific notation if (look == 'e' || look == 'E') { s = FLOATNUM_S; do { buf_one(); } while (isdigit(look)); } else return terminate(s); if (look == '+' || look == '-') do { buf_one(); } while (isdigit(look)); return terminate(s); } Clex_sym Clex:: ident (char first) { register Boolean maybe_kw = TRUE; register short bs = 0; buf[bs++] = first; while (isalnum(look) || look == '_' || look == '$') { // note: this function accounts for 30% of the total scan time if (maybe_kw && (isupper(look) || look == '_' )) maybe_kw = 
FALSE; buf[bs++] = look; // don't worry about overflow eat_one(); } buf[bs] = '\0'; bufsiz = bs; if (maybe_kw) return kwt.lookup(buf, bufsiz); return IDENT_S; } Clex_sym Clex:: quote (char c, Clex_sym s, Clex_mode m) { if (m == CL_NONE) bufsiz = 0; while (look != c) { if (look == EOF) { return terminate(ERROR_EOF_S); } else if (look == '\n') { return terminate(ERROR_EOLN_S); } else if (look == '\\') { eat_one(); if (look == '\n') { eat_one(); eoln(m|CL_QUOTE); continue; } else if (look == EOF) { return terminate(ERROR_EOF_S); } else put_in_buf('\\'); // this handles \' and \" too. } buf_one(); } eat_one(); // eat the closing quote return terminate(s); } // lbrack() accumulates the contents between "[" and "]" into // the string buffer, handling syntactically quoted strings, // comments, and nested brackets. Note that lbrack() is // called recursively in the case of nested brackets. Clex_sym Clex:: lbrack (Clex_mode m) { if (m == CL_NONE) bufsiz = 0; while (look != ']') { if (look == EOF) return terminate(ERROR_EOF_S); else if (look == '\n') { eat_one(); eoln(m|CL_BRACK); } else if (look == '[') { buf_one(); if (lbrack(m|CL_BRACK) == ERROR_EOF_S) return ERROR_EOF_S; // already cleaned up. else put_in_buf(']'); } else if (look == '\'' || look == '"') { char c = look; buf_one(); (void) quote(c, NONE_S, m|CL_BRACK); put_in_buf(c); } else if (look == '/') // maybe a comment { eat_one(); if (look == '/') line_comment(); else if (look == '*') { block_comment(m|CL_BRACK); if (look == EOF) return terminate(ERROR_EOF_S); } else // stash the '/' and the char after { put_in_buf('/'); buf_one(); } } else // just a character to save buf_one(); } eat_one(); // eat the ']'. return terminate(LBRACK_S); } void Clex:: block_comment(Clex_mode m) { eat_one(); // eat the '*' while (! 
(look == '*' && (eat_one(), look == '/')) ) { if (look == EOF) return; if (look == '\n') { eat_one(); eoln(m|CL_COMMENT); } else if (look != '*') eat_one(); } eat_one(); // eat the '/' } void Clex:: line_comment() { do { eat_one(); } while (look != '\n' && look != EOF); } // eat_return() is intended to save space in Clex::next() -- the // inline function eat_one() produces quite a lot of code. Clex_sym Clex:: eat_return(Clex_sym s) { eat_one(); return s; } Clex_sym Clex:: next() { short val; while (val = look, eat_one(), val != EOF) { char ch = char(val); switch (ch) { case ' ' : continue; case '_' : case '$' : return ident(ch); case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : return num(ch); case ',' : return COMMA_S; case ';' : return SEMI_S; case '[' : if (block_brack) return lbrack(CL_NONE); else return LBRACK_S; case ']' : return RBRACK_S; case '{' : return LBRACE_S; case '}' : return RBRACE_S; case '(' : return LPAR_S; case ')' : return RPAR_S; case '~' : return TILDE_S; case '?' : return QUEST_S; case '"' : return quote(ch, QUOTE_S, CL_NONE); case '\'': return quote(ch, APOS_S, CL_NONE); case '=' : // '=', '==' if (look != '=') return AS_S; else return eat_return(EQ_S); case ':' : // ":", "::" if (look != ':') return COLON_S; else return eat_return(SCOPE_S); case '!' 
: // "!", "!=" if (look != '=') return BANG_S; else return eat_return(NE_S); case '^' : // "^", "^=" if (look != '=') return CARET_S; else return eat_return(XORAS_S); case '*' : // '*', '*=' if (look != '=') return STAR_S; else return eat_return(MULAS_S); case '%' : // '%', '%=' if (look != '=') return MOD_S; else return eat_return(MODAS_S); case '|' : // "|=", "||", "|" if (look == '|') return eat_return(LOR_S); else if (look == '=') return eat_return(ORAS_S); else return VBAR_S; case '&' : // "&", "&=", "&&" if (look == '&') return eat_return(LAND_S); else if (look == '=') return eat_return(ANDAS_S); else return AMPER_S; case '+' : // '+', '++', '+=' if (look == '+') return eat_return(INCRE_S); else if (look == '=') return eat_return(ADDAS_S); else return PLUS_S; case '-' : // '--', '-=', '->', '-', if (look == '-') return eat_return(DECRE_S); else if (look == '=') return eat_return(SUBAS_S); else if (look == '>') return eat_return(DEREF_S); else return MINUS_S; case '/' : // '/*', '//', '/=', '/' if (look == '*') { block_comment(CL_NONE); if (look == EOF) // almost certainly a mistake: return ERROR_EOF_S; else continue; } else if (look == '/') { line_comment(); continue; } else if (look == '=') return eat_return(DIVAS_S); else return SLASH_S; case '.' : // ".", "..." if (isdigit(look)) return num(ch); else if (look == '.') { eat_one(); // check for "..", undefined. 
if (look != '.') return ERROR_UNKN_S; else return eat_return(ELLIP_S); } else return DOT_S; case '<' : // '<=', '<', '<<', '<<=' if (look == '=') return eat_return(LE_S); else if (look == '<') { eat_one(); if (look != '=') return SHL_S; else return eat_return(SHLAS_S); } else return LT_S; case '>' : // '>=', '>', '>>', '>>=' if (look == '=') return eat_return(GE_S); else if (look == '>') { eat_one(); if (look != '=') return SHR_S; else return eat_return(SHRAS_S); } else return GT_S; default: if (isalpha(ch)) return ident(ch); if (ch == '\n') eoln(CL_NONE); else if (iscntrl(ch)) continue; else return ERROR_UNKN_S; } } return EOF_S; } struct Quickbuf { short len; char line[10240]; void put_in(char c) { if (len < sizeof(line)-1) line[len++] = c; } void terminate() { line[len] = '\0'; } Quickbuf() { len = 0; } }; void Clex:: eoln(Clex_mode m) { // assume NL character already eaten. ++line_num; // don't process '#' lines in quotes, comments, or '#' continuations. if (m & (CL_QUOTE|CL_POUND|CL_COMMENT)) return; // eat whitespace while (look != EOF && look != '\n') { if (look == ' ' || iscntrl(char(look))) eat_one(); else break; } if (look != '#') return; // eat the '#' and subsequent whitespace do { eat_one(); if (look == EOF || look == '\n') break; } while (look == ' ' || iscntrl(char(look))); // collect the '#' line Quickbuf b; do { // record line if (look == '\\') // check for continuation line { eat_one(); if (look == '\n') { eat_one(); eoln(m|CL_POUND); } else { b.put_in('\\'); } } else if (look == '/') // check for comment in '#' line { eat_one(); if (look == '*') { block_comment(m|CL_POUND); if (look == EOF) break; } else if (look == '/') line_comment(); else { b.put_in('/'); } } else { if (iscntrl(char(look))) look = ' '; b.put_in(look); eat_one(); } } while (look != '\n' && look != EOF); b.terminate(); (void) pound(m, b.line, b.len); // call virtual handler } Boolean Clex:: pound (Clex_mode m, char* line, short len) { void(m); // to keep cfront blissful char* cp 
= line; if (!isdigit(*cp)) { if (len < 5) return FALSE; if (strncmp(cp, "line ", 5) != 0) return FALSE; // don't know what it is cp += 4; while (*cp == ' ') ++cp; if (!isdigit(*cp)) return FALSE; } // # <line> "<filename>" or #line <line> "<filename>" line_num = atoi(cp) - 1; // will be incremented by eoln() later while (isdigit(*cp)) ++cp; while (*cp == ' ') ++cp; if (*cp == '"') { char* cpq = cp; do { ++cpq; } while (*cpq != '"' && *cpq != '\0'); strncpy(filename, cp+1, cpq - cp - 1); filename[cpq - cp - 1] = '\0'; } return TRUE; } const char* Clex:: debug (Clex_sym s) { return (s >= KEYWORD_S) ? keywords[s - KEYWORD_S] : sym_str[s] ; }