|
|
DataMuseum.dk Presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download Index: T c
Length: 15805 (0x3dbd)
Types: TextFile
Names: »clex.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/euug-87hel/sec1/clex/clex.c«
#ifndef INCLUDED_STREAM
#include <stream.h>
#endif
#ifndef INCLUDED_STRING
#include <string.h>
#endif
#ifndef INCLUDED_STDLIB
#include <stdlib.h>
#endif
#ifndef INCLUDED_ASSERT
#include <assert.h>
#endif
#ifndef INCLUDED_CTYPE
#include <ctype.h>
#endif
#include "clex.h"
// get string value tables, sym_str[] and keyword[] :
#define CLEX_IMPLEMENTATION 1
#include "clex_sym.h"
/******************************************************************************
* *
* KWTABLE -- keyword hash table (internal use only) *
* KWtable implements a collision-free hash table of C++ keywords. The *
* table size and hash function are computed by use of a standalone C *
* program, kwhash.c, included in this directory. *
* *
******************************************************************************/
// Shorthand names for the unsigned types used by the keyword hash.
// These are typedefs rather than macros so that the names obey scope
// and can also be used in function-style casts such as U_short(x).
typedef unsigned short U_short;
typedef unsigned char  U_char;
// KWtable: a fixed-size hash table that maps keyword spellings to
// their Clex_sym values.
struct KWtable
{
    // table size, as computed by kwhash.c, for a=9,b=2,c=2
    enum { HASHSIZE = 131 };

    // one slot per hash value; the constructor sets every kwp to NULL
    // before keywords are inserted, and lookup() treats a NULL kwp as
    // "no keyword here".
    struct {
        char*    kwp;       // keyword spelling
        Clex_sym sym;       // symbol value returned for that keyword
    } kwhash[HASHSIZE];

    KWtable(char** kwl);

    U_short  hash(const U_char* cp, U_short len);
    void     insert(char* cp, Clex_sym s);
    Clex_sym lookup(char* cp, short len);
};
// The one keyword table used by the scanner, filled from keywords[]
// (defined in clex_sym.h).  Direct initialization builds kwt in place,
// so no temporary KWtable is constructed and then copied into it.
static KWtable kwt(keywords);
// KWtable constructor: marks every slot empty, then enters the
// CLEX_NUMKEYS keywords of kwl[] into the table.  The i-th keyword is
// given the symbol value KEYWORD_S + i.
KWtable::
KWtable (char** kwl)
{
    short int i;

    for (i = 0; i < HASHSIZE; ++i)
        kwhash[i].kwp = NULL;

    // KEYWORD_S + i is an int expression; convert it back to Clex_sym
    // explicitly instead of relying on an implicit conversion.
    for (i = 0; i < CLEX_NUMKEYS; ++i)
        insert(kwl[i], Clex_sym(KEYWORD_S + i));

    // rely on the assert() in insert() to catch hash collisions -- a
    // new hash function or table size may be needed when a keyword is
    // added.
}
// Hash the len-character word starting at cp.  Only the first, second
// and last characters and the length take part.  The shift counts used
// here, and HASHSIZE, were determined with the standalone C program
// kwhash.c so that no two keywords collide.
inline
U_short KWtable::
hash (const U_char* cp, U_short len)
{
    const U_short c_first  = cp[0];
    const U_short c_second = cp[1];
    const U_short c_last   = cp[len - 1];

    return (c_first ^ (c_second << 9) ^ (c_last << 2) ^ (len << 2)) % HASHSIZE;
}
void KWtable::
insert (char* cp, Clex_sym s)
{
U_short h = hash(cp, strlen(cp));
assert(kwt.kwhash[h].kwp == NULL); // collisions not permitted.
kwt.kwhash[h].kwp = cp;
kwt.kwhash[h].sym = s;
}
// Look up the NUL-terminated word cp, whose length is len.
// Returns the keyword's symbol value when cp is in the table and
// IDENT_S otherwise.  Words shorter than 2 or longer than 9 characters
// are rejected without hashing (presumably no keyword lies outside
// that range -- check when adding keywords).
Clex_sym KWtable::
lookup (char* cp, short len)
{
    if (len < 2 || len > 9)
        return IDENT_S;

    // hash() takes unsigned characters, so convert the pointer explicitly.
    U_short h = hash((const U_char*) cp, (U_short) len);

    // consult this object's own table rather than the file-level kwt.
    if (kwhash[h].kwp == NULL)
        return IDENT_S;                 // empty slot
    if (strcmp(kwhash[h].kwp, cp) != 0)
        return IDENT_S;                 // same hash, different word
    return kwhash[h].sym;
}
/******************************************************************************
* *
* CLEX -- c++ lexical scanner *
* *
******************************************************************************/
// CONSTRUCTOR Clex:
//   f -- stored in fp.
//   b -- stored in block_brack.  If TRUE, the contents of square
//        brackets "[]" are returned as a string in the string buffer;
//        if FALSE, square brackets are treated as simple tokens.
Clex::
Clex (FILE* f, Boolean b)
{
    // start with an empty string buffer and no file name
    buf[0]      = '\0';
    bufsiz      = 0;
    filename[0] = '\0';

    block_brack = b;
    fp          = f;

    // prime the pipeline: with a newline as the first lookahead the
    // scanner is prepared to handle '#' as the first char of the input.
    look     = '\n';
    line_num = 0;
}
// non-zero when c is one of the integer-constant suffix letters.
static int
is_int_suffix (int c)
{
    return c == 'L' || c == 'l' || c == 'U' || c == 'u';
}

// num() scans a numeric constant into the string buffer.
//   c -- the first character of the constant, already eaten by the
//        caller: a digit, or '.' when a digit follows.
// Returns NUM_S for integer constants (decimal, hexadecimal, with any
// run of L/l/U/u suffix letters) and FLOATNUM_S for constants that have
// a fraction and/or an exponent.
Clex_sym Clex::
num (char c)
{
    // a constant that starts with '.' already has its fraction.
    Clex_sym s = (c == '.') ? FLOATNUM_S : NUM_S;

    bufsiz = 0;
    put_in_buf(c);
    while (isdigit(look))
        buf_one();

    if (s == NUM_S)
    {
        // hexadecimal
        if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X'))
        {
            do { buf_one(); }
                while (isxdigit(look));
            while (is_int_suffix(look))
                buf_one();
            return terminate(s);
        }

        // long or unsigned; take the whole suffix ("UL", "lu", ...)
        if (is_int_suffix(look))
        {
            do { buf_one(); }
                while (is_int_suffix(look));
            return terminate(NUM_S);
        }

        // floating point
        if (look == '.')
        {
            s = FLOATNUM_S;
            do { buf_one(); }
                while (isdigit(look));
        }
    }

    // scientific notation: 'e', an optional sign, then the digits.
    // The sign is accepted only directly after the 'e', so that in
    // "1e5+3" the '+' is left for the next token.
    if (look == 'e' || look == 'E')
    {
        s = FLOATNUM_S;
        buf_one();
        if (look == '+' || look == '-')
            buf_one();
        while (isdigit(look))
            buf_one();
    }
    return terminate(s);
}
// ident() scans an identifier or keyword into the string buffer.
//   first -- the identifier's first character, already eaten by the
//            caller.
// Returns the keyword's symbol value if the word is found in the
// keyword table, IDENT_S otherwise.  Characters that do not fit in the
// buffer are still consumed but are not stored.
Clex_sym Clex::
ident (char first)
{
    // NOTE(review): assumes buf is a char array member, so that
    // sizeof(buf) is its capacity -- confirm against clex.h.
    const unsigned room = sizeof(buf) - 1;     // keep a byte for the '\0'

    Boolean maybe_kw = TRUE;
    unsigned bs = 0;

    buf[bs++] = first;
    while (isalnum(look) || look == '_' || look == '$')
    {
        // note: this function accounts for 30% of the total scan time

        // an upper-case letter or '_' rules out a keyword, which
        // saves the table lookup below.
        if (maybe_kw && (isupper(look) || look == '_'))
            maybe_kw = FALSE;
        if (bs < room)
            buf[bs++] = look;
        eat_one();
    }
    buf[bs] = '\0';
    bufsiz = bs;

    if (maybe_kw)
        return kwt.lookup(buf, bufsiz);
    return IDENT_S;
}
// quote() collects the body of a quoted string or character constant
// into the string buffer, up to the closing quote character.
//   c -- the closing quote character to look for.
//   s -- the symbol to return when the closing quote is found.
//   m -- scanning context; the buffer is reset only when m is CL_NONE.
// Returns s on success, ERROR_EOF_S if the input ends first, or
// ERROR_EOLN_S if an unescaped newline is met first.
Clex_sym Clex::
quote (char c, Clex_sym s, Clex_mode m)
{
    if (m == CL_NONE)
        bufsiz = 0;

    for (;;)
    {
        if (look == c)
            break;
        if (look == EOF)
            return terminate(ERROR_EOF_S);
        if (look == '\n')
            return terminate(ERROR_EOLN_S);

        if (look == '\\')
        {
            eat_one();
            if (look == '\n')
            {
                // backslash-newline: drop both and carry on
                eat_one();
                eoln(m|CL_QUOTE);
                continue;
            }
            if (look == EOF)
                return terminate(ERROR_EOF_S);
            // keep the backslash; the escaped character is stored just
            // below, which takes care of \' and \" too.
            put_in_buf('\\');
        }
        buf_one();
    }

    eat_one();                  // eat the closing quote
    return terminate(s);
}
// lbrack() accumulates the contents between "[" and "]" into
// the string buffer, handling syntactically quoted strings,
// comments, and nested brackets.  Note that lbrack() is
// called recursively in the case of nested brackets.
//   m -- scanning context; the buffer is reset only when m is CL_NONE.
// Returns LBRACK_S once the closing "]" has been eaten, or ERROR_EOF_S
// if the input ends first.
Clex_sym Clex::
lbrack (Clex_mode m)
{
    if (m == CL_NONE)
        bufsiz = 0;

    while (look != ']')
    {
        if (look == EOF)
            return terminate(ERROR_EOF_S);
        else if (look == '\n')
            { eat_one(); eoln(m|CL_BRACK); }
        else if (look == '[')
        {
            buf_one();
            if (lbrack(m|CL_BRACK) == ERROR_EOF_S)
                return ERROR_EOF_S;     // already cleaned up.
            else
                put_in_buf(']');        // the nested call ate the ']'
        }
        else if (look == '\'' || look == '"')
        {
            char c = look;
            buf_one();
            (void) quote(c, NONE_S, m|CL_BRACK);
            put_in_buf(c);
        }
        else if (look == '/')           // maybe a comment
        {
            eat_one();
            if (look == '/')
                line_comment();
            else if (look == '*')
            {
                block_comment(m|CL_BRACK);
                if (look == EOF) return terminate(ERROR_EOF_S);
            }
            else
                // Just a '/'.  Stash it and let the loop classify the
                // character after it, which may be ']', a quote, a
                // newline or EOF and must not be buffered blindly.
                put_in_buf('/');
        }
        else                            // just a character to save
            buf_one();
    }

    eat_one();                          // eat the ']'.
    return terminate(LBRACK_S);
}
// block_comment() skips a "/* ... */" comment.  On entry the lookahead
// is the '*' that opens the comment.  Returns early, with the
// lookahead at EOF, if the input ends inside the comment.
//   m -- scanning context, passed on (with CL_COMMENT added) to eoln()
//        for each newline inside the comment.
void Clex::
block_comment(Clex_mode m)
{
    eat_one();                          // eat the '*'

    for (;;)
    {
        if (look == '*')
        {
            eat_one();
            if (look == '/')
                break;                  // found the closing "*/"
        }

        if (look == EOF)
            return;

        if (look == '\n')
        {
            eat_one();
            eoln(m|CL_COMMENT);
        }
        else if (look != '*')           // a '*' is left for the test above
            eat_one();
    }

    eat_one();                          // eat the '/'
}
// line_comment() skips the rest of a "//" comment.  It always eats the
// current lookahead, then keeps eating until the lookahead is a
// newline or EOF; that newline or EOF is left for the caller.
void Clex::
line_comment()
{
    eat_one();
    while (!(look == '\n' || look == EOF))
        eat_one();
}
// eat_return() eats one character and hands back the symbol it was
// given.  It exists to save space in Clex::next() -- the inline
// function eat_one() produces quite a lot of code.
Clex_sym Clex::
eat_return(Clex_sym s)
{
    eat_one();
    return s;
}
// next() scans and returns the next token.
// Blanks, control characters and comments are skipped; every newline
// is reported to eoln().  Returns EOF_S at end of input, ERROR_EOF_S
// when a block comment runs into end of input, and ERROR_UNKN_S for
// ".." or for a character that starts no token.
Clex_sym Clex::
next()
{
    short val;

    // take the lookahead as the current character and advance.
    while (val = look, eat_one(), val != EOF)
    {
        char ch = char(val);
        switch (ch)
        {
        case ' ' : continue;

        case '_' :
        case '$' : return ident(ch);

        case '0' : case '1' : case '2' : case '3' : case '4' :
        case '5' : case '6' : case '7' : case '8' : case '9' :
                   return num(ch);

        case ',' : return COMMA_S;
        case ';' : return SEMI_S;
        case '[' : if (block_brack) return lbrack(CL_NONE);
                   else             return LBRACK_S;
        case ']' : return RBRACK_S;
        case '{' : return LBRACE_S;
        case '}' : return RBRACE_S;
        case '(' : return LPAR_S;
        case ')' : return RPAR_S;
        case '~' : return TILDE_S;
        case '?' : return QUEST_S;
        case '"' : return quote(ch, QUOTE_S, CL_NONE);
        case '\'': return quote(ch, APOS_S, CL_NONE);

        case '=' :                      // '=', '=='
            if (look != '=') return AS_S;
            else             return eat_return(EQ_S);

        case ':' :                      // ":", "::"
            if (look != ':') return COLON_S;
            else             return eat_return(SCOPE_S);

        case '!' :                      // "!", "!="
            if (look != '=') return BANG_S;
            else             return eat_return(NE_S);

        case '^' :                      // "^", "^="
            if (look != '=') return CARET_S;
            else             return eat_return(XORAS_S);

        case '*' :                      // '*', '*='
            if (look != '=') return STAR_S;
            else             return eat_return(MULAS_S);

        case '%' :                      // '%', '%='
            if (look != '=') return MOD_S;
            else             return eat_return(MODAS_S);

        case '|' :                      // "|=", "||", "|"
            if      (look == '|') return eat_return(LOR_S);
            else if (look == '=') return eat_return(ORAS_S);
            else                  return VBAR_S;

        case '&' :                      // "&", "&=", "&&"
            if      (look == '&') return eat_return(LAND_S);
            else if (look == '=') return eat_return(ANDAS_S);
            else                  return AMPER_S;

        case '+' :                      // '+', '++', '+='
            if      (look == '+') return eat_return(INCRE_S);
            else if (look == '=') return eat_return(ADDAS_S);
            else                  return PLUS_S;

        case '-' :                      // '--', '-=', '->', '-'
            if      (look == '-') return eat_return(DECRE_S);
            else if (look == '=') return eat_return(SUBAS_S);
            else if (look == '>') return eat_return(DEREF_S);
            else                  return MINUS_S;

        case '/' :                      // '/*', '//', '/=', '/'
            if (look == '*')
            {
                block_comment(CL_NONE);
                if (look == EOF)        // almost certainly a mistake:
                    return ERROR_EOF_S;
                else continue;
            }
            else if (look == '/')
                { line_comment(); continue; }
            else if (look == '=') return eat_return(DIVAS_S);
            else                  return SLASH_S;

        case '.' :                      // ".", "...", or a number like ".5"
            if (isdigit(look)) return num(ch);
            else if (look == '.')
            {
                eat_one();              // check for "..", undefined.
                if (look != '.') return ERROR_UNKN_S;
                else             return eat_return(ELLIP_S);
            }
            else return DOT_S;

        case '<' :                      // '<=', '<', '<<', '<<='
            if (look == '=') return eat_return(LE_S);
            else if (look == '<')
            {
                eat_one();
                if (look != '=') return SHL_S;
                else             return eat_return(SHLAS_S);
            }
            else return LT_S;

        case '>' :                      // '>=', '>', '>>', '>>='
            if (look == '=') return eat_return(GE_S);
            else if (look == '>')
            {
                eat_one();
                if (look != '=') return SHR_S;
                else             return eat_return(SHRAS_S);
            }
            else return GT_S;

        default:
            {
                // The <ctype.h> functions require a value that fits in
                // an unsigned char (or EOF); a plain char may be
                // negative, so convert before classifying.
                const int uc = (unsigned char) ch;

                if (isalpha(uc))
                    return ident(ch);
                if (ch == '\n')
                    eoln(CL_NONE);      // then go on to the next character
                else if (iscntrl(uc))
                    continue;
                else
                    return ERROR_UNKN_S;
            }
        }
    }
    return EOF_S;
}
// Quickbuf: a fixed-capacity character buffer used to collect one
// line of text.
struct Quickbuf
{
    short len;                  // number of characters stored so far
    char  line[10240];          // storage; the last byte is kept for '\0'

    Quickbuf() : len(0) { }

    // append c; once the buffer is full, further characters are dropped.
    void put_in(char c)
    {
        if (len >= sizeof(line) - 1)
            return;
        line[len] = c;
        ++len;
    }

    // NUL-terminate what has been collected.
    void terminate()
    {
        line[len] = '\0';
    }
};
// eoln() is called after a newline has been eaten.  It counts the
// line and, unless the scanner is inside a quote, a comment or a '#'
// continuation, checks whether the new line is a '#' line.  If so, the
// text after the '#' is collected (continuation lines joined, comments
// dropped, control characters turned into blanks) and handed to
// pound().  The newline that ends the '#' line is left in the
// lookahead.
//   m -- the context the newline was met in.
void Clex::
eoln(Clex_mode m)
{
    // assume NL character already eaten.
    ++line_num;

    // don't process '#' lines in quotes, comments, or '#' continuations.
    if (m & (CL_QUOTE|CL_POUND|CL_COMMENT))
        return;

    // eat whitespace.  (look is never EOF where it is classified below;
    // it is converted to unsigned char because <ctype.h> functions must
    // not be handed a negative char value.)
    while (look != EOF && look != '\n')
    {
        if (look == ' ' || iscntrl((unsigned char) look)) eat_one();
        else break;
    }
    if (look != '#')
        return;

    // eat the '#' and subsequent whitespace
    do { eat_one(); if (look == EOF || look == '\n') break; }
        while (look == ' ' || iscntrl((unsigned char) look));

    // collect the '#' line.  The test comes first so that a '#' with
    // nothing after it yields an empty line and leaves its newline
    // alone, instead of eating it uncounted and collecting the line
    // that follows.
    Quickbuf b;
    while (look != '\n' && look != EOF)
    {
        if (look == '\\')               // check for continuation line
        {
            eat_one();
            if (look == '\n') { eat_one(); eoln(m|CL_POUND); }
            else              { b.put_in('\\'); }
        }
        else if (look == '/')           // check for comment in '#' line
        {
            eat_one();
            if (look == '*')
            {
                block_comment(m|CL_POUND);
                if (look == EOF) break;
            }
            else if (look == '/') line_comment();
            else                  { b.put_in('/'); }
        }
        else
        {
            if (iscntrl((unsigned char) look)) look = ' ';
            b.put_in(look);
            eat_one();
        }
    }
    b.terminate();
    (void) pound(m, b.line, b.len);     // call virtual handler
}
// pound() handles the text of a '#' line (everything after the '#'
// and the blanks that follow it).
//   m    -- scanning context; not used here.
//   line -- the NUL-terminated text.
//   len  -- its length.
// Recognizes
//      # <line> "<filename>"     and     #line <line> "<filename>"
// setting line_num and, when a quoted name is present, filename.
// Returns TRUE if the line was one of these forms, FALSE otherwise.
Boolean Clex::
pound (Clex_mode m, char* line, short len)
{
    (void) m;                           // unused

    // <ctype.h> functions must not be handed a negative char value,
    // hence the conversions to unsigned char below.
    char* cp = line;
    if (!isdigit((unsigned char) *cp))
    {
        if (len < 5) return FALSE;
        if (strncmp(cp, "line ", 5) != 0)
            return FALSE;               // don't know what it is
        cp += 4;
        while (*cp == ' ') ++cp;
        if (!isdigit((unsigned char) *cp))
            return FALSE;
    }

    line_num = atoi(cp) - 1;            // will be incremented by eoln() later
    while (isdigit((unsigned char) *cp)) ++cp;
    while (*cp == ' ') ++cp;

    if (*cp == '"')
    {
        // find the closing quote, or the end of the text if it is missing
        char* cpq = cp;
        do { ++cpq; }
            while (*cpq != '"' && *cpq != '\0');

        // copy no more than filename can hold.
        // NOTE(review): assumes filename is a char array member, so
        // that sizeof(filename) is its capacity -- confirm in clex.h.
        size_t n = cpq - cp - 1;
        if (n > sizeof(filename) - 1)
            n = sizeof(filename) - 1;
        strncpy(filename, cp + 1, n);
        filename[n] = '\0';
    }
    return TRUE;
}
// debug() returns the printable string for symbol s: symbols at or
// above KEYWORD_S index keywords[] (offset by KEYWORD_S), all others
// index sym_str[] directly.
const char* Clex::
debug (Clex_sym s)
{
    if (s >= KEYWORD_S)
        return keywords[s - KEYWORD_S];
    return sym_str[s];
}