|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T l
Length: 16488 (0x4068) Types: TextFile Names: »lexi.c«
└─⟦a05ed705a⟧ Bits:30007078 DKUUG GNU 2/12/89 └─⟦df1e72981⟧ »./indent-1.1.tar.Z« └─⟦d08ad2b83⟧ └─⟦this⟧ »dist-indent/lexi.c«
/* * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980 The Regents of the University of California. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley, the University of Illinois, * Urbana, and Sun Microsystems, Inc. The name of either University * or Sun Microsystems may not be used to endorse or promote products * derived from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint static char sccsid[] = "@(#)lexi.c 5.11 (Berkeley) 9/15/88"; #endif /* not lint */ /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. 
*/ #include "indent_globs.h" #include <ctype.h> #define alphanum 1 #define opchar 3 enum rwcodes { rw_none, rw_break, rw_switch, rw_case, rw_struct_like, /* struct, enum, union */ rw_decl, rw_sp_paren, /* if, while, for */ rw_sp_nparen, /* do, else */ rw_sizeof }; struct templ { char *rwd; enum rwcodes rwcode; }; struct templ *user_specials = 0; unsigned int user_specials_max, user_specials_idx; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; /* The generated perfect hash function functions that recognize the reserved words. C code produced by gperf version 1.8.1 (GNU C++ version) Command-line: gperf -c -p -t -T -g -j1 -o -K rwd -N is_reserved indent.gperf */ #define MIN_WORD_LENGTH 2 #define MAX_WORD_LENGTH 8 #define MIN_HASH_VALUE 4 #define MAX_HASH_VALUE 40 /* 29 keywords 37 is the maximum key range */ #ifdef __GNUC__ inline #endif static int hash (str, len) register char *str; register int len; { static unsigned char hash_table[] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 24, 9, 10, 0, 15, 20, 6, 8, 40, 0, 0, 19, 1, 16, 40, 40, 1, 0, 0, 12, 21, 4, 40, 40, 40, 40, 40, 40, 40, 40, }; return len + hash_table[str[len - 1]] + hash_table[str[0]]; } #ifdef 
__GNUC__ inline #endif struct templ* is_reserved (str, len) register char *str; register int len; { static struct templ wordlist[] = { {"",}, {"",}, {"",}, {"",}, {"else", rw_sp_nparen,}, {"short", rw_decl,}, {"struct", rw_struct_like,}, {"extern", rw_decl,}, {"return", rw_break,}, {"while", rw_sp_paren,}, {"register", rw_decl,}, {"int", rw_decl,}, {"switch", rw_switch,}, {"case", rw_case,}, {"char", rw_decl,}, {"static", rw_decl,}, {"double", rw_decl,}, {"default", rw_case,}, {"union", rw_struct_like,}, {"for", rw_sp_paren,}, {"float", rw_decl,}, {"sizeof", rw_sizeof,}, {"typedef", rw_decl,}, {"enum", rw_struct_like,}, {"long", rw_decl,}, {"if", rw_sp_paren,}, {"global", rw_decl,}, {"va_dcl", rw_decl,}, {"do", rw_sp_nparen,}, {"break", rw_break,}, {"unsigned", rw_decl,}, {"",}, {"",}, {"",}, {"",}, {"void", rw_decl,}, {"",}, {"",}, {"",}, {"",}, {"goto", rw_break,}, }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = hash (str, len); if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE) { register char *s = wordlist[key].rwd; if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } return 0; } enum codes lexi() { /* used to walk through the token */ char *tok; int unary_delim; /* this is set to 1 if the current token * * forces a following operator to be unary */ static enum codes last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ enum codes code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ unary_delim = false; parser_state_tos->col_1 = parser_state_tos->last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ parser_state_tos->last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ parser_state_tos->col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } token = buf_ptr; /* Scan an 
alphanumeric token */ if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { /* * we have a character or number */ register char *j; /* used for searching thru list of * * reserved words */ register struct templ *p; if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { int seendot = 0, seenexp = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { buf_ptr += 2; while (isxdigit(*buf_ptr)) buf_ptr++; } else while (1) { if (*buf_ptr == '.') if (seendot) break; else seendot++; buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') buf_ptr++; } } if (*buf_ptr == 'L' || *buf_ptr == 'l') buf_ptr++; } else while (chartype[*buf_ptr] == alphanum) { /* copy it over */ buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } token_end = buf_ptr; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } parser_state_tos->its_a_keyword = false; parser_state_tos->sizeof_keyword = false; if (l_struct) { /* if last token was 'struct', then this token * should be treated as a declaration */ l_struct = false; last_code = ident; parser_state_tos->last_u_d = true; return (decl); } parser_state_tos->last_u_d = false; /* Operator after indentifier is binary */ last_code = ident; /* Remember that this is the code we will * return */ /* Check whether the token is a reserved word. Use perfect hashing... 
*/ p = is_reserved (token, token_end - token); if (p) { /* we have a keyword */ found_keyword: parser_state_tos->its_a_keyword = true; parser_state_tos->last_u_d = true; switch (p->rwcode) { case rw_switch: /* it is a switch */ return (swstmt); case rw_case: /* a case or default */ return (casestmt); case rw_struct_like: /* a "struct" */ if (parser_state_tos->p_l_follow) break; /* inside parens: cast */ l_struct = true; /* * Next time around, we will want to know that we have had a * 'struct' */ case rw_decl: /* one of the declaration keywords */ if (parser_state_tos->p_l_follow) { parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow; break; /* inside parens: cast */ } last_code = decl; return (decl); case rw_sp_paren: /* if, while, for */ return (sp_paren); case rw_sp_nparen: /* do, else */ return (sp_nparen); case rw_sizeof: parser_state_tos->sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && parser_state_tos->tos <= 1 && parser_state_tos->ind_level == 0) { register char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; parser_state_tos->procname = token; parser_state_tos->procname_end = token_end; parser_state_tos->in_parameter_declaration = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !parser_state_tos->p_l_follow && !parser_state_tos->block_init && (parser_state_tos->last_token == rparen || parser_state_tos->last_token == semicolon || parser_state_tos->last_token == decl || parser_state_tos->last_token == lbrace || parser_state_tos->last_token == rbrace)) { parser_state_tos->its_a_keyword = true; parser_state_tos->last_u_d = true; last_code = decl; return decl; } if 
(last_code == decl) /* if this is a declared variable, then * following sign is unary */ parser_state_tos->last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ /* If it is not a one character token, token_end will get changed later. */ token_end = buf_ptr + 1; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = parser_state_tos->last_u_d; parser_state_tos->last_nl = true; /* remember that we just had a newline */ code = (had_eof ? code_eof : newline); /* * if data has been exausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; /* Find out how big the literal is so we can set token_end. */ /* Invariant: before loop test buf_ptr points to the next */ /* character that we have not yet checked. */ while (*buf_ptr != qchar && *buf_ptr != 0 && *buf_ptr != '\n') { if (*buf_ptr == '\\') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer (); if (*buf_ptr == '\n') ++line_no; if (*buf_ptr == 0) break; } buf_ptr++; if (buf_ptr >= buf_end) fill_buffer (); } if (*buf_ptr == '\n' || *buf_ptr == 0) { diag (1, qchar == '\'' ? "Unterminated character constant" : "Unterminated string constant" ); } else { /* Advance over end quote char. */ buf_ptr++; if (buf_ptr >= buf_end) fill_buffer (); } code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = parser_state_tos->last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* This check is made in the code for '='. 
No one who writes initializers without '=' these days deserves to have indent work on their code (besides which, uncommenting this would screw up anything which assumes that parser_state_tos->block_init really means you are in an initializer. */ /* * if (parser_state_tos->in_or_st) parser_state_tos->block_init = 1; */ /* The following neat hack causes the braces in structure initializations to be treated as parentheses, thus causing initializations to line up correctly, e.g. struct foo bar = {{a, b, c}, {1, 2}}; If lparen is returned, token can be used to distinguish between '{' and '(' where necessary. */ code = parser_state_tos->block_init ? lparen : lbrace; break; case ('}'): unary_delim = true; /* The following neat hack is explained under '{' above. */ code = parser_state_tos->block_init ? rparen : rbrace; break; case 014: /* a form feed */ unary_delim = parser_state_tos->last_u_d; parser_state_tos->last_nl = true; /* remember this so we can set 'parser_state_tos->col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (parser_state_tos->last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (parser_state_tos->last_u_d ? 
unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; parser_state_tos->want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (parser_state_tos->in_or_st) parser_state_tos->block_init = 1; if (*buf_ptr == '=') /* == */ buf_ptr++; code = binary_op; unary_delim = true; break; /* can drop thru!!! */ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { if (++buf_ptr >= buf_end) fill_buffer(); } code = (parser_state_tos->last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = parser_state_tos->last_u_d; break; } while (*(buf_ptr - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ if (++buf_ptr >= buf_end) fill_buffer(); } code = (parser_state_tos->last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } token_end = buf_ptr; if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); parser_state_tos->last_u_d = unary_delim; return (code); } /* * Add the given keyword to the keyword table, using val as the keyword type */ addkey(key, val) char *key; enum rwcodes val; { register struct templ *p; /* Check to see whether key is a reserved word or not. 
*/ if (is_reserved (key, strlen (key)) != 0) return; if (user_specials == 0) { user_specials = (struct templ *) xmalloc (5 * sizeof (struct templ)); if (user_specials == 0) { fputs ("indent: out of memory\n", stderr); exit (1); } user_specials_max = 5; user_specials_idx = 0; } else if (user_specials_idx == user_specials_max) { user_specials_max += 5; user_specials = (struct templ *) xrealloc ((char *) user_specials, user_specials_max * sizeof (struct templ)); } p = &user_specials[user_specials_idx++]; p->rwd = key; p->rwcode = val; p[1].rwd = 0; p[1].rwcode = rw_none; return; }