⟦a0b22a382⟧

TextFile

/*
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * Copyright (c) 1980 The Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley, the University of Illinois,
 * Urbana, and Sun Microsystems, Inc.  The name of either University
 * or Sun Microsystems may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#ifndef lint
/* SCCS identification string for this file ("@(#)" is the marker that the
   what(1) utility searches for).  Left out when `lint' is defined.  */
static char sccsid[] = "@(#)lexi.c	5.11 (Berkeley) 9/15/88";
#endif /* not lint */

/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */

#include "indent_globs.h"
#include <ctype.h>

/* Character classes stored in chartype[] below.  */
#define alphanum 1		/* may appear in an identifier or a number */
#define opchar 3		/* may appear in an operator */

/* Classes of reserved word, as stored in the `rwcode' member of a
   struct templ and switched on by lexi().  */
enum rwcodes
{
  rw_none = 0,			/* no class; used for the table terminator */
  rw_break,			/* break, return, goto */
  rw_switch,			/* switch */
  rw_case,			/* case, default */
  rw_struct_like,		/* struct, enum, union */
  rw_decl,			/* declaration keywords: int, char, static... */
  rw_sp_paren,			/* if, while, for */
  rw_sp_nparen,			/* do, else */
  rw_sizeof			/* sizeof */
};

/* One entry of a keyword table: the spelling of a word and the class of
   reserved word it belongs to.  */
struct templ {
    char       *rwd;		/* NUL-terminated spelling of the keyword */
    enum rwcodes rwcode;	/* which kind of reserved word it is */
};

/* Table of keywords added at run time by addkey(); 0 until the first one is
   added.  The entry after the last used one is kept as a terminator with a
   null `rwd'.  */
struct templ *user_specials = 0;
/* Number of entries allocated for user_specials, and number of them in use
   (both maintained by addkey()).  */
unsigned int user_specials_max, user_specials_idx;

/*
 * Character classification table, indexed by 7-bit ASCII code.  It is used
 * to decide what type (alphanumeric, operator) each character is:
 *
 *   0  neither of the classes below
 *   1  alphanumeric: letters, digits, '_' and '$'
 *   3  operator character
 */
char        chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07  control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f  control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17  control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f  control characters */
    0, 3, 0, 0, 1, 3, 3, 0,	/* 0x20 - 0x27  space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* 0x28 - 0x2f  ( ) star + , - . slash */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37  0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 0x38 - 0x3f  8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47  @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4f  H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57  P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* 0x58 - 0x5f  X Y Z [ backslash ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67  ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6f  h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77  p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* 0x78 - 0x7f  x y z { | } ~ DEL */
};

/* The generated perfect hash functions that recognize the reserved words.
   C code produced by gperf version 1.8.1 (GNU C++ version)
   Command-line: gperf -c -p -t -T -g -j1 -o -K rwd -N is_reserved indent.gperf  */

#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 8
#define MIN_HASH_VALUE 4
#define MAX_HASH_VALUE 40
/*
   29 keywords
   37 is the maximum key range
*/

/*
 * Perfect hash for the reserved-word table used by is_reserved().
 *
 * STR points at the first character of a word and LEN is its length, which
 * must be at least 1 (STR need not be NUL-terminated).  The result is LEN
 * plus the table values of the word's first and last characters.
 *
 * The characters are taken as unsigned bytes and checked against the size of
 * the table: a plain `char' with the high bit set would otherwise index
 * before the start of the 128-entry table (signed char) or past its end
 * (unsigned char).  Such bytes are given the value 40, the same value the
 * table assigns to every character that no reserved word starts or ends
 * with, so the resulting hash is larger than any valid one.
 */
#ifdef __GNUC__
inline
#endif
static int
hash (str, len)
     register char  *str;
     register int  len;
{
  static unsigned char hash_table[] =
    {
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 24,  9,
     10,  0, 15, 20,  6,  8, 40,  0,  0, 19,
      1, 16, 40, 40,  1,  0,  0, 12, 21,  4,
     40, 40, 40, 40, 40, 40, 40, 40,
    };
  register unsigned char first = (unsigned char) str[0];
  register unsigned char last = (unsigned char) str[len - 1];

  return len
    + (last < sizeof hash_table ? hash_table[last] : 40)
    + (first < sizeof hash_table ? hash_table[first] : 40);
}

/*
 * Look up a word in the table of reserved words.
 *
 * STR points at the first character of the word and LEN is its length; the
 * word need not be NUL-terminated.  Returns a pointer to the matching
 * wordlist entry, or 0 if the word is not a reserved word.
 *
 * The word must match the table entry over its whole length.  Comparing only
 * LEN characters is not enough, because a word that is a proper prefix of the
 * keyword in its hash slot would be accepted (for example "in" hashes to the
 * slot of "int"); hence the check that the keyword ends at s[len].
 *
 * This function is deliberately not declared `inline': it has external
 * linkage, defines a static table and calls a static function, which an
 * ISO C inline definition may not do, and a bare `inline' would leave the
 * program without an external definition to link against.
 */
struct templ*
is_reserved  (str, len)
     register char *str;
     register int len;
{

  /* Indexed by hash value; unused slots hold an empty string.  */
  static struct templ wordlist[] =
    {
      {"",}, {"",}, {"",}, {"",}, 
      {"else",  rw_sp_nparen,},
      {"short",  rw_decl,},
      {"struct",  rw_struct_like,},
      {"extern",  rw_decl,},
      {"return",  rw_break,},
      {"while",  rw_sp_paren,},
      {"register",  rw_decl,},
      {"int",  rw_decl,},
      {"switch",  rw_switch,},
      {"case",  rw_case,},
      {"char",  rw_decl,},
      {"static",  rw_decl,},
      {"double",  rw_decl,},
      {"default",  rw_case,},
      {"union",  rw_struct_like,},
      {"for",  rw_sp_paren,},
      {"float",  rw_decl,},
      {"sizeof",  rw_sizeof,},
      {"typedef",  rw_decl,},
      {"enum",  rw_struct_like,},
      {"long",  rw_decl,},
      {"if",  rw_sp_paren,},
      {"global",  rw_decl,},
      {"va_dcl",  rw_decl,},
      {"do",  rw_sp_nparen,},
      {"break",  rw_break,},
      {"unsigned",  rw_decl,},
      {"",}, {"",}, {"",}, {"",}, 
      {"void",  rw_decl,},
      {"",}, {"",}, {"",}, {"",}, 
      {"goto",  rw_break,},
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
        {
          register char *s = wordlist[key].rwd;

          /* The first characters are compared before calling strncmp; the
             final test rejects words that are only a prefix of the
             keyword.  */
          if (*s == *str
              && !strncmp (str + 1, s + 1, len - 1)
              && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}

enum codes
lexi()
{
    /* used to walk through the token */
    char *tok;
    
    int         unary_delim;	/* this is set to 1 if the current token
				 * 
				 * forces a following operator to be unary */
    static enum codes last_code;	/* the last token type returned */
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
    enum codes  code;		/* internal code to be returned */
    char        qchar;		/* the delimiter character for a string */

    unary_delim = false;
    parser_state_tos->col_1 = parser_state_tos->last_nl;	/* tell world that this token started in
				 * column 1 iff the last thing scanned was nl */
    parser_state_tos->last_nl = false;

    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	parser_state_tos->col_1 = false;	/* leading blanks imply token is not in column
				 * 1 */
	if (++buf_ptr >= buf_end)
	    fill_buffer();
    }

    token = buf_ptr;

    /* Scan an alphanumeric token */
    if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
	/*
	 * we have a character or number
	 */
	register char *j;	/* used for searching thru list of
				 * 
				 * reserved words */
	register struct templ *p;

	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
	    int         seendot = 0,
	                seenexp = 0;
	    if (*buf_ptr == '0' &&
		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
	        buf_ptr += 2;
		while (isxdigit(*buf_ptr))
		    buf_ptr++;
	    }
	    else
		while (1) {
		    if (*buf_ptr == '.')
			if (seendot)
			    break;
			else
			    seendot++;
		    buf_ptr++;
		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
			    break;
			else {
			    seenexp++;
			    seendot++;
			    buf_ptr++;
			    if (*buf_ptr == '+' || *buf_ptr == '-')
				buf_ptr++;
			}
		}
	    if (*buf_ptr == 'L' || *buf_ptr == 'l')
		buf_ptr++;
	}
	else
	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
		buf_ptr++;
		if (buf_ptr >= buf_end)
		    fill_buffer();
	    }
	token_end = buf_ptr;
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	parser_state_tos->its_a_keyword = false;
	parser_state_tos->sizeof_keyword = false;
	if (l_struct) {		/* if last token was 'struct', then this token
				 * should be treated as a declaration */
	    l_struct = false;
	    last_code = ident;
	    parser_state_tos->last_u_d = true;
	    return (decl);
	}
	parser_state_tos->last_u_d = false;	/* Operator after indentifier is binary */
	last_code = ident;	/* Remember that this is the code we will
				 * return */

	/* Check whether the token is a reserved word.  Use perfect hashing... */
        p = is_reserved (token, token_end - token);

	if (p) {		/* we have a keyword */
    found_keyword:
	    parser_state_tos->its_a_keyword = true;
	    parser_state_tos->last_u_d = true;
	    switch (p->rwcode) {
	    case rw_switch:		/* it is a switch */
		return (swstmt);
	    case rw_case:		/* a case or default */
		return (casestmt);

	    case rw_struct_like:		/* a "struct" */
		if (parser_state_tos->p_l_follow)
		    break;	/* inside parens: cast */
		l_struct = true;

		/*
		 * Next time around, we will want to know that we have had a
		 * 'struct'
		 */
	    case rw_decl:		/* one of the declaration keywords */
		if (parser_state_tos->p_l_follow) {
		    parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow;
		    break;	/* inside parens: cast */
		}
		last_code = decl;
		return (decl);

	    case rw_sp_paren:		/* if, while, for */
		return (sp_paren);

	    case rw_sp_nparen:		/* do, else */
		return (sp_nparen);

	    case rw_sizeof:
		parser_state_tos->sizeof_keyword = true;
	    default:		/* all others are treated like any other
				 * identifier */
		return (ident);
	    }			/* end of switch */
	}			/* end of if (found_it) */
	if (*buf_ptr == '(' && parser_state_tos->tos <= 1 && parser_state_tos->ind_level == 0) {
	    register char *tp = buf_ptr;
	    while (tp < buf_end)
		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
		    goto not_proc;
	    parser_state_tos->procname = token;
	    parser_state_tos->procname_end = token_end;
	    parser_state_tos->in_parameter_declaration = 1;
    not_proc:;
	}
	/*
	 * The following hack attempts to guess whether or not the current
	 * token is in fact a declaration keyword -- one that has been
	 * typedefd
	 */
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
		&& !parser_state_tos->p_l_follow
	        && !parser_state_tos->block_init
		&& (parser_state_tos->last_token == rparen || parser_state_tos->last_token == semicolon ||
		    parser_state_tos->last_token == decl ||
		    parser_state_tos->last_token == lbrace || parser_state_tos->last_token == rbrace)) {
	    parser_state_tos->its_a_keyword = true;
	    parser_state_tos->last_u_d = true;
	    last_code = decl;
	    return decl;
	}
	if (last_code == decl)	/* if this is a declared variable, then
				 * following sign is unary */
	    parser_state_tos->last_u_d = true;	/* will make "int a -1" work */
	last_code = ident;
	return (ident);		/* the ident is not in the list */
    }				/* end of procesing for alpanum character */
    /* Scan a non-alphanumeric token */

    /* If it is not a one character token, token_end will get changed
       later.  */
    token_end = buf_ptr + 1;

    if (++buf_ptr >= buf_end)
	fill_buffer();

    switch (*token) {
    case '\n':
	unary_delim = parser_state_tos->last_u_d;
	parser_state_tos->last_nl = true;	/* remember that we just had a newline */
	code = (had_eof ? code_eof : newline);

	/*
	 * if data has been exausted, the newline is a dummy, and we should
	 * return code to stop
	 */
	break;

    case '\'':			/* start of quoted character */
    case '"':			/* start of string */
	qchar = *token;

	/* Find out how big the literal is so we can set token_end.  */
	
	/* Invariant:  before loop test buf_ptr points to the next */
	/* character that we have not yet checked. */
	while (*buf_ptr != qchar && *buf_ptr != 0 && *buf_ptr != '\n')
	  {
	    if (*buf_ptr == '\\')
	      {
		buf_ptr++;
		if (buf_ptr >= buf_end)
		  fill_buffer ();
		if (*buf_ptr == '\n')
		  ++line_no;
		if (*buf_ptr == 0)
		  break;
	      }
	    buf_ptr++;
	    if (buf_ptr >= buf_end)
	      fill_buffer ();
	  }
	if (*buf_ptr == '\n' || *buf_ptr == 0)
	  {
	    diag (1,
		  qchar == '\''
		    ? "Unterminated character constant"
		    : "Unterminated string constant"
		 );
	  }
	else
	  {
	    /* Advance over end quote char.  */
	    buf_ptr++;
	    if (buf_ptr >= buf_end)
	      fill_buffer ();
	  }

	code = ident;
	break;

    case ('('):
    case ('['):
	unary_delim = true;
	code = lparen;
	break;

    case (')'):
    case (']'):
	code = rparen;
	break;

    case '#':
	unary_delim = parser_state_tos->last_u_d;
	code = preesc;
	break;

    case '?':
	unary_delim = true;
	code = question;
	break;

    case (':'):
	code = colon;
	unary_delim = true;
	break;

    case (';'):
	unary_delim = true;
	code = semicolon;
	break;

    case ('{'):
	unary_delim = true;

	/* This check is made in the code for '='.  No one who writes
	   initializers without '=' these days deserves to have indent
	   work on their code (besides which, uncommenting this would
	   screw up anything which assumes that parser_state_tos->block_init really
	   means you are in an initializer.  */
	/*
	 * if (parser_state_tos->in_or_st) parser_state_tos->block_init = 1;
	 */

	/* The following neat hack causes the braces in structure
	   initializations to be treated as parentheses, thus causing
	   initializations to line up correctly, e.g.
	   struct foo bar =
	   {{a,
	     b,
	     c},
	    {1,
	     2}};
	   If lparen is returned, token can be used to distinguish
	   between '{' and '(' where necessary.  */

	code = parser_state_tos->block_init ? lparen : lbrace;
	break;

    case ('}'):
	unary_delim = true;
	/* The following neat hack is explained under '{' above.  */
	code = parser_state_tos->block_init ? rparen : rbrace;

	break;

    case 014:			/* a form feed */
	unary_delim = parser_state_tos->last_u_d;
	parser_state_tos->last_nl = true;	/* remember this so we can set 'parser_state_tos->col_1'
				 * right */
	code = form_feed;
	break;

    case (','):
	unary_delim = true;
	code = comma;
	break;

    case '.':
	unary_delim = false;
	code = period;
	break;

    case '-':
    case '+':			/* check for -, +, --, ++ */
	code = (parser_state_tos->last_u_d ? unary_op : binary_op);
	unary_delim = true;

	if (*buf_ptr == token[0]) {
	    /* check for doubled character */
	    buf_ptr++;
	    /* buffer overflow will be checked at end of loop */
	    if (last_code == ident || last_code == rparen) {
		code = (parser_state_tos->last_u_d ? unary_op : postop);
		/* check for following ++ or -- */
		unary_delim = false;
	    }
	}
	else if (*buf_ptr == '=')
	    /* check for operator += */
	    buf_ptr++;
	else if (*buf_ptr == '>') {
	    /* check for operator -> */
	    buf_ptr++;
	    if (!pointer_as_binop) {
		unary_delim = false;
		code = unary_op;
		parser_state_tos->want_blank = false;
	    }
	}
	break;			/* buffer overflow will be checked at end of
				 * switch */

    case '=':
	if (parser_state_tos->in_or_st)
	    parser_state_tos->block_init = 1;

	if (*buf_ptr == '=') /* == */
	    buf_ptr++;

	code = binary_op;
	unary_delim = true;
	break;
	/* can drop thru!!! */

    case '>':
    case '<':
    case '!':			/* ops like <, <<, <=, !=, etc */
	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}

	code = (parser_state_tos->last_u_d ? unary_op : binary_op);
	unary_delim = true;
	break;

    default:
	if (token[0] == '/' && *buf_ptr == '*') {
	    /* it is start of comment */

	    if (++buf_ptr >= buf_end)
		fill_buffer();

	    code = comment;
	    unary_delim = parser_state_tos->last_u_d;
	    break;
	}
	while (*(buf_ptr - 1) == *buf_ptr || *buf_ptr == '=') {
	    /*
	     * handle ||, &&, etc, and also things as in int *****i
	     */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	code = (parser_state_tos->last_u_d ? unary_op : binary_op);
	unary_delim = true;


    }				/* end of switch */
    if (code != newline) {
	l_struct = false;
	last_code = code;
    }
    token_end = buf_ptr;
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
	fill_buffer();
    parser_state_tos->last_u_d = unary_delim;

    return (code);
}

/*
 * Add the given keyword to the keyword table, using val as the keyword type.
 *
 * KEY is stored by pointer, not copied, so it must stay valid for as long as
 * the table is in use.  Nothing is added if KEY is already one of the
 * built-in reserved words.
 *
 * The user_specials table is grown five entries at a time.  The entry after
 * the last keyword is always written as a terminator (null rwd, rw_none), so
 * the table is enlarged while there is still room for both the new keyword
 * and that terminator; growing only once the table is completely full would
 * let the terminator be written one entry past the end of the allocation.
 *
 * Prints a message and exits with status 1 if memory cannot be obtained.
 */
addkey(key, val)
    char       *key;
     enum rwcodes val;
{
    register struct templ *p;

    /* Check to see whether key is a reserved word or not.  The length is
       converted explicitly because is_reserved takes an int.  */
    if (is_reserved (key, (int) strlen (key)) != 0)
      return;

    if (user_specials == 0)
      {
	user_specials = (struct templ *) xmalloc (5 * sizeof (struct templ));
	if (user_specials == 0)
	  {
	    fputs ("indent: out of memory\n", stderr);
	    exit (1);
	  }
	user_specials_max = 5;
	user_specials_idx = 0;
      }
    else if (user_specials_idx + 1 >= user_specials_max)
      {
	/* Only one free entry is left, but two are needed: one for the
	   keyword and one for the terminator.  */
	user_specials_max += 5;
	user_specials = (struct templ *) xrealloc ((char *) user_specials,
						  user_specials_max
						  * sizeof (struct templ));
	if (user_specials == 0)
	  {
	    fputs ("indent: out of memory\n", stderr);
	    exit (1);
	  }
      }
    p = &user_specials[user_specials_idx++];

    p->rwd = key;
    p->rwcode = val;
    /* Keep the table terminated.  */
    p[1].rwd = 0;
    p[1].rwcode = rw_none;
    return;
}
DataMuseum.dk

DKUUG/EUUG Conference tapes

⟦a0b22a382⟧ TextFile

Derivation

TextFile