⟦3daac1e69⟧

TextFile

/* Copyright 1988 Stephan v. Bechtolsheim */

/* This file is part of the TeXPS Software Package.

The TeXPS Software Package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.  No author or distributor
accepts responsibility to anyone for the consequences of using it
or for whether it serves any particular purpose or works at all,
unless he says so in writing.  Refer to the TeXPS Software Package
General Public License for full details.

Everyone is granted permission to copy, modify and redistribute
the TeXPS Software Package, but only under the conditions described in the
TeXPS Software Package General Public License.   A copy of this license is
supposed to have been given to you along with TeXPS Software Package so you
can know your rights and responsibilities.  It should be in a
file named CopyrightLong.  Among other things, the copyright notice
and this notice must be preserved on all copies.  */

#include <stdio.h>
#include <ctype.h>
#include "release.h"
#include "extfil.h"

/* This program reads in a token definition file (.tdef) to generate
   output which can be used as partial input to yacc and lex
   to recognize keywords. Depending on the options given it can also
   write a .h file with #defines, a C function printing tokens
   symbolically, and a .sgtok file (see main()). */

/* Externals. */
/* String helpers defined elsewhere. main() calls StrcpyAlloc(string) and
   StrncpyAlloc(string, n); presumably they return a newly allocated copy
   of the string / of its first n characters -- confirm against their
   definitions. */
extern char * StrcpyAlloc();
extern char * StrncpyAlloc();
/* getopt() interface: argument of the current option and index of the
   first command line argument which is not an option. */
extern char * optarg;
extern int optind;
/* Passed to InitProgName() in main(). */
extern char * tmpdir_default;

/* Definitions. */
/* Values for the global Verbose. */
#define V_VERBOSE 2
#define V_REGULAR 1
#define V_QUIET   0

/* Version and date of this program as printed by Usage(). */
#define VERSION "1.2"
#define DATE "May 9, 1990"

#define TRUE 1
#define FALSE 0

/* Forward declarations. */
void Usage();

/* Global variables. */

/* Input token definition file. Opened for reading in main() from the
   first non-option argument, default extension "tdef". */
EX_FILES Ex_TokensIn; 

/* Output of .h file goes here (-H option): one #define per token. */
EX_FILES Ex_HOut;

/* Output for building the lexical analyzer goes here (-L option). Only
   K_ type tokens generate a line in this file. */
EX_FILES Ex_LOut;

/* Name of the function which is generated when the -C option is used.
   The name "PrintToken" specified here can be changed using the -N option.
   -N must precede -C on the command line because -C writes the function
   header as soon as it is processed. */
char *PrintTokenFunctionName = "PrintToken";

/* Output for building a C program which prints the tokens in
   a "symbolic" way. Use the -C option of this program. */
EX_FILES Ex_COut;

/* Output for the token definitions used in yacc. Use the -Y option of this
   program. */
EX_FILES Ex_YOut;

/* Output for my sgmllex.c program (-S option). Only K_ type tokens
   generate a line in this file. */
EX_FILES Ex_Sgtok;

/* Verbose level is any number between 0 [quiet] and 2 [verbose], see the
   V_ definitions. The initial value 1 is V_REGULAR; -q and -v change it. */
int Verbose = 1;

/* Program name, set from argv[0] in main(). */
char *ProgName;

/* TokenCounter. Start at 1, because token zero has a special meaning.
   It is used by lex to signal the end of the input. Incremented by main()
   after every token definition line. */
int TokenCounter = 1;

/* Keywords are only matched at the beginning of a line in lex? Default
   is no, see -b option though. */
int LexBeginningOfLineOnly = FALSE;

/* Keywords in angle brackets (SGML)? Default is no, see -a option. */
int LexKeywordsSgmlType = FALSE;

/* Keywords for lex are usually lower case. Use the -c option to preserve
   case sensitivity. */
int LexKeywordsLowerCase = TRUE;

/*
 * Main
 * ****
 * Read a token definition file (.tdef) and write the output files
 * requested on the command line.
 *
 * Every line of the input which is not empty and does not start with '%'
 * defines one token. The first word of the line (terminated by a blank,
 * a tab or the end of the line) is the token name; it must start with
 * K_ (keyword), S_ (special keyword) or T_ (ordinary token). The rest of
 * the line, if there is any, is written as <type> of the token into the
 * yacc output. Tokens are numbered consecutively, starting with the
 * initial value of TokenCounter.
 *
 * argc, argv: the command line, see UsageText for the options.
 *
 * The program is terminated through exit() rather than by returning.
 */
int
main(argc, argv)
     int argc;
     char *argv[];
{
  char line[256];
  char word[256];
  char *token_string_org;   /* Token name as it appears in the input. */
  char *token_string_l;     /* Lower case version of a K_ token name. */
  char *token_string_yacc;  /* Working copy of the line for the yacc output. */
  int token_len;            /* Length of the token name within line. */
  int i;
  int c;
  int linenumber = 0;

  ProgName = argv[0];
  InitProgName (ProgName, tmpdir_default);

  FExInit();
  InitSignalHandling();

  /* Without any argument all we can do is explain the usage. */
  if (argc == 1) {
    Usage();
    exit(1);
  }

  /* Initialize all the file pointers. */
  FExOpen(&Ex_TokensIn, EFT_INIT, NULL, NULL, NULL);
  FExOpen(&Ex_HOut,  EFT_INIT, NULL, NULL, NULL);
  FExOpen(&Ex_LOut,  EFT_INIT, NULL, NULL, NULL);
  FExOpen(&Ex_COut,  EFT_INIT, NULL, NULL, NULL);
  FExOpen(&Ex_YOut,  EFT_INIT, NULL, NULL, NULL);
  FExOpen(&Ex_Sgtok, EFT_INIT, NULL, NULL, NULL);

  /* Option Processing. */
  while ((c=getopt(argc, argv, "abcqvC:H:L:N:S:Y:")) != EOF) {
    switch (c) {
      /* -a: The lexical analyzer will read in the keywords and enclose them
         in angle brackets so they are recognized in an SGML like style. */
      case 'a':
        LexKeywordsSgmlType = TRUE;
        break;

      /* -b: Keywords are now only matched at the beginning of a line, not
         anywhere else in the text. */
      case 'b':
        LexBeginningOfLineOnly = TRUE;
        break;

      /* -c: Keywords are case sensitive now. By default this program maps
         all keywords to lower case (regardless of how they were entered.) */
      case 'c':
        LexKeywordsLowerCase = FALSE;
        break;

      /* -q: quiet. */
      case 'q':
        Verbose = V_QUIET;
        break;

      /* -v: verbose. */
      case 'v':
        Verbose = V_VERBOSE;
        break;

      /* -C file[.c]: generate a C program which given a token value prints the
         token in a symbolic representation. This C program must be compiled and
         linked to the source which wants to print tokens. The name of the C
         procedure generated here is by default PrintTokenFunctionName but can
         be changed with the -N option, if necessary. The function header is
         written right here, which is why -N has to come before -C. */
      case 'C':
        FExOpen(&Ex_COut, EFT_WRITE, EFQ_NO_STDOUT, optarg, "c");
        fprintf(EX_FP(Ex_COut), "/* This file is generated by program \"%s\". */\n", ProgName);
        fprintf(EX_FP(Ex_COut), "/* DO NOT CHANGE, change the input file to \"%s\" instead. */\n\n", ProgName);
        fprintf(EX_FP(Ex_COut), "#include <stdio.h>\n");
        fprintf(EX_FP(Ex_COut), "void %s(fp, t)\nFILE *fp;\n int t;\n{\n", PrintTokenFunctionName);
        fprintf(EX_FP(Ex_COut), "switch(t) {\n");
        fprintf(EX_FP(Ex_COut), "\tcase  0: fprintf(fp, \"<<end of lex>>\"); break;\n");
        break;

      /* -H file[.h]: generate a .h file with all the definitions of
         tokens in it. This file can be used by the lexical analyzer to return proper
         values. */
      case 'H':
        FExOpen(&Ex_HOut, EFT_WRITE, EFQ_NO_STDOUT, optarg, "h");
        fprintf(EX_FP(Ex_HOut), "/* File generated by program \"%s\". */\n", ProgName);
        fprintf(EX_FP(Ex_HOut), "/* DO NOT CHANGE, change the input file to \"%s\" instead. */\n\n", ProgName);
        break;

      /* -L file[.lex]: generate a (partial) .lex file with all the definitions for
         reading in the keywords. */
      case 'L':
        FExOpen(&Ex_LOut, EFT_WRITE, EFQ_NO_STDOUT, optarg, "lex");
        break;

      /* -N name: print token function name. This option is used to overwrite
         the default used by '-C'. */
      case 'N':
        if (Ex_COut.ef_open)
          Fatal ("main(): -N option: -C option must be used AFTER -N option.");
        PrintTokenFunctionName = StrcpyAlloc(optarg);
        break;

      /* -Y file[.y]: generate a .y file (for yacc) with "%token name number"
         definitions. */
      case 'Y':
        FExOpen(&Ex_YOut, EFT_WRITE, EFQ_NO_STDOUT, optarg, "y");
        fprintf(EX_FP(Ex_YOut), "/* File generated by program \"%s\". */\n", ProgName);
        fprintf(EX_FP(Ex_YOut), "/* DO NOT CHANGE, change the input file to \"%s\" instead. */\n\n", ProgName);
        break;

      /* -S file[.sgtok]: generate a .sgtok file (for sgmllex). */
      case 'S':
        FExOpen(&Ex_Sgtok, EFT_WRITE, EFQ_NO_STDOUT, optarg, "sgtok");
        break;

      case '?':
        Fatal ("main(): Illegal option.");
        break;

      default:
        Fatal ("main(): defaults.");
      } /* switch */
  } /* while */

  /* If no output file is specified all we can do is parse the file. */
  if (!Ex_HOut.ef_open && !Ex_LOut.ef_open && !Ex_COut.ef_open &&
      !Ex_YOut.ef_open && !Ex_Sgtok.ef_open)
    fprintf (stderr, "main(): no output file open, parse input file only.\n");

  /* There must be an argument left which names the input file, otherwise
     argv[optind] below is the NULL pointer terminating argv.
     NOTE(review): Fatal() is assumed not to return -- confirm. */
  if (optind >= argc) {
    Usage();
    Fatal ("main(): no token definition file specified.");
  }

  /* Open token definitions file. The following also explains the
     structure of the input of this file. */
  FExOpen(&Ex_TokensIn, EFT_READ, NULL, argv[optind], "tdef");

  /* Now read in this file on a line by line basis */
  while (ReadLineIntoBuffer(EX_FP(Ex_TokensIn), line, 256) != EOF) {
    linenumber++;
    if (line[0] == '%') /* Skip comment lines. */
      continue;
    if (Strlen(line) == 0)
      continue; /* Skip empty lines. */

#ifdef DEBUG
    printf ("%s\n", line); /* Print the line for debugging purposes. */
#endif
    /* First in the line comes the token itself. Pick up this token. Its
       length is kept because i is reused further down. */
    for (token_len = 0;
         line[token_len] != ' ' && line[token_len] != '\t' && line[token_len] != '\0';
         token_len++)
      ;
    token_string_org = StrncpyAlloc(line, token_len);

    if (Ex_HOut.ef_open) /* Write #defines into a .h file. */
      fprintf (EX_FP(Ex_HOut), "#define %-20s %d\n",
               token_string_org, TokenCounter);

    if (Ex_COut.ef_open) /* Write the C program which prints the tokens. */
      fprintf(EX_FP(Ex_COut), "\tcase %2d: fprintf(fp, \"%s\"); break;\n",
              TokenCounter, token_string_org);

    /* The second "field", if available, contains information which is used
       by yacc (type information). */
    if (Ex_YOut.ef_open) { /* Write the stuff for yacc. */
      token_string_yacc = StrcpyAlloc(line);
      RemoveLeadingWhiteSpace(token_string_yacc);
      RemoveTrailingWhiteSpace(token_string_yacc);

      /* Copy the first word, i.e. the token name, into word. The copy can
         not be longer than line, which has the same size as word. */
      for (i=0;
           token_string_yacc[i] != ' ' &&
           token_string_yacc[i] != '\t' &&
           token_string_yacc[i] != '\0'; i++)
        word[i] = token_string_yacc[i];
      word[i] = '\0';
#ifdef DEBUG
      fprintf (stderr, "WORD: \"%s\", \"%s\" (%d)\n", word, token_string_yacc, Strlen(word));
#endif
      /* Drop the token name, what is left over is the type, if any. */
      RemoveInLine(token_string_yacc, Strlen(word));
#ifdef DEBUG
      fprintf (stderr, "NOW: %s\n", token_string_yacc);
#endif
      RemoveLeadingWhiteSpace(token_string_yacc);

      if (Strlen(token_string_yacc) == 0)
        fprintf (EX_FP(Ex_YOut), "%%token %-20s %2d\n",
                 word, TokenCounter);
      else
        fprintf (EX_FP(Ex_YOut), "%%token <%s> %-20s %2d\n",
                 token_string_yacc, word, TokenCounter);
    }

    /* Is it a keyword type of token? Yes, if it starts with K_ */
    if (IsBeginningOfLine(line, "K_")) {

      /* Generate a lower case version of the keyword. The characters are
         converted to unsigned char first, because the <ctype.h> functions
         are not defined for negative values other than EOF. */
      token_string_l = StrncpyAlloc(line, token_len);
      for (i=0; token_string_l[i] != '\0'; i++)
        if (isupper((unsigned char) token_string_l[i]))
          token_string_l[i] = tolower((unsigned char) token_string_l[i]);
#ifdef DEBUG
      printf ("K_ type of token: \"%s\" / \"%s\"\n", token_string_org, token_string_l);
#endif
      /* Write the keyword parsing stuff for lex. Take into account that if
         the -b option is used then keywords are only matched at the
         beginning of a line. Also take the <...> into consideration. */
      if (Ex_LOut.ef_open)
        fprintf (EX_FP(Ex_LOut), "%s\"%s%s%s\" {return(%d);}\n",
                 LexBeginningOfLineOnly ? "^":"",
                 LexKeywordsSgmlType ? "<":"",
                 LexKeywordsLowerCase? token_string_l:token_string_org,
                 LexKeywordsSgmlType ? ">":"",
                 TokenCounter);

      if (Ex_Sgtok.ef_open) { /* sgmlex token definitions. */
        /* A trailing "_e" is cut off and the name is written with a
           leading '/' instead. The name starts with "k_" here, so it has
           at least two characters. */
        if (Strcmp (token_string_l+Strlen(token_string_l)-2, "_e") == 0) {
          token_string_l[Strlen(token_string_l)-2] = '\0';
          fprintf(EX_FP(Ex_Sgtok), "/%-19s %2d\n", token_string_l, TokenCounter);
        } else
          fprintf(EX_FP(Ex_Sgtok), "%-20s %2d\n", token_string_l, TokenCounter);
      }

      /* S_ type of token ? Special keyword token (already programmed). */
    } else if (IsBeginningOfLine(line, "S_")) {
    }

    /* T_ type of token ? Ordinary token. */
    else if (IsBeginningOfLine(line, "T_")) {
    }

    /* Any other is an error. */
    else
      Fatal3 ("main(): token does not start with\n\tK_, T_ or S_; token is \"%s\", line %d",
              line, linenumber);
    TokenCounter ++;
  } /* while loop reading the token definitions file. */

  /* Close the C program which prints the tokens, that is close the switch
     statement and the procedure itself. */
  if (Ex_COut.ef_open) {
    fprintf(EX_FP(Ex_COut), "\tdefault: Fatal2 (\"%s(): illegal token, #%%d\", t);\n",
            PrintTokenFunctionName);
    fprintf(EX_FP(Ex_COut), "}}\n");
  }

  FExClose(&Ex_TokensIn); /* Close all other files now. */
  FExClose(&Ex_HOut);
  FExClose(&Ex_LOut);
  FExClose(&Ex_COut);
  FExClose(&Ex_YOut);
  FExClose(&Ex_Sgtok);
  exit (0);
}

/* Usage text: one entry per line printed by Usage(), terminated by a
   NULL pointer. Only one input file is listed because main() opens
   argv[optind] only. */
char *UsageText[] = {
  "\t-a (keywords enclosed in angle brackets <...>, SGML style)",
  "\t-b (keywords are matched at the beginning of a line only by lex)",
  "\t-c (keywords are by default mapped to lower case, -c: preserve case sensitivity)",
  "\t[-N Function name] (define function name for printing token C program.)",
  "\t[-C file[.c]] (Output file for a C-program printing the tokens)",
  "\t[-L file[.lex]] (Output file for lex)",
  "\t[-H file[.h]] (.h file for all tokens)",
  "\t[-S file[.sgtok]] (.sgtok SGML token definitions / sgmllex)",
  "\t[-Y file[.y]] (Output file for yacc token definitions)",
  "\t[-q] (quiet)      [-v] (verbose)",
  "\tfile[.tdef] (or '-' for stdin)",
  NULL};

/*
 * Usage
 * *****
 * Write the version of this program, the TeXPS release and the
 * lines of UsageText to stderr.
 */
void
Usage()
{
  int k;

  /* Header: program version, package release, start of the usage. */
  fprintf (stderr, "%s:  Version %s of %s\n", ProgName, VERSION, DATE);
  fprintf (stderr, "%s: TeXPS version %s of %s\n", ProgName, RELEASE_VERSION, RELEASE_DATE);
  fprintf (stderr, "usage: %s\n", ProgName);

  /* The option list ends with a NULL entry. */
  for (k = 0; UsageText[k] != NULL; k++) {
    fputs (UsageText[k], stderr);
    fputc ('\n', stderr);
  }
}
DataMuseum.dk

DKUUG/EUUG Conference tapes

⟦3daac1e69⟧ TextFile

Derivation

TextFile