|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download Index: T d
Length: 11857 (0x2e51) Types: TextFile Names: »detex.l«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12 └─⟦c319c2751⟧ »unix3.0/TeX3.0.tar.Z« └─⟦036c765ac⟧ └─⟦this⟧ »TeX3.0/TeXcontrib/trinkle/detex.l« └─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89 └─⟦this⟧ »./tex82/TeXcontrib/trinkle/detex.l« └─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12 └─⟦63303ae94⟧ »unix3.14/TeX3.14.tar.Z« └─⟦c58930e5c⟧ └─⟦this⟧ »TeX3.14/TeXcontrib/trinkle/detex.l«
%{
static char	rcsid[] = "$Header: /u1/src/local/bin/tex/tools/detex/RCS/detex.l,v 2.3 86/10/23 16:29:27 trinkle Exp $";

/*
 * detex [-w] [-l] [-e environment-list] [file[.tex]]
 *
 *	This program is used to remove TeX or LaTeX constructs from a text
 *	file.
 *
 * Written by:
 *	Daniel Trinkle
 *	Department of Computer Science
 *	Purdue University
 *
 * Start states used by the scanner:
 *	Normal		ordinary text; control sequences are recognized here
 *	LaBegin		just saw \begin, looking for the environment name
 *	LaEnv		inside an ignored environment, discarding until \end
 *	LaEnd		just saw \end inside an ignored environment
 *	LaMacro		discarding everything through the next "}"
 *	Def		just saw \def, discarding everything through the next "{"
 *	LaForm		between \( and \)
 *	LaDisplay	between \[ and \]
 *	Display		between $$ and $$
 *	Math		between $ and $
 *	LaInc		just saw \include, looking for the file name
 *	IncOnly		reading the \includeonly list
 *	Input		just saw \input, looking for the file name
 */

#include "detex.h"
#include <strings.h>
#include <sys/param.h>
/*
 * malloc(), getenv() and exit() must be declared: without a declaration
 * malloc() is taken to return int, which truncates pointers wherever
 * pointers are wider than int.  strlen()/strcpy()/strcmp() are declared
 * in <string.h> on systems whose <strings.h> only has index()/rindex().
 */
#include <stdlib.h>
#include <string.h>

#define	LaBEGIN		if (fLatex) BEGIN
#define	IGNORE		if (!fWord) putchar(' ')
#define	NEWLINE		if (!fWord) putchar('\n')

char	*rgsbEnvIgnore[MAXENVS];	/* list of environments ignored */
char	*rgsbIncList[MAXINCLIST];	/* list of includeonly files */
char	*rgsbInputPaths[MAXINPUTPATHS];	/* list of input paths in order */
char	sbCurrentEnv[CCHMAXENV];	/* current environment being ignored */
char	*sbProgName;			/* name we were invoked with */
FILE	*rgfp[NOFILE+1];		/* stack of input/include files */
int	cfp = 0;			/* count of files in stack */
int	csbEnvIgnore;			/* count of environments ignored */
int	csbIncList = 0;			/* count of includeonly files */
int	csbInputPaths;			/* count of input paths */
int	fLatex = 0;			/* flag to indicate delatex */
int	fWord = 0;			/* flag for -w option */

FILE		*TexOpen();
static FILE	*TryOpen();
static char	*SbSave();
static char	*PchExtension();
static int	PushInput();
%}

S	[ \t\n]*
W	[a-zA-Z]+

%Start Def Display IncOnly Input Math Normal
%Start LaBegin LaDisplay LaEnd LaEnv LaForm LaInc LaMacro

%%
<Normal>"%".*		/* ignore comments */	;

<Normal>"\\begin"{S}"{"{S}"document"{S}"}"	{fLatex = 1; IGNORE;}

<Normal>"\\begin"     /* environment start */	{LaBEGIN LaBegin; IGNORE;}

<LaBegin>{W}					{   if (BeginEnv(yytext))
							BEGIN LaEnv;
						    else
							BEGIN LaMacro;
						    IGNORE;
						}
<LaBegin>"\n"					NEWLINE;
<LaBegin>.					;

<LaEnv>"\\end"  /* absorb some environments */	{LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n"					NEWLINE;
<LaEnv>.					;

<LaEnd>{W}		 /* end environment */	{   if (EndEnv(yytext))
							BEGIN Normal;
						    IGNORE;
						}
<LaEnd>"}"					{BEGIN LaEnv; IGNORE;}
<LaEnd>"\n"					NEWLINE;
<LaEnd>.					;

<Normal>"\\bibitem"	    /* ignore args  */	{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibliography"    /* of these \cs */	{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibstyle"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\cite"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\documentstyle"			{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\end"					{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\label"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\pageref"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\ref"					{LaBEGIN LaMacro; IGNORE;}
<LaMacro>"}"					BEGIN Normal;
<LaMacro>"\n"					NEWLINE;
<LaMacro>.					;

<Normal>"\\def"		/* ignore def begin */	{BEGIN Def; IGNORE;}
<Def>"{"					BEGIN Normal;
<Def>"\n"					NEWLINE;
<Def>.						;

<Normal>"\\("		/* formula mode */	{LaBEGIN LaForm; IGNORE;}
<LaForm>"\\)"					BEGIN Normal;
<LaForm>"\n"					NEWLINE;
<LaForm>.					;

<Normal>"\\["		/* display mode */	{LaBEGIN LaDisplay; IGNORE;}
<LaDisplay>"\\]"				BEGIN Normal;
<LaDisplay>"\n"					NEWLINE;
<LaDisplay>.					;

<Normal>"$$"		/* display mode */	{BEGIN Display; IGNORE;}
<Display>"$$"					BEGIN Normal;
<Display>"\n"					NEWLINE;
<Display>.					;

<Normal>"$"		/* math mode */		{BEGIN Math; IGNORE;}
<Math>"$"					BEGIN Normal;
<Math>"\n"					NEWLINE;
<Math>.						;

<Normal>"\\include"	/* process files */	{LaBEGIN LaInc; IGNORE;}
<LaInc>[^{ \t\n}]+				{   IncludeFile(yytext);
						    BEGIN Normal;
						}
<LaInc>"\n"					NEWLINE;
<LaInc>.					;

<Normal>"\\includeonly"				{BEGIN IncOnly; IGNORE;}
<IncOnly>[^{ \t,\n}]+				AddInclude(yytext);
<IncOnly>"}"					{   if (csbIncList == 0)
							rgsbIncList[csbIncList++] = NULL;
						    BEGIN Normal;
						}
<IncOnly>"\n"					NEWLINE;
<IncOnly>.					;

<Normal>"\\input"				{BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+				{   InputFile(yytext);
						    BEGIN Normal;
						}
<Input>"\n"					NEWLINE;
<Input>.					;

<Normal>\\[a-zA-Z@]+	/* ignore other \cs */	IGNORE;
<Normal>\\.					IGNORE;
<Normal>\\[a-zA-Z@][a-zA-Z@0-9]*['=`][^ \t\n]*	IGNORE;
<Normal>[{}\\|~]	/* special characters */ IGNORE;
<Normal>[!?]"`"					IGNORE;

<Normal>{W}[']*{W}				{   if (fWord)
							printf("%s\n", yytext);
						    else
							ECHO;
						}
<Normal>[0-9]+					if (!fWord) ECHO;
<Normal>(.|\n)					if (!fWord) ECHO;
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**		-w		word only output
**		-l		force latex mode
**		-e <env-list>	list of LaTeX environments to ignore
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**
**	An -e option that is not followed by an argument is reported with a
**	warning and the default environment list is kept.
******/

main(cArgs,rgsbArgs)
int	cArgs;
char	*rgsbArgs[];
{
	char	*pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
	int	fSawFile = 0, iArgs = 1;

	/* get base name and decide what we are doing, detex or delatex */
	if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
	    sbProgName++;
	else
	    sbProgName = rgsbArgs[0];

	/* set rgsbInputPaths for use with TexOpen() */
	SetInputPaths();

	/* process command line options */
	while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
	    while (*++pch)
		switch (*pch) {
		case CHENVOPT:
		    /* the list is the next argument; make sure there is one */
		    if (iArgs + 1 < cArgs)
			sbEnvList = rgsbArgs[++iArgs];
		    else {
			sbBadOpt[0] = *pch;
			sbBadOpt[1] = '\0';
			Warning("option requires an environment list -",
				sbBadOpt);
		    }
		    break;
		case CHLATEXOPT:
		    fLatex = 1;
		    break;
		case CHWORDOPT:
		    fWord = 1;
		    break;
		default:
		    sbBadOpt[0] = *pch;
		    sbBadOpt[1] = '\0';
		    Warning("unknown option ignored -", sbBadOpt);
		}
	    iArgs++;
	}
	SetEnvIgnore(sbEnvList);

	/* process input files */
	for (; iArgs < cArgs; iArgs++) {
	    fSawFile++;
	    if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
		Warning("can't open file", rgsbArgs[iArgs]);
		continue;
	    }
	    BEGIN Normal;
	    yylex();
	}

	/* if there were no input files, assume stdin */
	if (!fSawFile) {
	    yyin = stdin;
	    BEGIN Normal;
	    yylex();
	}
	if (YYSTATE != Normal)
	    ErrorExit("input contains an unterminated mode or environment");
	exit(0);
}

/******
** yywrap -- handles EOF for lex.  Check to see if the stack of open files
**	has anything on it.  If it does, set yyin to the top value.  If not
**	return the termination signal for lex.
******/

yywrap()
{
	fclose(yyin);
	if (cfp > 0) {
	    yyin = rgfp[--cfp];
	    return(0);
	}
	return(1);
}

/******
** SbSave -- returns a freshly malloc'ed copy of sb.  Exits through
**	ErrorExit() if no memory is available, so the result is never NULL.
******/

static char *
SbSave(sb)
char	*sb;
{
	char	*sbNew;

	if ((sbNew = (char *)malloc(strlen(sb) + 1)) == NULL)
	    ErrorExit("out of memory");
	(void)strcpy(sbNew, sb);
	return(sbNew);
}

/******
** PchExtension -- returns a pointer to the '.' that starts the extension
**	of the last component of sbPath, or NULL if that component has no
**	extension.  A '.' in a directory part ("./file", "dir.d/file"), a
**	leading '.' of the last component and a '.' that follows another
**	'.' ("..") do not start an extension.
******/

static char *
PchExtension(sbPath)
char	*sbPath;
{
	char	*pchBase, *pchExt;

	if ((pchBase = rindex(sbPath, '/')) != NULL)
	    pchBase++;
	else
	    pchBase = sbPath;
	pchExt = rindex(pchBase, '.');
	if (pchExt != NULL && (pchExt == pchBase || *(pchExt - 1) == '.'))
	    pchExt = NULL;
	return(pchExt);
}

/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList.  The list is copied first because SeparateList() writes
**	into the string it splits and sbEnvList may be DEFAULTENV
**	(presumably a string literal -- it is defined in detex.h).
******/

SetEnvIgnore(sbEnvList)
char	*sbEnvList;
{
	csbEnvIgnore = SeparateList(SbSave(sbEnvList), rgsbEnvIgnore,
				    CHENVSEP, MAXENVS);
	if (csbEnvIgnore == ERROR)
	    ErrorExit("The environment list contains too many environments");
}

/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned, otherwise 0.
**	Always returns 0 when not in LaTeX mode.  A name that does not fit
**	in sbCurrentEnv is never treated as ignored.
******/

BeginEnv(sbEnv)
char	*sbEnv;
{
	int	i;

	if (!fLatex) return(0);
	/* sbCurrentEnv holds CCHMAXENV bytes including the terminator */
	if ((int)strlen(sbEnv) >= CCHMAXENV) return(0);
	for (i = 0; i < csbEnvIgnore; i++)
	    if (strcmp(sbEnv, rgsbEnvIgnore[i]) == 0) {
		(void)strcpy(sbCurrentEnv, sbEnv);
		return(1);
	    }
	return(0);
}

/******
** EndEnv -- checks to see if sbEnv is the current environment being ignored.
**	Returns 1 if it is, 0 if not or when not in LaTeX mode.
******/

EndEnv(sbEnv)
char	*sbEnv;
{
	if (!fLatex) return(0);
	return(strcmp(sbEnv, sbCurrentEnv) == 0);
}

/******
** PushInput -- open sbFile with TexOpen() and, if that works, push the
**	current yyin on the rgfp stack and read from the new file.  If the
**	stack is full or the open fails a warning is printed (sbCantOpen is
**	the text used for a failed open) and yyin is left alone.
**	Returns 1 if yyin was switched, 0 otherwise.
******/

static int
PushInput(sbFile, sbCantOpen)
char	*sbFile, *sbCantOpen;
{
	FILE	*fp;

	if (cfp >= NOFILE + 1) {		/* rgfp has NOFILE+1 slots */
	    Warning("input files nested too deeply, ignoring", sbFile);
	    return(0);
	}
	if ((fp = TexOpen(sbFile)) == NULL) {
	    Warning(sbCantOpen, sbFile);
	    return(0);
	}
	rgfp[cfp++] = yyin;
	yyin = fp;
	return(1);
}

/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	the sbFile is ignored.  Returns 1 if sbFile is now being read,
**	0 otherwise.
******/

InputFile(sbFile)
char	*sbFile;
{
	return(PushInput(sbFile, "can't open \\input file"));
}

/******
** IncludeFile -- if sbFile is in the rgsbIncList (or there is no list),
**	push current yyin and open sbFile.  If the open fails, the sbFile
**	is ignored.  Returns 1 if sbFile is now being read, 0 otherwise.
******/

IncludeFile(sbFile)
char	*sbFile;
{
	if (!InList(sbFile))
	    return(0);
	return(PushInput(sbFile, "can't open \\include file"));
}

/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is too long, sbFile is ignored
**	with a warning and nothing is stored.
******/

AddInclude(sbFile)
char	*sbFile;
{
	if (csbIncList >= MAXINCLIST)
	    Warning("\\includeonly list is too long, ignoring", sbFile);
	else
	    rgsbIncList[csbIncList++] = SbSave(sbFile);
}

/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".  The extension
**	of sbFile, if any, is dropped before comparing.  A NULL entry ends
**	the list early (the scanner stores one for an empty \includeonly).
******/

InList(sbFile)
char	*sbFile;
{
	char	*pch, sbBase[MAXPATHLEN];
	int	i;

	if (csbIncList == 0)	/* no list */
	    return(1);
	/* a name that does not fit in sbBase could not be opened anyway */
	if (strlen(sbFile) >= sizeof(sbBase))
	    return(0);
	(void)strcpy(sbBase, sbFile);
	if ((pch = PchExtension(sbBase)) != NULL)
	    *pch = '\0';
	for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
	    if (strcmp(rgsbIncList[i], sbBase) == 0)
		return(1);
	return(0);
}

/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS.
**	Either way a private copy is split, since SeparateList() writes
**	into the string it is given.
******/

SetInputPaths()
{
	char	*sb;

	if ((sb = getenv("TEXINPUTS")) == NULL)
	    sb = DEFAULTINPUTS;
	csbInputPaths = SeparateList(SbSave(sb), rgsbInputPaths,
				     CHPATHSEP, MAXINPUTPATHS);
	if (csbInputPaths == ERROR)
	    ErrorExit("TEXINPUTS environment variable has too many paths");
}

/******
** SeparateList -- takes a chSep separated list sbList, replaces the
**	chSep's with NUL characters and sets rgsbList[i] to the beginning
**	of the ith word in sbList.  The number of words is returned.  A
**	ERROR is returned if there are more than csbMax words.
**	sbList is modified in place and must therefore be writable.
******/

SeparateList(sbList, rgsbList, chSep, csbMax)
char	*sbList, *rgsbList[], chSep;
int	csbMax;
{
	int	csbList = 0;

	while (sbList && *sbList && csbList < csbMax) {
	    rgsbList[csbList++] = sbList;
	    if ((sbList = index(sbList, chSep)) != NULL)
		*sbList++ = '\0';
	}
	return(sbList && *sbList ? ERROR : csbList);
}

/******
** TryOpen -- tries to open the single path name sbPath for reading using
**	the order described for TexOpen().  sbPath must be shorter than
**	MAXPATHLEN so that sbPath with ".tex" appended fits in the local
**	buffer.  Returns the open stream or NULL.
******/

static FILE *
TryOpen(sbPath)
char	*sbPath;
{
	char	*pchExt, sbTex[MAXPATHLEN + 5];
	FILE	*fp;

	pchExt = PchExtension(sbPath);

	/* If sbPath ends in .tex then it must be there */
	if (pchExt != NULL && strcmp(pchExt, ".tex") == 0)
	    return(fopen(sbPath, "r"));

	/* if .<ext> then try to open it */
	if (pchExt != NULL && (fp = fopen(sbPath, "r")) != NULL)
	    return(fp);

	/* just base name, add .tex to the name */
	(void)strcpy(sbTex, sbPath);
	(void)strcat(sbTex, ".tex");
	if ((fp = fopen(sbTex, "r")) != NULL)
	    return(fp);

	/* try sbPath regardless */
	return(fopen(sbPath, "r"));
}

/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**		file.tex - must be as named, if not there go to the next path
**		file.ext - random extension, try it
**		file     - base name, add .tex and try it
**		file     - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched.
**	Path names that would not fit in MAXPATHLEN bytes are skipped.
**	Returns the open stream, or NULL if nothing could be opened.
******/

FILE *
TexOpen(sbFile)
char	*sbFile;
{
	FILE	*fp;
	int	iPath;
	static char	sbFullPath[MAXPATHLEN];

	if (*sbFile == '/') {			/* absolute path */
	    if (strlen(sbFile) >= sizeof(sbFullPath))
		return((FILE *)NULL);
	    (void)strcpy(sbFullPath, sbFile);
	    return(TryOpen(sbFullPath));
	}

	for (iPath = 0; iPath < csbInputPaths; iPath++) {
	    /* room is needed for path, '/', file and the terminator */
	    if (strlen(rgsbInputPaths[iPath]) + 1 + strlen(sbFile)
		    >= sizeof(sbFullPath))
		continue;
	    (void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
	    if ((fp = TryOpen(sbFullPath)) != NULL)
		return(fp);
	}
	return((FILE *)NULL);
}

/******
** Warning -- print a warning message preceded by the program name.
******/

Warning(sb1, sb2)
char	*sb1, *sb2;
{
	fprintf(stderr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
}

/******
** ErrorExit -- print an error message preceded by the program name.
**	Stdout is flushed and detex exits.
******/

ErrorExit(sb1)
char	*sb1;
{
	(void)fflush(stdout);
	fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
	exit(1);
}