|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T d
Length: 13145 (0x3359) Types: TextFile Names: »detex.l«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12 └─⟦af57ea0c3⟧ »utils/detex-2.3.tar« └─⟦this⟧ »./detex.l«
%{
#ifndef lint
static char rcsid[] = "$Header: /usr/src/local/bin/detex/RCS/detex.l,v 2.8 1990/09/07 20:48:32 trinkle Exp trinkle $";
#endif

/*
 * detex [-e environment-list] [-c] [-l] [-n] [-s] [-w] [file[.tex]]
 *
 *      This program is used to remove TeX or LaTeX constructs from a text
 *      file.
 *
 * Written by:
 *      Daniel Trinkle
 *      Department of Computer Science
 *      Purdue University
 *
 */

#include "detex.h"
#include <strings.h>
#include <sys/param.h>

/*
 * Shorthand used in the rule actions.  Each expands to a single (possibly
 * conditional) statement, so it must be followed by the start-state name
 * (for the *BEGIN forms) and/or a semicolon.
 *
 *      LaBEGIN    switch start state only when in LaTeX mode
 *      CITEBEGIN  switch start state unless LaTeX mode with -c is in effect
 *      IGNORE     emit a blank for swallowed text (only with -s, never with -w)
 *      SPACE      emit a blank (never with -w)
 *      NEWLINE    emit a newline (never with -w)
 */
#define LaBEGIN         if (fLatex) BEGIN
#define CITEBEGIN       if (!fLatex || !fCite) BEGIN
#define IGNORE          if (fSpace && !fWord) putchar(' ')
#define SPACE           if (!fWord) putchar(' ')
#define NEWLINE         if (!fWord) putchar('\n')

char    *malloc();
static char     *SbSave();

char    *rgsbEnvIgnore[MAXENVS];        /* list of environments ignored */
char    *rgsbIncList[MAXINCLIST];       /* list of includeonly files */
char    *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char    sbCurrentEnv[CCHMAXENV];        /* current environment being ignored */
char    *sbProgName;                    /* name we were invoked with */
FILE    *rgfp[NOFILE+1];                /* stack of input/include files */
int     cfp = 0;                        /* count of files in stack */
int     csbEnvIgnore;                   /* count of environments ignored */
int     csbIncList = 0;                 /* count of includeonly files */
int     csbInputPaths;                  /* count of input paths */
int     fLatex = 0;                     /* flag to indicate delatex */
int     fWord = 0;                      /* flag for -w option */
int     fFollow = 1;                    /* flag to follow input/include */
int     fCite = 0;                      /* flag to echo \cite and \ref args */
int     fSpace = 0;                     /* flag to replace \cs with space */
%}

S       [ \t\n]*
W       [a-zA-Z]+

%Start Def Disp IncOnly Input Math Nrm Ctl
%Start LaBegin LaDisp LaEnd LaEnv LaForm LaInc LaMacro LaVerb

%%
<Nrm>"%".*                                      { /* ignore comments */ ; }

<Nrm>"\\begin"{S}"{"{S}"document"{S}"}"         { /* a document environment turns on LaTeX mode */
                                                  fLatex = 1; IGNORE; }

<Nrm>"\\begin"                                  { /* environment start */
                                                  LaBEGIN LaBegin; IGNORE; }

<LaBegin>{S}"{"{S}"verbatim"{S}"}"              {
                                                    if (BeginEnv("verbatim"))
                                                        BEGIN LaEnv;
                                                    else
                                                        BEGIN LaVerb;
                                                    IGNORE;
                                                }

<LaVerb>"\\end"{S}"{"{S}"verbatim"{S}"}"        { /* verbatim mode */
                                                  BEGIN Nrm; IGNORE; }
<LaVerb>.                                       { ECHO; }

<LaBegin>{W}                                    {
                                                    if (BeginEnv(yytext))
                                                        BEGIN LaEnv;
                                                    else
                                                        BEGIN LaMacro;
                                                    IGNORE;
                                                }
<LaBegin>"\n"                                   { NEWLINE; }
<LaBegin>.                                      { ; }

<LaEnv>"\\end"                                  { /* absorb some environments */
                                                  LaBEGIN LaEnd; IGNORE; }
<LaEnv>"\n"                                     { NEWLINE; }
<LaEnv>.                                        { ; }

<LaEnd>{W}                                      { /* end environment */
                                                    if (EndEnv(yytext))
                                                        BEGIN Nrm;
                                                    IGNORE;
                                                }
<LaEnd>"}"                                      { BEGIN LaEnv; IGNORE; }
<LaEnd>"\n"                                     { NEWLINE; }
<LaEnd>.                                        { ; }

<Nrm>"\\bibitem"                                { /* ignore args of these \cs */
                                                  LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\bibliography"                           { LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\bibstyle"                               { LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\cite"                                   { CITEBEGIN LaMacro; IGNORE; }
<Nrm>"\\documentstyle"                          { LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\end"                                    { LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\index"                                  { LaBEGIN LaMacro; SPACE; }
<Nrm>"\\label"                                  { LaBEGIN LaMacro; IGNORE; }
<Nrm>"\\pageref"                                { CITEBEGIN LaMacro; IGNORE; }
<Nrm>"\\ref"                                    { CITEBEGIN LaMacro; IGNORE; }
<LaMacro>"}"                                    { BEGIN Nrm; }
<LaMacro>"\n"                                   { NEWLINE; }
<LaMacro>.                                      { ; }

<Nrm>"\\def"                                    { /* ignore def begin */
                                                  BEGIN Def; IGNORE; }
<Def>"{"                                        { BEGIN Nrm; }
<Def>"\n"                                       { NEWLINE; }
<Def>.                                          { ; }

<Nrm>"\\("                                      { /* formula mode */
                                                  LaBEGIN LaForm; IGNORE; }
<LaForm>"\\)"                                   { BEGIN Nrm; }
<LaForm>"\n"                                    { NEWLINE; }
<LaForm>.                                       { ; }

<Nrm>"\\["                                      { /* display mode */
                                                  LaBEGIN LaDisp; IGNORE; }
<LaDisp>"\\]"                                   { BEGIN Nrm; }
<LaDisp>"\n"                                    { NEWLINE; }
<LaDisp>.                                       { ; }

<Nrm>"$$"                                       { /* display mode */
                                                  BEGIN Disp; IGNORE; }
<Disp>"$$"                                      { BEGIN Nrm; }
<Disp>"\n"                                      { NEWLINE; }
<Disp>.                                         { ; }

<Nrm>"$"                                        { /* math mode */
                                                  BEGIN Math; IGNORE; }
<Math>"$"                                       { BEGIN Nrm; }
<Math>"\n"                                      { NEWLINE; }
<Math>"\\$"                                     { ; }
<Math>.                                         { ; }

<Nrm>"\\include"                                { /* process files */
                                                  LaBEGIN LaInc; IGNORE; }
<LaInc>[^{ \t\n}]+                              {
                                                    IncludeFile(yytext);
                                                    BEGIN Nrm;
                                                }
<LaInc>"\n"                                     { NEWLINE; }
<LaInc>.                                        { ; }

<Nrm>"\\includeonly"                            { BEGIN IncOnly; IGNORE; }
<IncOnly>[^{ \t,\n}]+                           { AddInclude(yytext); }
<IncOnly>"}"                                    { /* an empty list is marked by a NULL entry */
                                                    if (csbIncList == 0)
                                                        rgsbIncList[csbIncList++] = NULL;
                                                    BEGIN Nrm;
                                                }
<IncOnly>"\n"                                   { NEWLINE; }
<IncOnly>.                                      { ; }

<Nrm>"\\input"                                  { BEGIN Input; IGNORE; }
<Input>[^{ \t\n}]+                              {
                                                    InputFile(yytext);
                                                    BEGIN Nrm;
                                                }
<Input>"\n"                                     { NEWLINE; }
<Input>.                                        { ; }

<Nrm>\\[a-zA-Z@]+                               { /* ignore other \cs */
                                                  BEGIN Ctl; IGNORE; }
<Nrm>"\\ "                                      { SPACE; }
<Nrm>\\.                                        { IGNORE; }
<Ctl>\\[a-zA-Z@]+                               { IGNORE; }
<Ctl>[a-zA-Z@0-9]*[-'=`][^ \t\n{]*              { IGNORE; }
<Ctl>"\n"                                       { BEGIN Nrm; NEWLINE; }
<Ctl>[ \t{]*                                    { BEGIN Nrm; IGNORE; }
<Ctl>.                                          { /* not part of the \cs: rescan it in Nrm */
                                                  yyless(0); BEGIN Nrm; }

<Nrm>[{}\\|]                                    { /* special characters */
                                                  IGNORE; }
<Nrm>[!?]"`"                                    { IGNORE; }
<Nrm>~                                          { SPACE; }

<Nrm>{W}[']*{W}                                 {
                                                    if (fWord)
                                                        printf("%s\n", yytext);
                                                    else
                                                        ECHO;
                                                }
<Nrm>[0-9]+                                     { if (!fWord) ECHO; }
<Nrm>(.|\n)                                     { if (!fWord) ECHO; }
%%
/******
** main --
**      Set sbProgName to the base of arg 0.
**      Set the input paths.
**      Check for options
**              -c              echo LaTeX \cite, \ref, and \pageref values
**              -e <env-list>   list of LaTeX environments to ignore
**              -l              force latex mode
**              -n              do not follow \input and \include
**              -s              replace control sequences with a space
**              -w              word only output
**      Set the list of LaTeX environments to ignore.
**      Process each input file.
**      If no input files are specified on the command line, process stdin.
******/

main(cArgs,rgsbArgs)
int     cArgs;
char    *rgsbArgs[];
{
    char        *pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
    FILE        *TexOpen();
    int         fSawFile = 0, iArgs = 1;

    /* get the base name of the program for use in diagnostics */
    if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
        sbProgName++;
    else
        sbProgName = rgsbArgs[0];

    /* set rgsbInputPaths for use with TexOpen() */
    SetInputPaths();

    /* process command line options */
    while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
        while (*++pch)
            switch (*pch) {
            case CHCITEOPT:
                fCite = 1;
                break;
            case CHENVOPT:
                /*
                 * The list is the next argument.  If it is missing this
                 * picks up the terminating NULL of the argument vector,
                 * which SetEnvIgnore() treats as an empty list.
                 */
                sbEnvList = rgsbArgs[++iArgs];
                break;
            case CHLATEXOPT:
                fLatex = 1;
                break;
            case CHNOFOLLOWOPT:
                fFollow = 0;
                break;
            case CHSPACEOPT:
                fSpace = 1;
                break;
            case CHWORDOPT:
                fWord = 1;
                break;
            default:
                sbBadOpt[0] = *pch;
                sbBadOpt[1] = '\0';
                Warning("unknown option ignored -", sbBadOpt);
            }
        iArgs++;
    }
    SetEnvIgnore(sbEnvList);

    /* process input files */
    for (; iArgs < cArgs; iArgs++) {
        fSawFile++;
        if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
            Warning("can't open file", rgsbArgs[iArgs]);
            continue;
        }
        BEGIN Nrm;
        (void)yylex();
    }

    /* if there were no input files, assume stdin */
    if (!fSawFile) {
        yyin = stdin;
        BEGIN Nrm;
        (void)yylex();
    }
    if (YYSTATE != Nrm)
        ErrorExit("input contains an unterminated mode or environment");
    exit(0);
}

/******
** yywrap -- handles EOF for lex.  Closes the exhausted file, then checks
**      whether the stack of open files has anything on it.  If it does,
**      yyin is set to the top value and 0 is returned so scanning
**      continues.  If not, 1 (the termination signal for lex) is returned.
******/

yywrap()
{
    (void)fclose(yyin);
    if (cfp > 0) {
        yyin = rgfp[--cfp];
        return(0);
    }
    return(1);
}

/******
** SbSave -- returns a freshly malloc'ed copy of sb, or NULL if sb is NULL.
**      Exits through ErrorExit() if no memory is available.  Used so
**      that SeparateList(), which writes into its argument, is never
**      handed a string literal or the environment's own storage.
******/

static char *
SbSave(sb)
char    *sb;
{
    char        *sbNew;

    if (sb == NULL)
        return((char *)NULL);
    if ((sbNew = malloc((unsigned)(strlen(sb) + 1))) == NULL)
        ErrorExit("out of memory");
    (void)strcpy(sbNew, sb);
    return(sbNew);
}

/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**      sbEnvList.  The list is split in a private copy; the caller's
**      string (possibly the DEFAULTENV literal) is left untouched.
**      Exits if the list holds more than MAXENVS environments.
******/

SetEnvIgnore(sbEnvList)
char    *sbEnvList;
{
    csbEnvIgnore = SeparateList(SbSave(sbEnvList), rgsbEnvIgnore,
                                CHENVSEP, MAXENVS);
    if (csbEnvIgnore == ERROR)
        ErrorExit("The environtment list contains too many environments");
}

/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**      is, sbCurrentEnv is set to sbEnv and 1 is returned.  Returns 0
**      otherwise, and always when not in LaTeX mode.
******/

BeginEnv(sbEnv)
char    *sbEnv;
{
    int i;

    if (!fLatex)
        return(0);
    for (i = 0; i < csbEnvIgnore; i++)
        if (strcmp(sbEnv, rgsbEnvIgnore[i]) == 0) {
            (void)strcpy(sbCurrentEnv, sbEnv);
            return(1);
        }
    return(0);
}

/******
** EndEnv -- checks to see if sbEnv is the current environment being ignored.
**      Returns 1 if it is, 0 if not or when not in LaTeX mode.
******/

EndEnv(sbEnv)
char    *sbEnv;
{
    if (!fLatex)
        return(0);
    if (strcmp(sbEnv, sbCurrentEnv) == 0)
        return(1);
    return(0);
}

/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**      or the file stack rgfp is already full, the sbFile is ignored
**      with a warning.  Does nothing when -n was given.
******/

InputFile(sbFile)
char    *sbFile;
{
    FILE        *TexOpen();

    if (!fFollow)
        return;
    /* rgfp has NOFILE+1 slots; refuse to push past the last one */
    if (cfp > NOFILE) {
        Warning("files nested too deeply, ignoring \\input file", sbFile);
        return;
    }
    rgfp[cfp++] = yyin;
    if ((yyin = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\input file", sbFile);
        yyin = rgfp[--cfp];
    }
}

/******
** IncludeFile -- if sbFile is accepted by InList(), push current yyin
**      and open sbFile.  If the open fails, or the file stack rgfp is
**      already full, the sbFile is ignored with a warning.  Does nothing
**      when -n was given.
******/

IncludeFile(sbFile)
char    *sbFile;
{
    FILE        *TexOpen();

    if (!fFollow)
        return;
    if (!InList(sbFile))
        return;
    /* rgfp has NOFILE+1 slots; refuse to push past the last one */
    if (cfp > NOFILE) {
        Warning("files nested too deeply, ignoring \\include file", sbFile);
        return;
    }
    rgfp[cfp++] = yyin;
    if ((yyin = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\include file", sbFile);
        yyin = rgfp[--cfp];
    }
}

/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**      csbIncList.  If the include list is too long (or no memory is
**      left), sbFile is ignored with a warning.  Does nothing when -n
**      was given.
******/

AddInclude(sbFile)
char    *sbFile;
{
    char        *sbNew;

    if (!fFollow)
        return;
    if (csbIncList >= MAXINCLIST) {
        Warning("\\includeonly list is too long, ignoring", sbFile);
        return;         /* storing it would write past rgsbIncList */
    }
    if ((sbNew = malloc((unsigned)(strlen(sbFile) + 1))) == NULL) {
        Warning("out of memory, ignoring \\includeonly file", sbFile);
        return;
    }
    (void)strcpy(sbNew, sbFile);
    rgsbIncList[csbIncList++] = sbNew;
}

/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**      no list, all files are assumed to be "in the list".  sbFile is
**      compared without whatever follows its last '.'.  A NULL entry
**      ends the list early (this is how an empty \includeonly is
**      recorded).
******/

InList(sbFile)
char    *sbFile;
{
    char        *pch;
    int         cchBase, i;

    if (csbIncList == 0)        /* no list */
        return(1);

    /* length of sbFile up to, but not including, its last '.' */
    if ((pch = rindex(sbFile, '.')) != NULL)
        cchBase = pch - sbFile;
    else
        cchBase = strlen(sbFile);

    /*
     * An entry matches when it equals the first cchBase characters of
     * sbFile and ends right there; comparing in place avoids copying
     * sbFile into a fixed size buffer.
     */
    for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
        if (strncmp(rgsbIncList[i], sbFile, cchBase) == 0
                && rgsbIncList[i][cchBase] == '\0')
            return(1);
    return(0);
}

/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**      TEXINPUTS environment variable if set or else DEFAULTINPUTS.
**      The list is split in a private copy in either case.  Exits if
**      the list holds more than MAXINPUTPATHS paths.
******/

SetInputPaths()
{
    char        *sb, *getenv();

    if ((sb = getenv("TEXINPUTS")) == NULL)
        sb = DEFAULTINPUTS;

    csbInputPaths = SeparateList(SbSave(sb), rgsbInputPaths,
                                 CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
        ErrorExit("TEXINPUTS environment variable has too many paths");
}

/******
** SeparateList -- takes a chSep separated list sbList, replaces the
**      chSep's with NULs and sets rgsbList[i] to the beginning of
**      the ith word in sbList.  The number of words is returned.  A
**      ERROR is returned if there are more than csbMax words.
**      sbList is modified in place, so it must be writable; a NULL
**      sbList yields zero words.
******/

SeparateList(sbList, rgsbList, chSep, csbMax)
char    *sbList, *rgsbList[], chSep;
int     csbMax;
{
    int csbList = 0;

    while (sbList && *sbList && csbList < csbMax) {
        rgsbList[csbList++] = sbList;
        if ((sbList = index(sbList, chSep)) != NULL)
            *sbList++ = '\0';
    }
    /* anything left over means the list did not fit */
    return(sbList && *sbList ? ERROR : csbList);
}

/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**      For each input path the following order is used:
**              file.tex - must be as named, if not there go to the next path
**              file.ext - random extension, try it
**              file     - base name, add .tex and try it
**              file     - try it as is
**      Notice that if file exists in the first path and file.tex exists in
**      one of the other paths, file in the first path is what is opened.
**      If the sbFile begins with a '/', no paths are searched.
**      Names too long to fit in MAXPATHLEN are skipped rather than
**      allowed to overrun the path buffer.  Returns the open stream, or
**      NULL if nothing could be opened.
******/

FILE *
TexOpen(sbFile)
char    *sbFile;
{
    char        *pch;
    FILE        *fp;
    int         iPath;
    static char sbFullPath[MAXPATHLEN];
    static char sbTexPath[MAXPATHLEN + 4];      /* sbFullPath plus ".tex" */

    for (iPath = 0; iPath < csbInputPaths; iPath++) {
        if (*sbFile == '/') {   /* absolute path */
            if (strlen(sbFile) >= MAXPATHLEN)
                return((FILE *)NULL);
            (void)sprintf(sbFullPath, "%s", sbFile);
            iPath = csbInputPaths;      /* only check once */
        } else {
            /* directory + '/' + file + NUL must fit in sbFullPath */
            if (strlen(rgsbInputPaths[iPath]) + strlen(sbFile) + 2
                    > MAXPATHLEN)
                continue;
            (void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
        }

        /* If sbFile ends in .tex then it must be there */
        pch = rindex(sbFullPath, '.');
        if (pch != NULL && strcmp(pch, ".tex") == 0) {
            if ((fp = fopen(sbFullPath, "r")) != NULL)
                return(fp);
            continue;
        }

        /* if .<ext> then try to open it.  the '.' represents   */
        /* the beginning of an extension if it is not the first */
        /* character and it does not follow a '.' or a '/'      */
        if (pch != NULL && pch > &(sbFullPath[0])
                && *(pch - 1) != '.' && *(pch - 1) != '/'
                && (fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);

        /* just base name, add .tex to the name */
        (void)strcpy(sbTexPath, sbFullPath);
        (void)strcat(sbTexPath, ".tex");
        if ((fp = fopen(sbTexPath, "r")) != NULL)
            return(fp);

        /* try sbFile regardless */
        if ((fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);
    }
    return((FILE *)NULL);
}

/******
** Warning -- print a warning message preceded by the program name.
**      sb1 and sb2 are printed separated by a single blank.
******/

Warning(sb1, sb2)
char    *sb1, *sb2;
{
    fprintf(stderr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
}

/******
** ErrorExit -- print an error message preceded by the program name.
**      Stdout is flushed and detex exits with status 1.
******/

ErrorExit(sb1)
char    *sb1;
{
    (void)fflush(stdout);
    fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
    exit(1);
}