|
|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T d
Length: 13145 (0x3359)
Types: TextFile
Names: »detex.l«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
└─⟦af57ea0c3⟧ »utils/detex-2.3.tar«
└─⟦this⟧ »./detex.l«
%{
/* RCS revision string for this file; it is compiled in only when `lint' is not defined. */
#ifndef lint
static char rcsid[] = "$Header: /usr/src/local/bin/detex/RCS/detex.l,v 2.8 1990/09/07 20:48:32 trinkle Exp trinkle $";
#endif
/*
* detex [-e environment-list] [-c] [-l] [-n] [-s] [-w] [file[.tex]]
*
* This program is used to remove TeX or LaTeX constructs from a text
* file.
*
* Written by:
* Daniel Trinkle
* Department of Computer Science
* Purdue University
*
*/
#include "detex.h"
#include <strings.h>
#include <sys/param.h>
/*
 * Action helpers used by the rules.  Each one expands to an unbraced
 * `if', so it has to be written as a statement of its own.
 *   LaBEGIN state    enter `state' only when fLatex is set
 *   CITEBEGIN state  enter `state' unless both fLatex and fCite are set
 *   IGNORE           print a blank when fSpace is set and fWord is not
 *   SPACE            print a blank unless fWord is set
 *   NEWLINE          print a newline unless fWord is set
 */
#define LaBEGIN if (fLatex) BEGIN
#define CITEBEGIN if (!fLatex || !fCite) BEGIN
#define IGNORE if (fSpace && !fWord) putchar(' ')
#define SPACE if (!fWord) putchar(' ')
#define NEWLINE if (!fWord) putchar('\n')
char *malloc();
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
int cfp = 0; /* count of files in stack */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag to indicate delatex (LaTeX mode) */
int fWord = 0; /* flag for -w option */
int fFollow = 1; /* flag to follow input/include */
int fCite = 0; /* flag to echo \cite and \ref args */
int fSpace = 0; /* flag to replace \cs with space */
/*
 * Lex definitions that follow this code section:
 *   S  optional white space (blanks, tabs and newlines)
 *   W  one or more letters
 * The %Start lines declare the start conditions used by the rules;
 * main() selects Nrm before each call to yylex().
 */
%}
S [ \t\n]*
W [a-zA-Z]+
%Start Def Disp IncOnly Input Math Nrm Ctl
%Start LaBegin LaDisp LaEnd LaEnv LaForm LaInc LaMacro LaVerb
%%
<Nrm>"%".* /* ignore comments */ ;
	/* Seeing the document environment begin turns LaTeX mode on. */
<Nrm>"\\begin"{S}"{"{S}"document"{S}"}" {fLatex = 1; IGNORE;}
<Nrm>"\\begin" /* environment start */ {LaBEGIN LaBegin; IGNORE;}
	/* verbatim: dropped like any ignored environment when BeginEnv() */
	/* accepts it, otherwise its characters are echoed in LaVerb. */
<LaBegin>{S}"{"{S}"verbatim"{S}"}" { if (BeginEnv("verbatim"))
BEGIN LaEnv;
else
BEGIN LaVerb;
IGNORE;
}
<LaVerb>"\\end"{S}"{"{S}"verbatim"{S}"}" /* verbatim mode */ {BEGIN Nrm; IGNORE;}
<LaVerb>. ECHO;
	/* Any other environment name: LaEnv when it is on the ignore list, */
	/* otherwise LaMacro, which drops the rest of the argument. */
<LaBegin>{W} { if (BeginEnv(yytext))
BEGIN LaEnv;
else
BEGIN LaMacro;
IGNORE;
}
<LaBegin>"\n" NEWLINE;
<LaBegin>. ;
	/* Inside an ignored environment only newlines are passed on. */
<LaEnv>"\\end" /* absorb some environments */ {LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n" NEWLINE;
<LaEnv>. ;
	/* Return to Nrm only when EndEnv() reports that the name closes the */
	/* current ignored environment; a closing brace resumes LaEnv. */
<LaEnd>{W} /* end environment */ { if (EndEnv(yytext))
BEGIN Nrm;
IGNORE;
}
<LaEnd>"}" {BEGIN LaEnv; IGNORE;}
<LaEnd>"\n" NEWLINE;
<LaEnd>. ;
<Nrm>"\\bibitem" /* ignore args */ {LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\bibliography" /* of these \cs */ {LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\bibstyle" {LaBEGIN LaMacro; IGNORE;}
	/* The CITEBEGIN rules skip the argument unless LaTeX mode and */
	/* fCite are both set, in which case the scanner stays in Nrm. */
<Nrm>"\\cite" {CITEBEGIN LaMacro; IGNORE;}
<Nrm>"\\documentstyle" {LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\end" {LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\index" {LaBEGIN LaMacro; SPACE;}
<Nrm>"\\label" {LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\pageref" {CITEBEGIN LaMacro; IGNORE;}
<Nrm>"\\ref" {CITEBEGIN LaMacro; IGNORE;}
	/* LaMacro drops everything up to and including the next closing */
	/* brace; newlines are still passed on through NEWLINE. */
<LaMacro>"}" BEGIN Nrm;
<LaMacro>"\n" NEWLINE;
<LaMacro>. ;
<Nrm>"\\def" /* ignore def begin */ {BEGIN Def; IGNORE;}
	/* Def drops text up to and including the first opening brace. */
<Def>"{" BEGIN Nrm;
<Def>"\n" NEWLINE;
<Def>. ;
	/* The LaForm and LaDisp states are entered in LaTeX mode only. */
<Nrm>"\\(" /* formula mode */ {LaBEGIN LaForm; IGNORE;}
<LaForm>"\\)" BEGIN Nrm;
<LaForm>"\n" NEWLINE;
<LaForm>. ;
<Nrm>"\\[" /* display mode */ {LaBEGIN LaDisp; IGNORE;}
<LaDisp>"\\]" BEGIN Nrm;
<LaDisp>"\n" NEWLINE;
<LaDisp>. ;
	/* Dollar-delimited display and math modes apply in either mode. */
<Nrm>"$$" /* display mode */ {BEGIN Disp; IGNORE;}
<Disp>"$$" BEGIN Nrm;
<Disp>"\n" NEWLINE;
<Disp>. ;
<Nrm>"$" /* math mode */ {BEGIN Math; IGNORE;}
<Math>"$" BEGIN Nrm;
<Math>"\n" NEWLINE;
	/* An escaped dollar sign does not end math mode. */
<Math>"\\$" ;
<Math>. ;
<Nrm>"\\include" /* process files */ {LaBEGIN LaInc; IGNORE;}
	/* The first run of characters that are not braces or white space */
	/* is taken as the file name. */
<LaInc>[^{ \t\n}]+ { IncludeFile(yytext);
BEGIN Nrm;
}
<LaInc>"\n" NEWLINE;
<LaInc>. ;
<Nrm>"\\includeonly" {BEGIN IncOnly; IGNORE;}
	/* Each comma-separated name is handed to AddInclude(). */
<IncOnly>[^{ \t,\n}]+ AddInclude(yytext);
	/* When no name was stored, a NULL entry is added so that */
	/* csbIncList is no longer zero. */
<IncOnly>"}" { if (csbIncList == 0)
rgsbIncList[csbIncList++] = NULL;
BEGIN Nrm;
}
<IncOnly>"\n" NEWLINE;
<IncOnly>. ;
<Nrm>"\\input" {BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+ { InputFile(yytext);
BEGIN Nrm;
}
<Input>"\n" NEWLINE;
<Input>. ;
<Nrm>\\[a-zA-Z@]+ /* ignore other \cs */ {BEGIN Ctl; IGNORE;}
<Nrm>"\\ " SPACE;
<Nrm>\\. IGNORE;
	/* Ctl is the state right after a control sequence.  Further control */
	/* sequences are dropped, as is a token containing one of - ' = ` */
	/* (presumably an assignment or dimension argument; unconfirmed). */
<Ctl>\\[a-zA-Z@]+ IGNORE;
<Ctl>[a-zA-Z@0-9]*[-'=`][^ \t\n{]* IGNORE;
<Ctl>"\n" {BEGIN Nrm; NEWLINE;}
<Ctl>[ \t{]* {BEGIN Nrm; IGNORE;}
	/* Anything else is pushed back with yyless(0) and rescanned in Nrm. */
<Ctl>. {yyless(0);BEGIN Nrm;}
<Nrm>[{}\\|] /* special characters */ IGNORE;
<Nrm>[!?]"`" IGNORE;
<Nrm>~ SPACE;
	/* A word is two runs of letters, optionally joined by apostrophes, */
	/* so it has at least two letters.  With fWord set each word is */
	/* printed on a line of its own. */
<Nrm>{W}[']*{W} { if (fWord)
printf("%s\n", yytext);
else
ECHO;
}
	/* Digits and all remaining characters are echoed unless fWord is set. */
<Nrm>[0-9]+ if (!fWord) ECHO;
<Nrm>(.|\n) if (!fWord) ECHO;
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**		-c		echo LaTeX \cite, \ref, and \pageref values
**		-e <env-list>	list of LaTeX environments to ignore
**		-l		force latex mode
**		-n		do not follow \input and \include
**		-s		replace control sequences with a space
**		-w		word only output
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**
**	An environment-list option that is not followed by another argument
**	is reported with a warning and the default list stays in effect.
******/
main(cArgs,rgsbArgs)
int cArgs;
char *rgsbArgs[];
{
    char *pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
    FILE *TexOpen();
    int fSawFile = 0, iArgs = 1;

    /* get the base name of the program for use in messages */
    if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
        sbProgName++;
    else
        sbProgName = rgsbArgs[0];

    /* set rgsbInputPaths for use with TexOpen() */
    SetInputPaths();

    /* process command line options */
    while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
        while (*++pch)
            switch (*pch) {
            case CHCITEOPT:
                fCite = 1;
                break;
            case CHENVOPT:
                /* the list is the NEXT argument; never step past the end */
                if (iArgs + 1 < cArgs)
                    sbEnvList = rgsbArgs[++iArgs];
                else {
                    sbBadOpt[0] = *pch;
                    sbBadOpt[1] = '\0';
                    Warning("option requires an argument -", sbBadOpt);
                }
                break;
            case CHLATEXOPT:
                fLatex = 1;
                break;
            case CHNOFOLLOWOPT:
                fFollow = 0;
                break;
            case CHSPACEOPT:
                fSpace = 1;
                break;
            case CHWORDOPT:
                fWord = 1;
                break;
            default:
                sbBadOpt[0] = *pch;
                sbBadOpt[1] = '\0';
                Warning("unknown option ignored -", sbBadOpt);
                break;
            }
        iArgs++;
    }
    SetEnvIgnore(sbEnvList);

    /* process input files */
    for (; iArgs < cArgs; iArgs++) {
        fSawFile++;
        if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
            Warning("can't open file", rgsbArgs[iArgs]);
            continue;
        }
        BEGIN Nrm;
        (void)yylex();
    }

    /* if there were no input files, assume stdin */
    if (!fSawFile) {
        yyin = stdin;
        BEGIN Nrm;
        (void)yylex();
    }

    /* the scanner must be back in the normal state once the input is used up */
    if (YYSTATE != Nrm)
        ErrorExit("input contains an unterminated mode or environment");
    exit(0);
}
/******
** yywrap -- called by lex at end of file.  The finished stream is closed.
**	If an interrupted file is waiting on the rgfp stack it becomes yyin
**	again and 0 is returned so that scanning continues; with an empty
**	stack 1 is returned, which tells lex to stop.
******/
yywrap()
{
    (void)fclose(yyin);
    if (cfp <= 0)
        return(1);
    cfp--;
    yyin = rgfp[cfp];
    return(0);
}
/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList.  SeparateList() overwrites the separators in the list
**	it is given, so a private copy is split and sbEnvList itself (which
**	may be a string constant) is left untouched.  The copy is kept for
**	the life of the program because rgsbEnvIgnore points into it.
**	A NULL sbEnvList yields an empty ignore list.
******/
SetEnvIgnore(sbEnvList)
char *sbEnvList;
{
    char *sbCopy = NULL;

    if (sbEnvList != NULL) {
        sbCopy = malloc((unsigned)(strlen(sbEnvList) + 1));
        if (sbCopy == NULL)
            ErrorExit("out of memory for the environment list");
        (void)strcpy(sbCopy, sbEnvList);
    }
    csbEnvIgnore = SeparateList(sbCopy, rgsbEnvIgnore, CHENVSEP, MAXENVS);
    if (csbEnvIgnore == ERROR)
        ErrorExit("The environment list contains too many environments");
}
/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned; otherwise 0 is
**	returned.  Outside LaTeX mode the answer is always 0.
**	A listed name that does not fit in sbCurrentEnv is reported with a
**	warning and is not ignored, rather than overrunning the buffer.
******/
BeginEnv(sbEnv)
char *sbEnv;
{
    int i;

    if (!fLatex)
        return(0);
    for (i = 0; i < csbEnvIgnore; i++) {
        if (strcmp(sbEnv, rgsbEnvIgnore[i]) != 0)
            continue;
        if (strlen(sbEnv) >= sizeof(sbCurrentEnv)) {
            Warning("environment name is too long, not ignoring", sbEnv);
            return(0);
        }
        (void)strcpy(sbCurrentEnv, sbEnv);
        return(1);
    }
    return(0);
}
/******
** EndEnv -- returns 1 when LaTeX mode is on and sbEnv names the
**	environment recorded in sbCurrentEnv, and 0 in every other case.
******/
EndEnv(sbEnv)
char *sbEnv;
{
    return(fLatex && strcmp(sbEnv, sbCurrentEnv) == 0);
}
/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	the sbFile is ignored.  Nothing is done when fFollow is clear.
**	rgfp holds NOFILE+1 streams; when it is full sbFile is ignored with
**	a warning instead of writing past the end of the stack.
******/
InputFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    if (!fFollow)
        return;
    if (cfp >= NOFILE + 1) {
        Warning("\\input files are nested too deeply, ignoring", sbFile);
        return;
    }
    /* open first, so that yyin and the stack change only on success */
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\input file", sbFile);
        return;
    }
    rgfp[cfp++] = yyin;
    yyin = fpNew;
}
/******
** IncludeFile -- if InList() accepts sbFile, push the current yyin and
**	open sbFile.  If the open fails, the sbFile is ignored.  Nothing is
**	done when fFollow is clear.
**	rgfp holds NOFILE+1 streams; when it is full sbFile is ignored with
**	a warning instead of writing past the end of the stack.
******/
IncludeFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    if (!fFollow)
        return;
    if (!InList(sbFile))
        return;
    if (cfp >= NOFILE + 1) {
        Warning("\\include files are nested too deeply, ignoring", sbFile);
        return;
    }
    /* open first, so that yyin and the stack change only on success */
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\include file", sbFile);
        return;
    }
    rgfp[cfp++] = yyin;
    yyin = fpNew;
}
/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is already full, sbFile is ignored
**	with a warning.  Nothing is done when fFollow is clear.
******/
AddInclude(sbFile)
char *sbFile;
{
    char *sbCopy;

    if (!fFollow)
        return;
    if (csbIncList >= MAXINCLIST) {
        Warning("\\includeonly list is too long, ignoring", sbFile);
        return;		/* do not store past the end of rgsbIncList */
    }
    /* sbFile is the scanner's token buffer, so keep a private copy */
    sbCopy = malloc((unsigned)(strlen(sbFile) + 1));
    if (sbCopy == NULL)
        ErrorExit("out of memory for the \\includeonly list");
    (void)strcpy(sbCopy, sbFile);
    rgsbIncList[csbIncList++] = sbCopy;
}
/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".
**	sbFile is compared without its extension, i.e. up to its last '.'.
**	The comparison is done in place by length, so a name of any size
**	is handled without a scratch buffer.
******/
InList(sbFile)
char *sbFile;
{
    char *pch;
    int cchBase, i;

    if (csbIncList == 0)	/* no list */
        return(1);

    /* number of characters in front of the last '.', or the whole name */
    if ((pch = rindex(sbFile, '.')) != NULL)
        cchBase = (int)(pch - sbFile);
    else
        cchBase = (int)strlen(sbFile);

    /* a NULL entry ends the search early */
    for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
        if ((int)strlen(rgsbIncList[i]) == cchBase
            && strncmp(rgsbIncList[i], sbFile, cchBase) == 0)
            return(1);
    return(0);
}
/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS.
**	SeparateList() overwrites the separators in the list it is given,
**	so in both cases a private copy is split.  The copy is kept for the
**	life of the program because rgsbInputPaths points into it.
******/
SetInputPaths()
{
    char *sb, *sbPaths, *getenv();

    if ((sb = getenv("TEXINPUTS")) == NULL)
        sb = DEFAULTINPUTS;
    sbPaths = malloc((unsigned)(strlen(sb) + 1));
    if (sbPaths == NULL)
        ErrorExit("out of memory for the input path list");
    (void)strcpy(sbPaths, sb);
    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
        ErrorExit("TEXINPUTS environment variable has too many paths");
}
/******
** SeparateList -- takes a chSep separated list sbList, replaces the
**	chSep's with NUL characters and sets rgsbList[i] to the beginning of
**	the ith word in sbList.  The number of words is returned.  A
**	ERROR is returned if there are more than csbMax words.
**	sbList is modified in place, so it must be writable and must stay
**	allocated for as long as rgsbList is used.  A NULL or empty sbList
**	gives a count of 0.
******/
SeparateList(sbList, rgsbList, chSep, csbMax)
char *sbList, *rgsbList[], chSep;
int csbMax;
{
    int csbList = 0;

    while (sbList && *sbList && csbList < csbMax) {
        rgsbList[csbList++] = sbList;
        /* end this word at the separator and step on to the next one */
        if ((sbList = index(sbList, chSep)) != NULL)
            *sbList++ = '\0';
    }
    /* text left over means the list had more than csbMax words */
    return(sbList && *sbList ? ERROR : csbList);
}
/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**		file.tex - must be as named, if not there go to the next path
**		file.ext - random extension, try it
**		file     - base name, add .tex and try it
**		file     - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched.
**	A candidate name that would not fit in MAXPATHLEN characters is
**	skipped.  The open stream is returned, or NULL if nothing opened.
******/
FILE *
TexOpen(sbFile)
char *sbFile;
{
    char *pch;
    FILE *fp;
    int iPath;
    static char sbFullPath[MAXPATHLEN];
    char sbTexName[MAXPATHLEN + 5];	/* sbFullPath plus ".tex" */

    for (iPath = 0; iPath < csbInputPaths; iPath++) {
        if (*sbFile == '/') {	/* absolute path */
            if (strlen(sbFile) >= MAXPATHLEN)
                return((FILE *)NULL);
            (void)strcpy(sbFullPath, sbFile);
            iPath = csbInputPaths;	/* only check once */
        } else {
            /* directory + '/' + file + NUL has to fit in sbFullPath */
            if (strlen(rgsbInputPaths[iPath]) + 1 + strlen(sbFile) >= MAXPATHLEN)
                continue;
            (void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
        }

        /* If sbFile ends in .tex then it must be there */
        pch = rindex(sbFullPath, '.');
        if (pch != NULL && strcmp(pch, ".tex") == 0) {
            if ((fp = fopen(sbFullPath, "r")) != NULL)
                return(fp);
            continue;
        }

        /* if .<ext> then try to open it.  the '.' represents */
        /* the beginning of an extension if it is not the first */
        /* character and it does not follow a '.' or a '/' */
        if (pch != NULL && pch > &(sbFullPath[0])
            && *(pch - 1) != '.' && *(pch - 1) != '/'
            && (fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);

        /* just base name, add .tex to the name */
        (void)strcpy(sbTexName, sbFullPath);
        (void)strcat(sbTexName, ".tex");
        if ((fp = fopen(sbTexName, "r")) != NULL)
            return(fp);

        /* try sbFile regardless */
        if ((fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);
    }
    return((FILE *)NULL);
}
/******
** Warning -- writes "<program>: warning: <sb1> <sb2>" and a newline to
**	stderr, where <program> is sbProgName.
******/
Warning(sb1, sb2)
char *sb1;
char *sb2;
{
    (void)fprintf(stderr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
}
/******
** ErrorExit -- print an error message preceded by the program name.
** Stdout is flushed and detex exits.
******/
ErrorExit(sb1)
char *sb1;
{
(void)fflush(stdout);
fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
exit(1);
}