|
|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T d
Length: 11857 (0x2e51)
Types: TextFile
Names: »detex.l«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
└─⟦c319c2751⟧ »unix3.0/TeX3.0.tar.Z«
└─⟦036c765ac⟧
└─⟦this⟧ »TeX3.0/TeXcontrib/trinkle/detex.l«
└─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89
└─⟦this⟧ »./tex82/TeXcontrib/trinkle/detex.l«
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
└─⟦63303ae94⟧ »unix3.14/TeX3.14.tar.Z«
└─⟦c58930e5c⟧
└─⟦this⟧ »TeX3.14/TeXcontrib/trinkle/detex.l«
%{
static char rcsid[] = "$Header: /u1/src/local/bin/tex/tools/detex/RCS/detex.l,v 2.3 86/10/23 16:29:27 trinkle Exp $";
/*
* detex [-w] [-l] [-e environment-list] [file[.tex]]
*
* This program is used to remove TeX or LaTeX constructs from a text
* file.
*
* Options (see main):
*   -w  word only output
*   -l  force LaTeX mode
*   -e  list of LaTeX environments to ignore
*
* Written by:
* Daniel Trinkle
* Department of Computer Science
* Purdue University
*
*/
#include "detex.h"
#include <strings.h>
#include <sys/param.h>
/* LaBEGIN <state>: switch start state only when LaTeX mode (fLatex) is on */
#define LaBEGIN if (fLatex) BEGIN
/* IGNORE: replace a removed construct by one blank, unless in word mode */
#define IGNORE if (!fWord) putchar(' ')
/* NEWLINE: keep a line break from skipped input, unless in word mode */
#define NEWLINE if (!fWord) putchar('\n')
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of suspended input/include files */
int cfp = 0; /* count of files in stack */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag indicating LaTeX (delatex) mode */
int fWord = 0; /* flag for -w option (word only output) */
/*
* Lex definitions that follow:
*   S  an optional run of blanks, tabs and newlines
*   W  a run of one or more letters
*
* Start states, as used by the rules:
*   Normal     ordinary text
*   Def        after \def, up to the first "{"
*   Display    between "$$" and "$$"
*   Math       between "$" and "$"
*   IncOnly    inside the argument of \includeonly
*   Input      reading the file name after \input
*   LaBegin    after \begin, reading the environment name
*   LaEnv      inside an environment that is being ignored
*   LaEnd      after \end seen inside an ignored environment
*   LaMacro    skipping the argument of a control sequence up to "}"
*   LaForm     between \( and \)
*   LaDisplay  between \[ and \]
*   LaInc      reading the file name after \include
*/
%}
S [ \t\n]*
W [a-zA-Z]+
%Start Def Display IncOnly Input Math Normal
%Start LaBegin LaDisplay LaEnd LaEnv LaForm LaInc LaMacro
%%
<Normal>"%".* /* drop TeX comments: "%" through end of line */ ;
<Normal>"\\begin"{S}"{"{S}"document"{S}"}" /* \begin{document} turns on LaTeX mode */ {fLatex = 1; IGNORE;}
<Normal>"\\begin" /* environment start (acted on in LaTeX mode only) */ {LaBEGIN LaBegin; IGNORE;}
<LaBegin>{W} { if (BeginEnv(yytext))
/* environment is on the ignore list: swallow its body */
BEGIN LaEnv;
else
/* otherwise just skip the rest of the \begin{...} argument */
BEGIN LaMacro;
IGNORE;
}
<LaBegin>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaBegin>. /* skip anything before the environment name */ ;
<LaEnv>"\\end" /* possible end of the ignored environment */ {LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaEnv>. /* discard the body of the ignored environment */ ;
<LaEnd>{W} /* end environment: back to Normal only if the name matches */ { if (EndEnv(yytext))
BEGIN Normal;
IGNORE;
}
<LaEnd>"}" /* some other \end{...}: keep ignoring */ {BEGIN LaEnv; IGNORE;}
<LaEnd>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaEnd>. /* skip anything else */ ;
<Normal>"\\bibitem" /* ignore args */ {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibliography" /* of these \cs */ {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibstyle" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\cite" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\documentstyle" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\end" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\label" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\pageref" {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\ref" {LaBEGIN LaMacro; IGNORE;}
<LaMacro>"}" /* first "}" ends the skipped argument */ BEGIN Normal;
<LaMacro>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaMacro>. /* discard the argument text */ ;
<Normal>"\\def" /* ignore def begin */ {BEGIN Def; IGNORE;}
<Def>"{" /* first "{" returns to Normal */ BEGIN Normal;
<Def>"\n" /* keep line breaks while skipping */ NEWLINE;
<Def>. /* discard text between \def and the "{" */ ;
<Normal>"\\(" /* formula mode */ {LaBEGIN LaForm; IGNORE;}
<LaForm>"\\)" /* end of formula */ BEGIN Normal;
<LaForm>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaForm>. /* discard formula text */ ;
<Normal>"\\[" /* display mode */ {LaBEGIN LaDisplay; IGNORE;}
<LaDisplay>"\\]" /* end of display */ BEGIN Normal;
<LaDisplay>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaDisplay>. /* discard display text */ ;
<Normal>"$$" /* display mode */ {BEGIN Display; IGNORE;}
<Display>"$$" /* end of display */ BEGIN Normal;
<Display>"\n" /* keep line breaks while skipping */ NEWLINE;
<Display>. /* discard display math */ ;
<Normal>"$" /* math mode */ {BEGIN Math; IGNORE;}
<Math>"$" /* end of math */ BEGIN Normal;
<Math>"\n" /* keep line breaks while skipping */ NEWLINE;
<Math>. /* discard inline math */ ;
<Normal>"\\include" /* process files */ {LaBEGIN LaInc; IGNORE;}
<LaInc>[^{ \t\n}]+ { /* the file name: anything but braces and white space */
IncludeFile(yytext);
BEGIN Normal;
}
<LaInc>"\n" /* keep line breaks while skipping */ NEWLINE;
<LaInc>. /* skip braces and blanks around the name */ ;
<Normal>"\\includeonly" /* collect the list of files allowed for \include */ {BEGIN IncOnly; IGNORE;}
<IncOnly>[^{ \t,\n}]+ /* one comma separated file name */ AddInclude(yytext);
<IncOnly>"}" { if (csbIncList == 0)
/* empty list: store a NULL entry so the count becomes nonzero */
rgsbIncList[csbIncList++] = NULL;
BEGIN Normal;
}
<IncOnly>"\n" /* keep line breaks while skipping */ NEWLINE;
<IncOnly>. /* skip separators */ ;
<Normal>"\\input" /* process files, in TeX and LaTeX mode alike */ {BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+ { /* the file name: anything but braces and white space */
InputFile(yytext);
BEGIN Normal;
}
<Input>"\n" /* keep line breaks while skipping */ NEWLINE;
<Input>. /* skip braces and blanks around the name */ ;
<Normal>\\[a-zA-Z@]+ /* ignore other \cs */ IGNORE;
<Normal>\\. /* ignore a backslash followed by any single character */ IGNORE;
<Normal>\\[a-zA-Z@][a-zA-Z@0-9]*['=`][^ \t\n]* /* \cs directly followed by ', = or `: ignore up to white space */ IGNORE;
<Normal>[{}\\|~] /* special characters */ IGNORE;
<Normal>[!?]"`" /* ignore "!" or "?" followed by a backquote */ IGNORE;
<Normal>{W}[']*{W} { /* a word of two or more letters, possibly with embedded apostrophes */
if (fWord)
/* word mode: one word per line */
printf("%s\n", yytext);
else
ECHO;
}
<Normal>[0-9]+ /* numbers are dropped in word mode */ if (!fWord) ECHO;
<Normal>(.|\n) /* everything else is copied, except in word mode */ if (!fWord) ECHO;
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**		-w		word only output
**		-l		force latex mode
**		-e <env-list>	list of LaTeX environments to ignore
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**
**	A -e option that is not followed by an argument is reported with a
**	warning and the default environment list is kept.
**	Exits with status 0 on success; ErrorExit() is called if the scanner
**	is not back in the Normal state once all input has been read.
******/
main(cArgs,rgsbArgs)
int cArgs;
char *rgsbArgs[];
{
    char *pch, *sbEnvList = DEFAULTENV, sbOpt[2];
    FILE *TexOpen();
    int fSawFile = 0, iArgs = 1;

    /* get the base name of the program, used as a prefix in messages */
    if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
        sbProgName++;
    else
        sbProgName = rgsbArgs[0];

    /* set rgsbInputPaths for use with TexOpen() */
    SetInputPaths();

    /* process command line options */
    sbOpt[1] = '\0';
    while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
        while (*++pch) {
            sbOpt[0] = *pch;	/* option letter, for messages */
            switch (*pch) {
            case CHENVOPT:
                /* the list is the next argument; never read past the end */
                if (iArgs + 1 < cArgs)
                    sbEnvList = rgsbArgs[++iArgs];
                else
                    Warning("option requires an argument -", sbOpt);
                break;
            case CHLATEXOPT:
                fLatex = 1;
                break;
            case CHWORDOPT:
                fWord = 1;
                break;
            default:
                Warning("unknown option ignored -", sbOpt);
                break;
            }
        }
        iArgs++;
    }
    SetEnvIgnore(sbEnvList);

    /* process input files */
    for (; iArgs < cArgs; iArgs++) {
        fSawFile++;
        if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
            Warning("can't open file", rgsbArgs[iArgs]);
            continue;
        }
        BEGIN Normal;
        yylex();
    }

    /* if there were no input files, assume stdin */
    if (!fSawFile) {
        yyin = stdin;
        BEGIN Normal;
        yylex();
    }

    /* input that ends inside math, an argument or an environment is an error */
    if (YYSTATE != Normal)
        ErrorExit("input contains an unterminated mode or environment");
    exit(0);
}
/******
** yywrap -- end-of-file hook for lex.  The exhausted stream is closed.
**	If a suspended file is waiting on the rgfp stack it becomes yyin
**	again and 0 is returned; otherwise 1, the termination signal for
**	lex, is returned.
******/
yywrap()
{
    (void)fclose(yyin);

    /* nothing suspended: tell lex we are done */
    if (cfp <= 0)
        return(1);

    /* pop the most recently suspended file and carry on with it */
    cfp--;
    yyin = rgfp[cfp];
    return(0);
}
/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList and csbEnvIgnore to their number.  A NULL sbEnvList gives
**	an empty list.  Exits through ErrorExit() if the list holds too
**	many environments or memory runs out.
**
**	SeparateList() splits the list in place and rgsbEnvIgnore keeps
**	pointing into it, so the split is done on a private copy that is
**	deliberately never freed.  The caller's string (presumably a string
**	literal when it is DEFAULTENV -- confirm in detex.h) is not modified.
******/
SetEnvIgnore(sbEnvList)
char *sbEnvList;
{
    char *sbCopy = NULL;

    if (sbEnvList != NULL) {
        if ((sbCopy = (char *)malloc(strlen(sbEnvList) + 1)) == NULL)
            ErrorExit("out of memory");
        strcpy(sbCopy, sbEnvList);
    }
    csbEnvIgnore = SeparateList(sbCopy, rgsbEnvIgnore, CHENVSEP, MAXENVS);
    if (csbEnvIgnore == ERROR)
        ErrorExit("The environment list contains too many environments");
}
/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned; otherwise 0.
**	Always returns 0 when not in LaTeX mode.  A listed name that does
**	not fit in sbCurrentEnv is reported and not ignored, rather than
**	being copied past the end of the buffer.
******/
BeginEnv(sbEnv)
char *sbEnv;
{
    int i;

    if (!fLatex)
        return(0);
    for (i = 0; i < csbEnvIgnore; i++) {
        if (strcmp(sbEnv, rgsbEnvIgnore[i]) != 0)
            continue;
        /* a truncated copy could never be matched by EndEnv() */
        if (strlen(sbEnv) >= sizeof(sbCurrentEnv)) {
            Warning("environment name too long, not ignoring", sbEnv);
            return(0);
        }
        strcpy(sbCurrentEnv, sbEnv);
        return(1);
    }
    return(0);
}
/******
** EndEnv -- checks to see if sbEnv is the current environment being ignored.
**	Returns 1 if LaTeX mode is on and sbEnv equals sbCurrentEnv,
**	otherwise 0.
******/
EndEnv(sbEnv)
char *sbEnv;
{
    return(fLatex && strcmp(sbEnv, sbCurrentEnv) == 0);
}
/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	or the stack of suspended files is already full, a warning is
**	printed, the sbFile is ignored and yyin is left untouched.
**	Always returns 0.
******/
InputFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    /* never push past the end of rgfp (e.g. a file that \inputs itself) */
    if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
        Warning("files nested too deeply, ignoring \\input file", sbFile);
        return(0);
    }
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\input file", sbFile);
        return(0);
    }
    /* suspend the current file; yywrap() resumes it at end of sbFile */
    rgfp[cfp++] = yyin;
    yyin = fpNew;
    return(0);
}
/******
** IncludeFile -- if sbFile passes InList() (it is named in rgsbIncList, or
**	there is no \includeonly list at all), push the current yyin and
**	open sbFile.  If the open fails, or the stack of suspended files is
**	already full, a warning is printed and the sbFile is ignored.
**	Always returns 0.
******/
IncludeFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    if (!InList(sbFile))
        return(0);
    /* never push past the end of rgfp (e.g. a file that includes itself) */
    if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
        Warning("files nested too deeply, ignoring \\include file", sbFile);
        return(0);
    }
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\include file", sbFile);
        return(0);
    }
    /* suspend the current file; yywrap() resumes it at end of sbFile */
    rgfp[cfp++] = yyin;
    yyin = fpNew;
    return(0);
}
/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is already full, a warning is
**	printed and sbFile is ignored.  Exits through ErrorExit() if memory
**	runs out.  Always returns 0.
******/
AddInclude(sbFile)
char *sbFile;
{
    char *sbCopy;

    if (csbIncList >= MAXINCLIST) {
        Warning("\\includeonly list is too long, ignoring", sbFile);
        return(0);		/* do not store past the end of the list */
    }
    /* sbFile is the scanner's token buffer, so keep a private copy */
    if ((sbCopy = (char *)malloc(strlen(sbFile) + 1)) == NULL)
        ErrorExit("out of memory");
    strcpy(sbCopy, sbFile);
    rgsbIncList[csbIncList++] = sbCopy;
    return(0);
}
/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".  Returns 1 if
**	sbFile is in the list, 0 if not.
**
**	sbFile is compared without its extension.  Only a '.' in the last
**	path component counts as the start of an extension, so names such
**	as "./chap1" or "../chap1" are compared whole.  The comparison is
**	done on a length-limited prefix of sbFile, so no copy (and no fixed
**	size buffer) is needed.
******/
InList(sbFile)
char *sbFile;
{
    char *pchBase, *pchExt;
    int cchBase, i;

    if (csbIncList == 0)	/* no list */
        return(1);

    /* find the extension, looking no further back than the last '/' */
    if ((pchBase = rindex(sbFile, '/')) == NULL)
        pchBase = sbFile;
    if ((pchExt = rindex(pchBase, '.')) != NULL)
        cchBase = pchExt - sbFile;
    else
        cchBase = strlen(sbFile);

    /* a NULL entry marks the end of the list (an empty \includeonly) */
    for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
        if ((int)strlen(rgsbIncList[i]) == cchBase
            && strncmp(rgsbIncList[i], sbFile, cchBase) == 0)
            return(1);
    return(0);
}
/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set (and not empty) or else
**	DEFAULTINPUTS, and csbInputPaths to their number.  Exits through
**	ErrorExit() if there are too many paths or memory runs out.
**
**	SeparateList() splits the list in place and rgsbInputPaths keeps
**	pointing into it, so the split is always done on a private copy
**	that is deliberately never freed.  Neither the environment nor
**	DEFAULTINPUTS (presumably a string literal -- confirm in detex.h)
**	is modified.
******/
SetInputPaths()
{
    char *sb, *sbPaths, *getenv();

    /* an empty TEXINPUTS would leave no path to search at all */
    if ((sb = getenv("TEXINPUTS")) == NULL || *sb == '\0')
        sb = DEFAULTINPUTS;
    if ((sbPaths = (char *)malloc(strlen(sb) + 1)) == NULL)
        ErrorExit("out of memory");
    strcpy(sbPaths, sb);

    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
        ErrorExit("TEXINPUTS environment variable has too many paths");
}
/******
** SeparateList -- takes a chSep separated list sbList, replaces the
** chSep's with NULLs and sets rgsbList[i] to the beginning of
** the ith word in sbList. The number of words is returned. A
** ERROR is returned if there are more than csbMax words.
******/
SeparateList(sbList, rgsbList, chSep, csbMax)
char *sbList, *rgsbList[], chSep;
int csbMax;
{
char *pch;
int csbList = 0;
while (sbList && *sbList && csbList < csbMax) {
rgsbList[csbList++] = sbList;
if (sbList = index(sbList, chSep))
*sbList++ = NULL;
}
return(sbList && *sbList ? ERROR : csbList);
}
/******
** TexTryOpen -- private helper for TexOpen.  Tries to open the single
**	path name sbPath, which must be shorter than MAXPATHLEN, in this
**	order:
**		file.tex - must be as named, no other name is tried
**		file.ext - random extension, try it
**		file	 - base name, add .tex and try it
**		file	 - try it as is
**	Only a '.' in the last path component counts as the start of an
**	extension, so a directory such as "." or ".." does not disturb the
**	order.  Returns the open stream or NULL.
******/
static FILE *
TexTryOpen(sbPath)
char *sbPath;
{
    char *pchBase, *pchExt;
    FILE *fp;
    static char sbTex[MAXPATHLEN + 5];	/* room for sbPath plus ".tex" */

    if ((pchBase = rindex(sbPath, '/')) == NULL)
        pchBase = sbPath;
    pchExt = rindex(pchBase, '.');

    /* If sbPath ends in .tex then it must be there */
    if (pchExt != NULL && strcmp(pchExt, ".tex") == 0)
        return(fopen(sbPath, "r"));

    /* if .<ext> then try to open it */
    if (pchExt != NULL && (fp = fopen(sbPath, "r")) != NULL)
        return(fp);

    /* just base name, add .tex to the name */
    strcpy(sbTex, sbPath);
    strcat(sbTex, ".tex");
    if ((fp = fopen(sbTex, "r")) != NULL)
        return(fp);

    /* try sbPath regardless */
    return(fopen(sbPath, "r"));
}

/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**		file.tex - must be as named, if not there go to the next path
**		file.ext - random extension, try it
**		file	 - base name, add .tex and try it
**		file	 - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched and the name
**	is tried exactly once, whether or not any input paths are set.
**	Names that would not fit in MAXPATHLEN characters are skipped.
**	Returns the open stream, or NULL if nothing could be opened.
******/
FILE *
TexOpen(sbFile)
char *sbFile;
{
    FILE *fp;
    int iPath;
    static char sbFullPath[MAXPATHLEN];

    if (*sbFile == '/') {	/* absolute path */
        if (strlen(sbFile) >= MAXPATHLEN)
            return((FILE *)NULL);
        return(TexTryOpen(sbFile));
    }

    for (iPath = 0; iPath < csbInputPaths; iPath++) {
        /* directory + '/' + file + NUL must fit in sbFullPath */
        if (strlen(rgsbInputPaths[iPath]) + strlen(sbFile) + 2 > MAXPATHLEN)
            continue;
        sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
        if ((fp = TexTryOpen(sbFullPath)) != NULL)
            return(fp);
    }
    return((FILE *)NULL);
}
/******
** Warning -- write a two part warning message to stderr, prefixed with
**	the program name.  sb1 and sb2 are printed separated by one blank.
******/
Warning(sb1, sb2)
char *sb1, *sb2;
{
    (void)fprintf(stderr,
                  "%s: warning: %s %s\n",
                  sbProgName, sb1, sb2);
}
/******
** ErrorExit -- report a fatal error and terminate.  Pending output on
**	stdout is flushed first, then the message sb1 is written to stderr
**	prefixed with the program name, and detex exits with status 1.
******/
ErrorExit(sb1)
char *sb1;
{
    (void)fflush(stdout);
    (void)fprintf(stderr,
                  "%s: error: %s\n",
                  sbProgName, sb1);
    exit(1);
}