DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - metrics - download
Index: T d

⟦d1ad84ce1⟧ TextFile

    Length: 13145 (0x3359)
    Types: TextFile
    Names: »detex.l«

Derivation

└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
    └─⟦af57ea0c3⟧ »utils/detex-2.3.tar« 
        └─⟦this⟧ »./detex.l« 

TextFile

%{
#ifndef lint
static char	rcsid[] = "$Header: /usr/src/local/bin/detex/RCS/detex.l,v 2.8 1990/09/07 20:48:32 trinkle Exp trinkle $";
#endif

/*
 * detex [-e environment-list] [-c] [-l] [-n] [-s] [-w] [file[.tex]]
 *
 *	This program is used to remove TeX or LaTeX constructs from a text
 *	file.
 *
 * Written by:
 *	Daniel Trinkle
 *	Department of Computer Science
 *	Purdue University
 *
 */

#include "detex.h"
#include <strings.h>
#include <sys/param.h>

/*
 * Action helpers used by the lex rules.  Each one expands to a bare `if'
 * statement with no trailing semicolon and no braces, so it must be used
 * as a complete statement; following one directly with `else' would bind
 * the else to the macro's hidden `if'.
 *
 *	LaBEGIN state	- switch to `state' only when fLatex is set
 *	CITEBEGIN state	- switch to `state' unless both fLatex and fCite
 *			  are set
 *	IGNORE		- print a blank if fSpace is set and fWord is not
 *	SPACE		- print a blank unless fWord is set
 *	NEWLINE		- print a newline unless fWord is set
 */
#define	LaBEGIN		if (fLatex) BEGIN
#define	CITEBEGIN	if (!fLatex || !fCite) BEGIN
#define	IGNORE		if (fSpace && !fWord) putchar(' ')
#define	SPACE		if (!fWord) putchar(' ')
#define	NEWLINE		if (!fWord) putchar('\n')

char	*malloc();

char	*rgsbEnvIgnore[MAXENVS];	/* list of environments ignored */
char	*rgsbIncList[MAXINCLIST];	/* list of includeonly files */
char	*rgsbInputPaths[MAXINPUTPATHS];	/* list of input paths in order */
char	sbCurrentEnv[CCHMAXENV];	/* current environment being ignored */
char	*sbProgName;			/* name we were invoked with */
FILE	*rgfp[NOFILE+1];		/* stack of suspended input/include files */
int	cfp = 0;			/* count of files in stack */
int	csbEnvIgnore;			/* count of environments ignored */
int	csbIncList = 0;			/* count of includeonly files */
int	csbInputPaths;			/* count of input paths */
int	fLatex = 0;			/* flag: LaTeX mode (option, or \begin{document} seen) */
int	fWord = 0;			/* flag for -w option */
int	fFollow = 1;			/* flag to follow input/include */
int	fCite = 0;			/* flag to echo \cite and \ref args */
int	fSpace = 0;			/* flag to replace \cs with space */
%}

	/* S: a possibly empty run of blanks, tabs and newlines */
S	[ \t\n]*
	/* W: a run of one or more letters */
W	[a-zA-Z]+

	/*
	 * Start conditions.  main() selects Nrm before each call of yylex().
	 *	Nrm	ordinary text
	 *	Def	after \def, up to the next "{"
	 *	Disp	between $$ and $$
	 *	Math	between $ and $
	 *	Input	after \input, up to the file name
	 *	IncOnly	inside the argument of \includeonly
	 *	Ctl	just after a control sequence not handled elsewhere
	 *	LaBegin	after \begin, looking for the environment name
	 *	LaEnv	inside an environment that is being ignored
	 *	LaEnd	after \end seen while in LaEnv
	 *	LaVerb	inside a verbatim environment that is echoed
	 *	LaMacro	skipping input up to the next "}"
	 *	LaForm	between \( and \)
	 *	LaDisp	between \[ and \]
	 *	LaInc	after \include, up to the file name
	 */
%Start Def Disp IncOnly Input Math Nrm Ctl
%Start LaBegin LaDisp LaEnd LaEnv LaForm LaInc LaMacro LaVerb

%%
<Nrm>"%".*		/* ignore comments */	;

<Nrm>"\\begin"{S}"{"{S}"document"{S}"}"		{fLatex = 1; IGNORE;}	/* \begin{document} turns LaTeX mode on */

<Nrm>"\\begin"     /* environment start */	{LaBEGIN LaBegin; IGNORE;}	/* state changes only when fLatex is set */

<LaBegin>{S}"{"{S}"verbatim"{S}"}"		{   /* verbatim is swallowed when it is in the
						     * ignore list, otherwise its body is echoed */
						    if (BeginEnv("verbatim"))
							BEGIN LaEnv;
						    else
							BEGIN LaVerb;
						    IGNORE;
						}

<LaVerb>"\\end"{S}"{"{S}"verbatim"{S}"}" /* verbatim mode */	{BEGIN Nrm; IGNORE;}
<LaVerb>.					ECHO;	/* copy verbatim text through unchanged */

<LaBegin>{W}					{   /* environment name: swallow the whole
						     * environment if it is in the ignore list,
						     * otherwise just skip to the closing "}" */
						    if (BeginEnv(yytext))
							BEGIN LaEnv;
						    else
							BEGIN LaMacro;
						    IGNORE;
						}
<LaBegin>"\n"					NEWLINE;	/* keep line breaks */
<LaBegin>.					;	/* drop everything else */

<LaEnv>"\\end" /* absorb some environments */	{LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n"					NEWLINE;	/* keep line breaks */
<LaEnv>.					;	/* drop the body of the ignored environment */

<LaEnd>{W}		/* end environment */	{   /* leave only when the name matches the
						     * environment recorded by BeginEnv() */
						    if (EndEnv(yytext))
							BEGIN Nrm;
						    IGNORE;
						}
<LaEnd>"}"					{BEGIN LaEnv; IGNORE;}	/* some other \end{...}: keep ignoring */
<LaEnd>"\n"					NEWLINE;
<LaEnd>.					;

<Nrm>"\\bibitem"	/* ignore args  */	{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\bibliography"	/* of these \cs */	{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\bibstyle"				{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\cite"					{CITEBEGIN LaMacro; IGNORE;}	/* argument kept only when fLatex and fCite are both set */
<Nrm>"\\documentstyle"				{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\end"					{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\index"					{LaBEGIN LaMacro; SPACE;}	/* unlike the others, prints a blank unless fWord */
<Nrm>"\\label"					{LaBEGIN LaMacro; IGNORE;}
<Nrm>"\\pageref"				{CITEBEGIN LaMacro; IGNORE;}
<Nrm>"\\ref"					{CITEBEGIN LaMacro; IGNORE;}
<LaMacro>"}"					BEGIN Nrm;	/* first "}" ends the skipped argument */
<LaMacro>"\n"					NEWLINE;
<LaMacro>.					;

<Nrm>"\\def"		/* ignore def begin */	{BEGIN Def; IGNORE;}
<Def>"{"					BEGIN Nrm;	/* the definition body itself is scanned as normal text */
<Def>"\n"					NEWLINE;
<Def>.						;

<Nrm>"\\("		/* formula mode */	{LaBEGIN LaForm; IGNORE;}
<LaForm>"\\)"					BEGIN Nrm;
<LaForm>"\n"					NEWLINE;
<LaForm>.					;

<Nrm>"\\["		/* display mode */	{LaBEGIN LaDisp; IGNORE;}
<LaDisp>"\\]"					BEGIN Nrm;
<LaDisp>"\n"					NEWLINE;
<LaDisp>.					;

<Nrm>"$$"		/* display mode */	{BEGIN Disp; IGNORE;}
<Disp>"$$"					BEGIN Nrm;
<Disp>"\n"					NEWLINE;
<Disp>.						;

<Nrm>"$"		/* math mode */		{BEGIN Math; IGNORE;}
<Math>"$"					BEGIN Nrm;
<Math>"\n"					NEWLINE;
<Math>"\\$"					;	/* an escaped dollar does not end math mode */
<Math>.						;

<Nrm>"\\include"	/* process files */	{LaBEGIN LaInc; IGNORE;}
<LaInc>[^{ \t\n}]+				{   /* the file name: everything that is not a
						     * brace, blank, tab or newline */
						    IncludeFile(yytext);
						    BEGIN Nrm;
						}
<LaInc>"\n"					NEWLINE;
<LaInc>.					;

<Nrm>"\\includeonly"				{BEGIN IncOnly; IGNORE;}
<IncOnly>[^{ \t,\n}]+				AddInclude(yytext);	/* one comma separated entry */
<IncOnly>"}"					{   /* nothing was stored: add a NULL entry so
						     * that the list counts as non-empty */
						    if (csbIncList == 0)
							rgsbIncList[csbIncList++] = NULL;
						    BEGIN Nrm;
						}
<IncOnly>"\n"					NEWLINE;
<IncOnly>.					;

<Nrm>"\\input"					{BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+				{   /* the file name, delimited as for \include */
						    InputFile(yytext);
						    BEGIN Nrm;
						}
<Input>"\n"					NEWLINE;
<Input>.					;

<Nrm>\\[a-zA-Z@]+	/* ignore other \cs */	{BEGIN Ctl; IGNORE;}
<Nrm>"\\ "					SPACE;	/* backslash-blank becomes a blank */
<Nrm>\\.					IGNORE;	/* any other single character control sequence */
<Ctl>\\[a-zA-Z@]+				IGNORE;	/* a further control sequence: stay in Ctl */
<Ctl>[a-zA-Z@0-9]*[-'=`][^ \t\n{]*		IGNORE;	/* text holding one of - ' = ` up to a blank or "{"; presumably a parameter of the control sequence */
<Ctl>"\n"					{BEGIN Nrm; NEWLINE;}
<Ctl>[ \t{]*					{BEGIN Nrm; IGNORE;}
<Ctl>.						{yyless(0);BEGIN Nrm;}	/* push the character back and rescan it as normal text */

<Nrm>[{}\\|]	/* special characters */	IGNORE;
<Nrm>[!?]"`"					IGNORE;
<Nrm>~						SPACE;	/* tie becomes a blank */

<Nrm>{W}[']*{W}					{   /* a word of two or more letters, possibly with
						     * embedded apostrophes; one per line with -w */
						    if (fWord)
							printf("%s\n", yytext);
						    else
							ECHO;
						}
<Nrm>[0-9]+					if (!fWord) ECHO;
<Nrm>(.|\n)					if (!fWord) ECHO;	/* everything else is copied unless -w */
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**		-c		echo LaTeX \cite, \ref, and \pageref values
**		-e <env-list>	list of LaTeX environments to ignore
**		-l		force latex mode
**		-n		do not follow \input and \include
**		-s		replace control sequences with a space
**		-w		word only output
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**	An -e option that is not followed by an <env-list> argument is a
**	fatal error.  Unknown option letters only produce a warning.
**	Exits with status 0, or through ErrorExit() if the scanner is not
**	back in the Nrm state after the last input.
******/

main(cArgs,rgsbArgs)
int	cArgs;
char	*rgsbArgs[];
{
	char	*pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
	FILE	*TexOpen();
	int	fSawFile = 0, iArgs = 1;

	/* get the base name of the program for use in messages */
	if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
	    sbProgName++;
	else
	    sbProgName = rgsbArgs[0];

	/* set rgsbInputPaths for use with TexOpen() */
	SetInputPaths();

	/* process command line options; letters may be bundled ("-lw") */
	while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
		while (*++pch)
		    switch (*pch) {
		    case CHCITEOPT:
			fCite = 1;
			break;
		    case CHENVOPT:
			/* the list is the next argument, which must exist;
			 * without this check we would step past the end of
			 * the argument vector */
			if (iArgs + 1 >= cArgs)
			    ErrorExit("option -e requires an environment list");
			sbEnvList = rgsbArgs[++iArgs];
			break;
		    case CHLATEXOPT:
			fLatex = 1;
			break;
		    case CHNOFOLLOWOPT:
			fFollow = 0;
			break;
		    case CHSPACEOPT:
			fSpace = 1;
			break;
		    case CHWORDOPT:
			fWord = 1;
			break;
		    default:
			sbBadOpt[0] = *pch;
			sbBadOpt[1] = '\0';
			Warning("unknown option ignored -", sbBadOpt);
		    }
		iArgs++;
	}
	SetEnvIgnore(sbEnvList);

	/* process input files */
	for (; iArgs < cArgs; iArgs++) {
	    fSawFile++;
	    if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
		Warning("can't open file", rgsbArgs[iArgs]);
		continue;
	    }
	    BEGIN Nrm;
	    (void)yylex();
	}

	/* if there were no input files, assume stdin */
	if (!fSawFile) {
	    yyin = stdin;
	    BEGIN Nrm;
	    (void)yylex();
	}
	if (YYSTATE != Nrm)
	    ErrorExit("input contains an unterminated mode or environment");
	exit(0);
}

/******
** yywrap -- called by lex at end of file.  The finished input is closed.
**	If suspended files remain on the rgfp stack, the most recent one
**	becomes yyin again and 0 is returned so that scanning continues.
**	With an empty stack 1 is returned, which tells lex to stop.
******/

yywrap()
{
	(void)fclose(yyin);

	/* nothing left to resume */
	if (cfp <= 0)
	    return(1);

	/* pop the file that was being read when this one was opened */
	cfp--;
	yyin = rgfp[cfp];
	return(0);
}

/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList, a CHENVSEP separated list, and csbEnvIgnore to their
**	number.  The list is split in a private copy which is kept for
**	the life of the program, so sbEnvList itself is never modified.
**	A NULL sbEnvList gives an empty list.  Too many environments or
**	lack of memory is a fatal error.
******/

SetEnvIgnore(sbEnvList)
char	*sbEnvList;
{
	char	*sbCopy;

	/* SeparateList() overwrites the separators in place, and the
	 * caller may hand us a constant string (presumably the case for
	 * the built-in default list), so never split the original */
	if (sbEnvList != NULL) {
	    if ((sbCopy = malloc((unsigned)(strlen(sbEnvList) + 1))) == NULL)
		ErrorExit("out of memory");
	    (void)strcpy(sbCopy, sbEnvList);
	    sbEnvList = sbCopy;
	}
	csbEnvIgnore = SeparateList(sbEnvList, rgsbEnvIgnore, CHENVSEP, MAXENVS);
	if (csbEnvIgnore == ERROR)
	    ErrorExit("The environment list contains too many environments");
}

/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned, otherwise 0.
**	Always returns 0 when not in LaTeX mode.  A listed name that does
**	not fit in sbCurrentEnv draws a warning and is treated as not
**	ignored, because EndEnv() could never match a truncated name.
******/

BeginEnv(sbEnv)
char	*sbEnv;
{
	int	i;

	if (!fLatex) return(0);
	for (i = 0; i < csbEnvIgnore; i++)
	    if (strcmp(sbEnv, rgsbEnvIgnore[i]) == 0) {
		/* list entries come from the command line and may be
		 * longer than the fixed size buffer */
		if (strlen(sbEnv) >= sizeof(sbCurrentEnv)) {
		    Warning("environment name too long, not ignored:", sbEnv);
		    return(0);
		}
		(void)strcpy(sbCurrentEnv, sbEnv);
		return(1);
	    }
	return(0);
}

/******
** EndEnv -- returns 1 when in LaTeX mode and sbEnv names the environment
**	recorded in sbCurrentEnv, 0 otherwise.
******/

EndEnv(sbEnv)
char	*sbEnv;
{
	return(fLatex && strcmp(sbEnv, sbCurrentEnv) == 0);
}

/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	the sbFile is ignored.  Nothing is done when \input is not being
**	followed (fFollow clear).  When the stack of suspended files is
**	already full, sbFile is ignored with a warning.
******/

InputFile(sbFile)
char	*sbFile;
{
	FILE	*TexOpen(), *fpNew;

	if (!fFollow)
	    return;
	/* a file that inputs itself must not run off the end of rgfp */
	if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
	    Warning("\\input nesting too deep, ignoring", sbFile);
	    return;
	}
	if ((fpNew = TexOpen(sbFile)) == NULL) {
	    Warning("can't open \\input file", sbFile);
	    return;
	}
	/* suspend the current file; yywrap() resumes it later */
	rgfp[cfp++] = yyin;
	yyin = fpNew;
}

/******
** IncludeFile -- if InList() accepts sbFile, push the current yyin and
**	open sbFile.  If the open fails, the sbFile is ignored.  Nothing
**	is done when \include is not being followed (fFollow clear).
**	When the stack of suspended files is already full, sbFile is
**	ignored with a warning.
******/

IncludeFile(sbFile)
char	*sbFile;
{
	FILE	*TexOpen(), *fpNew;

	if (!fFollow)
	    return;
	if (!InList(sbFile))
	    return;
	/* a file that includes itself must not run off the end of rgfp */
	if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
	    Warning("\\include nesting too deep, ignoring", sbFile);
	    return;
	}
	if ((fpNew = TexOpen(sbFile)) == NULL) {
	    Warning("can't open \\include file", sbFile);
	    return;
	}
	/* suspend the current file; yywrap() resumes it later */
	rgfp[cfp++] = yyin;
	yyin = fpNew;
}

/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is too long, sbFile is ignored
**	with a warning.  Nothing is done when fFollow is clear.  Running
**	out of memory is a fatal error.
******/

AddInclude(sbFile)
char	*sbFile;
{
	char	*sbCopy;

	if (!fFollow)
	    return;
	if (csbIncList >= MAXINCLIST) {
	    Warning("\\includeonly list is too long, ignoring", sbFile);
	    return;		/* really ignore it: the table is full */
	}
	/* sbFile is the scanner's token buffer, so keep our own copy */
	if ((sbCopy = malloc((unsigned)(strlen(sbFile) + 1))) == NULL)
	    ErrorExit("out of memory");
	(void)strcpy(sbCopy, sbFile);
	rgsbIncList[csbIncList++] = sbCopy;
}

/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".  An extension
**	on the last component of sbFile is removed before comparing.
**	Returns 1 if the file is in the list and 0 if not; a name too
**	long to fit in a path buffer is never in the list.
******/

InList(sbFile)
char	*sbFile;
{
	char	*pch, *pchSlash, sbBase[MAXPATHLEN];
	int	i;

	if (csbIncList == 0)	/* no list */
	    return(1);
	if (strlen(sbFile) >= sizeof(sbBase))
	    return(0);
	(void)strcpy(sbBase, sbFile);

	/* strip the extension, but leave a '.' that belongs to a directory
	 * part (as in "../chap1") alone */
	pchSlash = rindex(sbBase, '/');
	if ((pch = rindex(sbBase, '.')) != NULL
		    && (pchSlash == NULL || pch > pchSlash))
	    *pch = '\0';

	/* a NULL entry ends the list early; the scanner stores one for an
	 * empty \includeonly argument */
	for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
	    if (strcmp(rgsbIncList[i], sbBase) == 0)
		return(1);
	return(0);
}

/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS, and
**	csbInputPaths to their number.  The list is always split in a
**	private copy that is kept for the life of the program.  Too many
**	paths or lack of memory is a fatal error.
******/

SetInputPaths()
{
    char *sb, *sbPaths, *getenv();

    if ((sb = getenv("TEXINPUTS")) == NULL)
	sb = DEFAULTINPUTS;

    /* SeparateList() overwrites the separators in place; neither the
     * environment nor the built-in default may be modified, so copy */
    if ((sbPaths = malloc((unsigned)(strlen(sb) + 1))) == NULL)
	ErrorExit("out of memory");
    (void)strcpy(sbPaths, sb);

    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
	ErrorExit("TEXINPUTS environment variable has too many paths");
}

/******
** SeparateList -- takes a chSep separated list sbList, replaces the
**	chSep's with NULLs and sets rgsbList[i] to the beginning of
**	the ith word in sbList.  The number of words is returned.  A
**	ERROR is returned if there are more than csbMax words.
******/

SeparateList(sbList, rgsbList, chSep, csbMax)
char	*sbList, *rgsbList[], chSep;
int	csbMax;
{
	int	csbList = 0;

	while (sbList && *sbList && csbList < csbMax) {
	    rgsbList[csbList++] = sbList;
	    if (sbList = index(sbList, chSep))
		*sbList++ = NULL;
	}
	return(sbList && *sbList ? ERROR : csbList);
}

/******
** TexTryPath -- private helper for TexOpen.  sbFullPath is one complete
**	candidate path held in a writable buffer of MAXPATHLEN characters.
**	The following order is used:
**		file.tex - must be as named, no other spelling is tried
**		file.ext - random extension, try it
**		file     - base name, add .tex and try it
**		file     - try it as is
**	Returns the open stream, or NULL if nothing could be opened.
**	sbFullPath is unchanged on return.
******/

static FILE *
TexTryPath(sbFullPath)
char	*sbFullPath;
{
	char	*pch;
	FILE	*fp;
	int	cch;

	pch = rindex(sbFullPath, '.');

	/* If the name ends in .tex then it must be there */
	if (pch != NULL && strcmp(pch, ".tex") == 0)
	    return(fopen(sbFullPath, "r"));

	/* if .<ext> then try to open it.  the '.' represents   */
	/* the beginning of an extension if it is not the first */
	/* character and it does not follow a '.' or a '/'      */
	if (pch != NULL && pch > sbFullPath
		    && *(pch - 1) != '.' && *(pch - 1) != '/'
		    && (fp = fopen(sbFullPath, "r")) != NULL)
	    return(fp);

	/* just base name, add .tex to the name.  The suffix is appended */
	/* in place when it fits and removed again afterwards.          */
	cch = (int)strlen(sbFullPath);
	if (cch + (int)sizeof(".tex") <= MAXPATHLEN) {
	    (void)strcpy(sbFullPath + cch, ".tex");
	    fp = fopen(sbFullPath, "r");
	    sbFullPath[cch] = '\0';
	    if (fp != NULL)
		return(fp);
	}

	/* try the name regardless */
	return(fopen(sbFullPath, "r"));
}

/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**		file.tex - must be as named, if not there go to the next path
**		file.ext - random extension, try it
**		file     - base name, add .tex and try it
**		file     - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched; this also
**	works when the list of input paths is empty.  A path too long for
**	the path buffer is skipped.  Returns the open stream, or NULL.
******/

FILE *
TexOpen(sbFile)
char	*sbFile;
{
	FILE	*fp;
	int	iPath;
	static char	sbFullPath[MAXPATHLEN];

	if (*sbFile == '/') {	/* absolute path, only check once */
	    if (strlen(sbFile) >= sizeof(sbFullPath))
		return((FILE *)NULL);
	    (void)strcpy(sbFullPath, sbFile);
	    return(TexTryPath(sbFullPath));
	}

	for (iPath = 0; iPath < csbInputPaths; iPath++) {
	    /* directory + '/' + file + NUL must fit in the buffer */
	    if (strlen(rgsbInputPaths[iPath]) + strlen(sbFile) + 2
			> sizeof(sbFullPath))
		continue;
	    (void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
	    if ((fp = TexTryPath(sbFullPath)) != NULL)
		return(fp);
	}
	return((FILE *)NULL);
}

/******
** Warning -- write "<program>: warning: <sb1> <sb2>" and a newline to
**	stderr.  Processing continues afterwards.
******/

Warning(sb1, sb2)
char	*sb1, *sb2;
{
	(void)fprintf(stderr, "%s: warning: %s %s\n",
		sbProgName,
		sb1,
		sb2);
}

/******
** ErrorExit -- fatal error.  Pending standard output is flushed first,
**	then "<program>: error: <sb1>" and a newline are written to stderr
**	and the program exits with status 1.
******/

ErrorExit(sb1)
char	*sb1;
{
	/* flush first so that the message follows the text already produced */
	(void)fflush(stdout);
	(void)fprintf(stderr, "%s: error: %s\n",
		sbProgName,
		sb1);
	exit(1);
}