DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - metrics - download
Index: T d

⟦5565cb16e⟧ TextFile

    Length: 11857 (0x2e51)
    Types: TextFile
    Names: »detex.l«

Derivation

└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
    └─⟦c319c2751⟧ »unix3.0/TeX3.0.tar.Z« 
        └─⟦036c765ac⟧ 
            └─⟦this⟧ »TeX3.0/TeXcontrib/trinkle/detex.l« 
└─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89
    └─⟦this⟧ »./tex82/TeXcontrib/trinkle/detex.l« 
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
    └─⟦63303ae94⟧ »unix3.14/TeX3.14.tar.Z« 
        └─⟦c58930e5c⟧ 
            └─⟦this⟧ »TeX3.14/TeXcontrib/trinkle/detex.l« 

TextFile

%{
static char	rcsid[] = "$Header: /u1/src/local/bin/tex/tools/detex/RCS/detex.l,v 2.3 86/10/23 16:29:27 trinkle Exp $";

/*
 * detex [-w] [-l] [-e environment-list] [file[.tex]]
 *
 *	This program is used to remove TeX or LaTeX constructs from a text
 *	file.
 *
 * Written by:
 *	Daniel Trinkle
 *	Department of Computer Science
 *	Purdue University
 *
 */

#include "detex.h"
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>

/* LaBEGIN: change start condition only when LaTeX mode (fLatex) is on */
#define	LaBEGIN		if (fLatex) BEGIN
/* IGNORE: write a blank in place of skipped input, unless -w was given */
#define	IGNORE		if (!fWord) putchar(' ')
/* NEWLINE: write a line break for skipped input, unless -w was given */
#define	NEWLINE		if (!fWord) putchar('\n')

char	*rgsbEnvIgnore[MAXENVS];	/* list of environments ignored */
char	*rgsbIncList[MAXINCLIST];	/* list of includeonly files */
char	*rgsbInputPaths[MAXINPUTPATHS];	/* list of input paths in order */
char	sbCurrentEnv[CCHMAXENV];	/* current environment being ignored */
char	*sbProgName;			/* name we were invoked with */
FILE	*rgfp[NOFILE+1];		/* stack of input/include files */
int	cfp = 0;			/* count of files in stack */
int	csbEnvIgnore;			/* count of environments ignored */
int	csbIncList = 0;			/* count of includeonly files */
int	csbInputPaths;			/* count of input paths */
int	fLatex = 0;			/* flag to indicate delatex (LaTeX mode) */
int	fWord = 0;			/* flag for -w option (word only output) */
%}

	/* S is optional white space, W is a run of letters */
S	[ \t\n]*
W	[a-zA-Z]+

	/* Normal is the state in which text is copied to the output */
%Start Def Display IncOnly Input Math Normal
%Start LaBegin LaDisplay LaEnd LaEnv LaForm LaInc LaMacro

%%
<Normal>"%".*		/* ignore comments */	;

<Normal>"\\begin"{S}"{"{S}"document"{S}"}"	/* turn on LaTeX mode */	{fLatex = 1; IGNORE;}

<Normal>"\\begin"     /* environment start */	{LaBEGIN LaBegin; IGNORE;}
<LaBegin>{W}					{   /* ignored environments are absorbed in LaEnv, */
						    /* any other name is skipped like a macro argument */
						    if (BeginEnv(yytext))
							BEGIN LaEnv;
						    else
							BEGIN LaMacro;
						    IGNORE;
						}
<LaBegin>"\n"					NEWLINE;
<LaBegin>.		/* skip up to the name */	;

<LaEnv>"\\end" /* absorb some environments */	{LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n"					NEWLINE;
<LaEnv>.		/* discard the contents */	;

<LaEnd>{W}		/* end environment */	{   /* only the end of the ignored environment leaves it */
						    if (EndEnv(yytext))
							BEGIN Normal;
						    IGNORE;
						}
<LaEnd>"}"		/* some other end, keep absorbing */	{BEGIN LaEnv; IGNORE;}
<LaEnd>"\n"					NEWLINE;
<LaEnd>.					;

<Normal>"\\bibitem"	   /* ignore args  */	{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibliography"   /* of these \cs */	{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibstyle"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\cite"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\documentstyle"			{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\end"					{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\label"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\pageref"				{LaBEGIN LaMacro; IGNORE;}
<Normal>"\\ref"					{LaBEGIN LaMacro; IGNORE;}
<LaMacro>"}"		/* first closing brace ends the skip */	BEGIN Normal;
<LaMacro>"\n"					NEWLINE;
<LaMacro>.					;

<Normal>"\\def"		/* ignore def begin */	{BEGIN Def; IGNORE;}
<Def>"{"		/* body of the def is scanned as text */	BEGIN Normal;
<Def>"\n"					NEWLINE;
<Def>.						;

<Normal>"\\("		/* formula mode */	{LaBEGIN LaForm; IGNORE;}
<LaForm>"\\)"					BEGIN Normal;
<LaForm>"\n"					NEWLINE;
<LaForm>.					;

<Normal>"\\["		/* display mode */	{LaBEGIN LaDisplay; IGNORE;}
<LaDisplay>"\\]"				BEGIN Normal;
<LaDisplay>"\n"					NEWLINE;
<LaDisplay>.					;

<Normal>"$$"		/* display mode */	{BEGIN Display; IGNORE;}
<Display>"$$"					BEGIN Normal;
<Display>"\n"					NEWLINE;
<Display>.					;

<Normal>"$"		/* math mode */		{BEGIN Math; IGNORE;}
<Math>"$"					BEGIN Normal;
<Math>"\n"					NEWLINE;
<Math>.						;

<Normal>"\\include"	/* process files */	{LaBEGIN LaInc; IGNORE;}
<LaInc>[^{ \t\n}]+	/* the file name */	{   IncludeFile(yytext);
						    BEGIN Normal;
						}
<LaInc>"\n"					NEWLINE;
<LaInc>.					;

<Normal>"\\includeonly"	/* collect the list of files */	{BEGIN IncOnly; IGNORE;}
<IncOnly>[^{ \t,\n}]+	/* one file name */	AddInclude(yytext);
<IncOnly>"}"					{   /* an empty list stores one NULL entry so that */
						    /* csbIncList is non-zero and InList matches nothing */
						    if (csbIncList == 0)
							rgsbIncList[csbIncList++] = NULL;
						    BEGIN Normal;
						}
<IncOnly>"\n"					NEWLINE;
<IncOnly>.					;

<Normal>"\\input"	/* process files, TeX and LaTeX */	{BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+	/* the file name */	{   InputFile(yytext);
						    BEGIN Normal;
						}
<Input>"\n"					NEWLINE;
<Input>.					;

<Normal>\\[a-zA-Z@]+	/* ignore other \cs */	IGNORE;
<Normal>\\.		/* backslash and one character */	IGNORE;
<Normal>\\[a-zA-Z@][a-zA-Z@0-9]*['=`][^ \t\n]*	IGNORE;

<Normal>[{}\\|~]      /* special characters */	IGNORE;
<Normal>[!?]"`"					IGNORE;

<Normal>{W}[']*{W}	/* a word, at least two letters */	{   /* -w prints one word per line */
						    if (fWord)
							printf("%s\n", yytext);
						    else
							ECHO;
						}
<Normal>[0-9]+		/* numbers are dropped by -w */	if (!fWord) ECHO;
<Normal>(.|\n)		/* anything else is dropped by -w */	if (!fWord) ECHO;
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**		-w		word only output
**		-l		force latex mode
**		-e <env-list>	list of LaTeX environments to ignore
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**	Exits with status 0, or through ErrorExit() when the scanner is not
**	back in the Normal state once the input is exhausted.
******/

main(cArgs,rgsbArgs)
int	cArgs;
char	*rgsbArgs[];
{
	char	*pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
	FILE	*TexOpen();
	int	fSawFile = 0, iArgs = 1;

	/* get the base name of the program for use in messages */
	if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
	    sbProgName++;
	else
	    sbProgName = rgsbArgs[0];

	/* set rgsbInputPaths for use with TexOpen() */
	SetInputPaths();

	/* process command line options */
	while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
		while (*++pch)
		    switch (*pch) {
		    case CHENVOPT:
			/* the list is the next argument; without one the */
			/* option is dropped instead of reading past the */
			/* end of the argument vector */
			if (iArgs + 1 >= cArgs) {
			    sbBadOpt[0] = *pch;
			    sbBadOpt[1] = '\0';
			    Warning("option requires an argument -", sbBadOpt);
			    break;
			}
			sbEnvList = rgsbArgs[++iArgs];
			break;
		    case CHLATEXOPT:
			fLatex = 1;
			break;
		    case CHWORDOPT:
			fWord = 1;
			break;
		    default:
			sbBadOpt[0] = *pch;
			sbBadOpt[1] = '\0';
			Warning("unknown option ignored -", sbBadOpt);
		    }
		iArgs++;
	}
	SetEnvIgnore(sbEnvList);

	/* process input files */
	for (; iArgs < cArgs; iArgs++) {
	    fSawFile++;
	    if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
		Warning("can't open file", rgsbArgs[iArgs]);
		continue;
	    }
	    BEGIN Normal;
	    yylex();
	}

	/* if there were no input files, assume stdin */
	if (!fSawFile) {
	    yyin = stdin;
	    BEGIN Normal;
	    yylex();
	}
	/* anything but Normal means a construct was still open at EOF */
	if (YYSTATE != Normal)
	    ErrorExit("input contains an unterminated mode or environment");
	exit(0);
}

/******
** yywrap -- called by lex at end of input.  The finished stream is
**	closed.  If a file is waiting on the rgfp stack it becomes yyin
**	again and 0 is returned so that scanning continues; otherwise 1
**	is returned, the termination signal for lex.
******/

yywrap()
{
	(void)fclose(yyin);
	if (cfp <= 0)
	    return(1);		/* nothing left to resume */
	cfp--;
	yyin = rgfp[cfp];
	return(0);
}

/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList.  The list is copied first: SeparateList() writes string
**	terminators into the list it is given and rgsbEnvIgnore keeps
**	pointing into it, and the caller may pass a string constant.
**	A NULL sbEnvList gives an empty list.
******/

SetEnvIgnore(sbEnvList)
char	*sbEnvList;
{
	char	*sbCopy = NULL;

	if (sbEnvList != NULL) {
	    if ((sbCopy = (char *)malloc(strlen(sbEnvList) + 1)) == NULL)
		ErrorExit("out of memory");
	    strcpy(sbCopy, sbEnvList);
	}
	csbEnvIgnore = SeparateList(sbCopy, rgsbEnvIgnore, CHENVSEP, MAXENVS);
	if (csbEnvIgnore == ERROR)
	    ErrorExit("The environtment list contains too many environments");
}

/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned.  Returns 0 when
**	not in LaTeX mode, when sbEnv is not in the list, or when sbEnv is
**	too long to be remembered in sbCurrentEnv (a warning is printed).
******/

BeginEnv(sbEnv)
char	*sbEnv;
{
	int	i;

	if (!fLatex) return(0);
	for (i = 0; i < csbEnvIgnore; i++) {
	    if (strcmp(sbEnv, rgsbEnvIgnore[i]) != 0)
		continue;
	    /* a name that does not fit could never be matched by EndEnv */
	    if (strlen(sbEnv) >= sizeof(sbCurrentEnv)) {
		Warning("environment name too long, not ignored -", sbEnv);
		return(0);
	    }
	    strcpy(sbCurrentEnv, sbEnv);
	    return(1);
	}
	return(0);
}

/******
** EndEnv -- returns 1 if, in LaTeX mode, sbEnv names the environment
**	remembered in sbCurrentEnv, and 0 otherwise.
******/

EndEnv(sbEnv)
char	*sbEnv;
{
	if (fLatex && strcmp(sbEnv, sbCurrentEnv) == 0)
	    return(1);
	return(0);
}

/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	the sbFile is ignored.
******/

InputFile(sbFile)
char	*sbFile;
{
	FILE	*TexOpen();

	rgfp[cfp++] = yyin;
	if ((yyin = TexOpen(sbFile)) == NULL) {
	    Warning("can't open \\input file", sbFile);
	    yyin = rgfp[--cfp];
	}
}

/******
** IncludeFile -- if sbFile is not in the rgsbIncList, push current yyin
**	and open sbFile.  If the open fails, the sbFile is ignored.
******/

IncludeFile(sbFile)
char	*sbFile;
{
	FILE	*TexOpen();

	if (!InList(sbFile))
	    return(0);
	rgfp[cfp++] = yyin;
	if ((yyin = TexOpen(sbFile, "r")) == NULL) {
	    Warning("can't open \\include file", sbFile);
	    yyin = rgfp[--cfp];
	}
}

/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is already full, a warning is
**	printed and sbFile is ignored.  Exits through ErrorExit() if no
**	memory is available for the copy.
******/

AddInclude(sbFile)
char	*sbFile;
{
	char	*sbCopy;

	if (csbIncList >= MAXINCLIST)
	    /* warn only: storing here would run off the end of rgsbIncList */
	    Warning("\\includeonly list is too long, ignoring", sbFile);
	else if ((sbCopy = (char *)malloc(strlen(sbFile) + 1)) == NULL)
	    ErrorExit("out of memory");
	else {
	    strcpy(sbCopy, sbFile);
	    rgsbIncList[csbIncList++] = sbCopy;
	}
}

/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".  Everything
**	from the last '.' of sbFile on is left out of the comparison.
**	The search stops at the first NULL entry of the list.
**	Returns 1 if sbFile is in the list, else 0.
******/

InList(sbFile)
char	*sbFile;
{
	char	*pch;
	int	i;
	size_t	cchBase;

	if (csbIncList == 0)	/* no list */
	    return(1);

	/* compare in place so that a long name cannot overflow a buffer */
	if ((pch = rindex(sbFile, '.')) != NULL)
	    cchBase = (size_t)(pch - sbFile);
	else
	    cchBase = strlen(sbFile);

	for (i = 0; i < csbIncList && rgsbIncList[i] != NULL; i++)
	    if (strlen(rgsbIncList[i]) == cchBase
		    && strncmp(rgsbIncList[i], sbFile, cchBase) == 0)
		return(1);
	return(0);
}

/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS.
**	The list is always copied: SeparateList() writes string terminators
**	into it and rgsbInputPaths keeps pointing into the copy, which is
**	therefore never freed.
******/

SetInputPaths()
{
    char *sb, *sbPaths, *getenv();

    if ((sb = getenv("TEXINPUTS")) == NULL)
	sb = DEFAULTINPUTS;

    if ((sbPaths = (char *)malloc(strlen(sb) + 1)) == NULL)
	ErrorExit("out of memory");
    strcpy(sbPaths, sb);

    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
	ErrorExit("TEXINPUTS environment variable has too many paths");
}

/******
** SeparateList -- takes a chSep separated list sbList, replaces the
**	chSep's with NULLs and sets rgsbList[i] to the beginning of
**	the ith word in sbList.  The number of words is returned.  A
**	ERROR is returned if there are more than csbMax words.
******/

SeparateList(sbList, rgsbList, chSep, csbMax)
char	*sbList, *rgsbList[], chSep;
int	csbMax;
{
	char	*pch;
	int	csbList = 0;

	while (sbList && *sbList && csbList < csbMax) {
	    rgsbList[csbList++] = sbList;
	    if (sbList = index(sbList, chSep))
		*sbList++ = NULL;
	}
	return(sbList && *sbList ? ERROR : csbList);
}

/******
** TryOpen -- tries to open the fully qualified name sbPath for reading,
**	in the order described for TexOpen.  sbPath must be shorter than
**	MAXPATHLEN.  Returns the open file or NULL.
******/

static FILE *
TryOpen(sbPath)
char	*sbPath;
{
	char	*pch, sbNew[MAXPATHLEN + 5];
	FILE	*fp;

	/* a '.' only starts an extension if it is in the last component */
	pch = rindex(sbPath, '.');
	if (pch != NULL && index(pch, '/') != NULL)
	    pch = NULL;

	/* If the name ends in .tex then it must be there */
	if (pch != NULL && strcmp(pch, ".tex") == 0)
	    return(fopen(sbPath, "r"));

	/* if .<ext> then try to open it */
	if (pch != NULL && (fp = fopen(sbPath, "r")) != NULL)
	    return(fp);

	/* just base name, add .tex to the name */
	sprintf(sbNew, "%s.tex", sbPath);
	if ((fp = fopen(sbNew, "r")) != NULL)
	    return(fp);

	/* try the name regardless */
	return(fopen(sbPath, "r"));
}

/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**		file.tex - must be as named, if not there go to the next path
**		file.ext - random extension, try it
**		file     - base name, add .tex and try it
**		file     - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched.
**	Names that would not fit in MAXPATHLEN characters are skipped.
**	Returns the open file, or NULL if nothing could be opened.
******/

FILE *
TexOpen(sbFile)
char	*sbFile;
{
	FILE	*fp;
	int	iPath;
	static char	sbFullPath[MAXPATHLEN];

	if (*sbFile == '/') {	/* absolute path, tried exactly once */
	    if (strlen(sbFile) >= sizeof(sbFullPath))
		return((FILE *)NULL);
	    strcpy(sbFullPath, sbFile);
	    return(TryOpen(sbFullPath));
	}

	for (iPath = 0; iPath < csbInputPaths; iPath++) {
	    /* room is needed for the '/' and the terminator as well */
	    if (strlen(rgsbInputPaths[iPath]) + strlen(sbFile) + 2
			> sizeof(sbFullPath))
		continue;
	    sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
	    if ((fp = TryOpen(sbFullPath)) != NULL)
		return(fp);
	}
	return((FILE *)NULL);
}

/******
** Warning -- writes "<program name>: warning: <sb1> <sb2>" and a newline
**	to stderr.  Processing continues.
******/

Warning(sb1, sb2)
char	*sb1, *sb2;
{
	(void)fprintf(stderr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
}

/******
** ErrorExit -- print an error message preceded by the program name.
**	Stdout is flushed and detex exits.
******/

ErrorExit(sb1)
char	*sb1;
{
	(void)fflush(stdout);
	fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
	exit(1);
}