|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T U
Length: 73202 (0x11df2) Types: TextFile Notes: Uncompressed file
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12 └─⟦c319c2751⟧ »unix3.0/TeX3.0.tar.Z« └─⟦036c765ac⟧ └─⟦aadeeed27⟧ »TeX3.0/TeXdoc/LiterateProgramming/web.tex.Z« └─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89 └─⟦aadeeed27⟧ »./tex82/TeXdoc/LiterateProgramming/web.tex.Z« └─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12 └─⟦63303ae94⟧ »unix3.14/TeX3.14.tar.Z« └─⟦c58930e5c⟧ └─⟦aadeeed27⟧ »TeX3.14/TeXdoc/LiterateProgramming/web.tex.Z« └─⟦this⟧
% Page layout \input webmac \output{\setbox0=\box255}\eject % get rid of spurious WEBMAC page \font\man=manfnt scaled \magstep3 \font\CompJtitle=ambx10 scaled\magstep4 \font\CompJabstract=amb10 \font\tenssb=amssmc10 \font\tenss=amss10 \font\tenssi=amssi10 \font\eightss=helvetica at 8truebp \font\eightssi=helveticai at 8truebp \font\eightssb=helveticab at 8truebp \font\eighttt=amtt8 \font\ninerm=amr9 \let\mc=\ninerm % medium caps for names like PASCAL \newdimen\pagewidth \newdimen\pageheight \newdimen\ruleht \hsize=177mm \vsize=249mm \parindent=1em % this is needed for WEB output \pagewidth=\hsize \pageheight=\vsize \ruleht=1pt \abovedisplayskip=11pt plus 3pt minus 8pt \abovedisplayshortskip=0pt plus 3pt \belowdisplayskip=11pt plus 3pt minus 8pt \belowdisplayshortskip=6pt plus 3pt minus 3pt \newif\iftitle \def\titlepage{\global\titletrue} % for pages without headlines \def\leftheadline{\hbox to \pagewidth{% \vbox to 8pt{}\hss \eightrm D. E. KNUTH\hss}} \def\rightheadline{\hbox to \pagewidth{% \vbox to 8pt{}\hss \eightrm LITERATE PROGRAMMING\hss}} \hoffset=-.25in \voffset=-.6in \newinsert\lefttop \newinsert\righttop \count\lefttop=1000 \count\righttop=1000 \dimen\lefttop=\maxdimen \dimen\righttop=\maxdimen \skip\lefttop=25pt plus 3pt minus 3pt \skip\righttop=\skip\lefttop \def\leftfloat{\insert\lefttop\bgroup \floatingpenalty=0 \penalty0 \vbox\bgroup} \def\rightfloat{\insert\righttop\bgroup \floatingpenalty=0 \penalty0 \vbox\bgroup} \def\endfloat{\egroup\egroup} \def\onepageout#1{\shipout\vbox{ % here we define one page of output \offinterlineskip % butt the boxes together \vbox to 9mm{ % this part goes on top of the regular pages \iftitle % the next is used for title pages \global\titlefalse % reset the titlepage switch \hbox to\pagewidth{\leaders\CJrule\hfill} \else\ifodd\pageno \rightheadline\else\leftheadline\fi\fi \vfill} % this completes the \vbox to 9mm \vbox to \pageheight{ #1 % now insert the main information \boxmaxdepth=\maxdepth } % this completes the \vbox to \pageheight \baselineskip=7mm \lineskiplimit=0pt \hbox to\pagewidth{% \ifodd\pageno\hfil\tenss submitted to THE COMPUTER JOURNAL% \tenssb\quad\folio \else\tenssb\folio\quad \tenss submitted to THE COMPUTER JOURNAL\hfil\fi} } \advancepageno} \output{\onepageout{\unvbox255}} \newbox\partialpage \def\begindoublecolumns{\begingroup \output={\global\setbox\partialpage=\vbox{\unvbox255}}\eject \output={\doublecolumnout} \hsize=84mm \vsize=510mm} \def\enddoublecolumns{\output={\balancecolumns}\eject \endgroup \pagegoal=\vsize} \def\doublecolumnout{\dimen0=\pageheight \advance\dimen0 by-\ht\partialpage \splittopskip=\topskip \ifdim\ht\lefttop>0pt \setbox255=\vbox{\unvbox\lefttop \setbox0=\lastbox\unvbox0\vskip\skip\lefttop\unvbox255}\fi \setbox0=\vsplit255 to\dimen0 \ifdim\ht\righttop>0pt \setbox255=\vbox{\unvbox\righttop \setbox0=\lastbox\unvbox0\vskip\skip\righttop\unvbox255}\fi \setbox2=\vsplit255 to\dimen0 \onepageout\pagesofar \unvbox255 \penalty\outputpenalty} \def\pagesofar{\unvbox\partialpage \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}} \def\balancecolumns{\setbox0=\vbox{\unvbox255} \dimen0=\ht0 \advance\dimen0 by\topskip \advance\dimen0 by-\baselineskip \divide\dimen0 by2 \splittopskip=\topskip {\vbadness=10000 \loop \global\setbox3=\copy0 \global\setbox1=\vsplit3 to\dimen0 \ifdim\ht3>\dimen0 \global\advance\dimen0 by1pt \repeat} \setbox0=\vbox to\dimen0{\unvbox1} \setbox2=\vbox to\dimen0{\unvbox3} \pagesofar} \def\CJrule{\hrule height\ruleht} \baselineskip=11pt \parskip=0pt plus 1pt \def\beginsection #1\par{\goodbreak\vskip9mm plus4mm minus 2mm \vbox{\CJrule width \hsize \kern5pt} \kern-3pt \nointerlineskip \leftline{\strut\bf#1} \CJrule \kern12pt\nobreak\noindent\ignorespaces} \def\caption #1. #2.{\leftline{\def\TeX{T\kern-.2em\lower.5ex\hbox{E}X}% \tenssb Figure #1.\enspace\tenss#2.}} \def\WEB{{\tt WEB}} \def\PASCAL{{\mc PASCAL}} \def\sec{{\tensy x}} \def\<{$\langle\,$} \def\>{$\,\rangle$} \newbox\circlebox \setbox\circlebox=\hbox{\man Y} \def\encircle#1{\kern6pt\hbox to\wd\circlebox{\hss\tt#1\hss}\kern-\wd\circlebox \raise10pt\copy\circlebox\kern6pt} \def\ttverbatim{\begingroup \tt \parindent=0pt \obeylines \uncatcodespecials \catcode`/=0 \obeyspaces} \let\endverbatim=\endgroup {\obeyspaces\global\let =\ } % let active space = control space \def\uncatcodespecials{\def\do##1{\catcode`##1=12 }\dospecials} \def\cvdots{\kern3pt\qquad\smash\vdots} \newcount\refno \newif\ifshowit \def\ref{\showittrue\makeref} \def\silentref{\showitfalse\makeref} \def\references{} % this will grow until it holds all the references \def\makeref#1#2{\advance\refno by1 \edef#1{{\the\refno}}% \toks0=\expandafter{\references}% {\def\rm{\eightss}\def\sl{\eightssi}\def\bf{\eightssb}\def\tt{\eighttt}% \def\TeX{T\kern-.2em\lower.5ex\hbox{E}\kern-.000em X}% \xdef\references{\the\toks0 \noexpand\item{\the\refno.}#2\par}}% \ifshowit\edef\next{\spacefactor=\the\spacefactor\space}% $^{\the\refno}$\next\fi} \hyphenation{Dijk-stra} \hyphenchar\tentt=-1 % no hyphenation in the typewriter font \titlepage \leftline{\kern13mm\CompJtitle Literate Programming} \kern6mm \CJrule \kern4.5mm \leftline{\kern13mm\bf Donald E. Knuth} \kern2pt \leftline{\kern13mm\eightrm Computer Science Department, Stanford University, Stanford, CA 94305, USA} \kern4mm \CJrule \kern6mm \leftline{\kern13mm\vbox{\hsize=151mm\CompJabstract\noindent The author and his associates have been experimenting for the past several years with a programming language and documentation system called \WEB. This paper presents \WEB\ by example, and discusses why the new system appears to be an improvement over previous ones.}} \bigskip\bigskip \begindoublecolumns \beginsection A. INTRODUCTION The past ten years have witnessed substantial improvements in programming methodology. This advance, carried out under the banner of ``structured programming,'' has led to programs that are more reliable and easier to comprehend; yet the results are not entirely satisfactory. My purpose in the present paper is to propose another motto that may be appropriate for the next decade, as we attempt to make further progress in the state of the art. I believe that the time is ripe for significantly better documentation of programs, and that we can best achieve this by considering programs to be {\it works of literature}. Hence, my title: ``Literate Programming.'' Let us change our traditional attitude to the construction of programs: Instead of imagining that our main task is to instruct a {\it computer\/} what to do, let us concentrate rather on explaining to {\it human beings\/} what we want a computer to do. The practitioner of literate programming can be regarded as an essayist, whose main concern is with exposition and excellence of style. Such an author, with thesaurus in hand, chooses the names of variables carefully and explains what each variable means. He or she strives for a program that is comprehensible because its concepts have been introduced in an order that is best for human understanding, using a mixture of formal and informal methods that re\"\i nforce each other. I dare to suggest that such advances in documentation are possible because of the experiences I've had during the past several years while working intensively on software development. By making use of several ideas that have existed for a long time, and by applying them systematically in a slightly new way, I've stumbled across a method of composing programs that excites me very much. In fact, my enthusiasm is so great that I must warn the reader to discount much of what I shall say as the ravings of a fanatic who thinks he has just seen a great light. Programming is a very personal activity, so I can't be certain that what has worked for me will work for everybody. Yet the impact of this new approach on my own style has been profound, and my excitement has continued unabated for more than two years. I~enjoy the new methodology so much that it is hard for me to refrain from going back to every program that I've ever written and recasting it in ``literate'' form. I~find myself unable to resist working on programming tasks that I would ordinarily have assigned to student research assistants; and why? Because it seems to me that at last I'm able to write programs as they should be written. My programs are not only explained better than ever before; they also are better programs, because the new methodology encourages me to do a better job. For these reasons I am compelled to write this paper, in hopes that my experiences will prove to be relevant to others. I must confess that there may also be a bit of malice in my choice of a title. During the 1970s I was coerced like everybody else into adopting the ideas of structured programming, because I couldn't bear to be found guilty of writing {\it unstructured\/} programs. Now I have a chance to get even. By coining the phrase ``literate programming,'' I am imposing a moral commitment on everyone who hears the term; surely nobody wants to admit writing an {\it il{}literate\/} program. \beginsection B. THE \WEB\ SYSTEM I hope, however, to demonstrate in this paper that the title is not merely wordplay. The ideas of literate programming have been embodied in a language and a suite of computer programs that have been developed at Stanford University during the past few years as part of my research on algorithms and on digital typography. This language and its associated programs have come to be known as the \WEB\ system. My goal in what follows is to describe the philosophy that underlies \WEB, to present examples of programs in the \WEB\ language, and to discuss what may be the future implications of this work. I chose the name \WEB\ partly because it was one of the few three-letter words of English that hadn't already been applied to computers. But as time went on, I've become extremely pleased with the name, because I~think that a complex piece of software is, indeed, best regarded as a {\it web\/} that has been delicately pieced together from simple materials. We understand a complicated system by understanding its simple parts, and by understanding the simple relations between those parts and their immediate neighbors. If we express a program as a web of ideas, we can emphasize its structural properties in a natural and satisfying way. \WEB\ itself is chiefly a combination of two other languages: (1)~a document formatting language and (2)~a programming language. My prototype \WEB\ system uses \TeX\ as the document formatting language and \PASCAL\ as the programming language, but the same principles would apply equally well if other languages were substituted. Instead of \TeX, one could use a language like Scribe or Troff; instead of \PASCAL, one could use {\mc ADA}, {\mc ALGOL}, {\mc LISP}, {\mc COBOL}, {\mc FORTRAN}, {\mc APL}, {\mc C}, etc., or even assembly language. The main point is that \WEB\ is inherently bilingual, and that such a combination of languages proves to be much more powerful than either single language by itself. \WEB\ does not make the other languages obsolete; on the contrary, it enhances them. I naturally chose \TeX\ to be the document formatting language, in the first \WEB\ system, because \TeX\ is my own creation;\ref\TeXbook{D. E. Knuth, {\sl The \TeX book}. Addison-Wesley, Reading, Mass., U.S.A. (1983).} I wanted to acquire a lot of experience in harnessing \TeX\ to a variety of different tasks. I~chose \PASCAL\ as the programming language because it has received such widespread support from educational institutions all over the world; it is not my favorite language for system programming, but it has become a ``second language'' for so many programmers that it provides an exceptionally effective medium of communication. Furthermore \WEB\ itself has a macro-processing ability that makes \PASCAL's limitations largely irrelevant. Document formatting languages are newcomers to the computing scene, but their use is spreading rapidly. Therefore I'm confident that we will be able to expect each member of the next generation of programmers to be familiar with a document language as well as a programming language, as part of their basic education. Once a person knows both of the underlying languages, there's no trick at all to learning \WEB, because the \WEB\ user's manual is fewer than ten pages long. A \WEB\ user writes a program that serves as the source language for two different system routines. (See Figure~1.) One line of processing is called {\it weaving\/} the web; it produces a document that describes the program clearly and that facilitates program maintenance. The other line of processing is called {\it tangling\/} the web; it produces a machine-executable program. The program and its documentation are both generated from the same source, so they are consistent with each other. \bigskip \centerline{\vbox{ \halign{&\hss#\hss\cr &&&\TeX\cr \noalign{\vskip-4pt} &&\encircle{TEX}&\enspace\rightarrowfill\enspace&\encircle{DVI}\cr \multispan2\hfil\smash{\raise4pt\hbox{\tt WEAVE}\kern-1pt}$\nearrow$ \cr \noalign{\vskip6pt} \encircle{WEB}\cr \noalign{\vskip6pt} \multispan2\hfil\smash{\lower6pt\hbox{\tt TANGLE}\kern-1pt}$\searrow$ \cr &&\encircle{PAS}&\enspace\rightarrowfill\enspace&\encircle{REL}\cr \noalign{\vskip-2pt} &&&\mc\ PASCAL\ \cr} }} \nobreak\medskip \caption 1. Dual usage of a {\tt WEB} file. \bigbreak Let's look at this process in slightly more detail. Suppose you have written a \WEB\ program and put it into a computer text file called {\tt COB.WEB} (say). To generate hardcopy documentation for your program, you can run the {\tt WEAVE} processor; this is a system program that takes the file {\tt COB.WEB} as input and produces another file {\tt COB.TEX} as output. Then you run the \TeX\ processor, which takes {\tt COB.TEX} as input and produces {\tt COB.DVI} as output. The latter file, {\tt COB.DVI}, is a ``device-independent'' binary description of how to typeset the documentation, so you can get printed output by applying one more system routine to this file. You can also follow the other branch of Figure~1, by running the {\tt TANGLE} processor; this is a system program that takes the file {\tt COB.WEB} as input and produces a new file {\tt COB.PAS} as output. Then you run the \PASCAL\ compiler, which converts {\tt COB.PAS} to a binary file {\tt COB.REL} (say). Finally, you can run your program by loading and executing {\tt COB.REL}. The process of ``compile, load, and go'' has been slightly lengthened to ``tangle, compile, load, and go.'' \beginsection C. A COMPLETE EXAMPLE Now it's time for me to stop presenting general platitudes and to move on to something tangible. Let us look at a real program that has been written in \WEB. The numbered paragraphs that follow are the actual output of a \WEB\ file that has been ``woven'' into a document; a computer has also generated the indexes that appear at the program's end. If my claims for the advantages of literate programming have any merit, you should be able to understand the following description more easily than you could have understood the same program when presented in a more conventional way. However, I am trying here to explain the format of \WEB\ documentation at the same time as I am discussing the details of a nontrivial algorithm, so the description below is slightly longer than it would be if it were written for people who already have been introduced to \WEB. \silentref\Dijk{O.-J.~Dahl, E.~W. Dijkstra, and C.~A.~R. Hoare, {\sl Structured Programming}. Academic Press, London and New York (1972).} \silentref\goto{D. E. Knuth, Structured programming with {\bf go to} statements. {\sl Computing Surveys\/ \bf6}, 261--301 (1974).} Here, then, is the computer-generated output: \bigskip \CJrule \medskip \begingroup \def\prune\input webmac{\input primes.contents} \def\Z#1#2#3{\line{\ignorespaces#1\ \dotfill\ {\tensy x}#2}} \def\M#1.{\MN#1.\iftrue\medbreak\startsection\ignorespaces} \def\firstmod{1} \def\N#1.#2.{\MN#1.\iftrue\nobreak \ifx\modno\firstmod\medskip\else\bigskip\fi \CJrule\medbreak\startsection {\bf\ignorespaces#2.\quad}\ignorespaces} \def\inx{\par\medbreak \def\:##1, {\par\hangindent2em\noindent##1:\kern1em} \def\[##1]{$\underline{##1}$} \rm \rightskip0pt plus2.5em \tolerance10000 \let\*=\lapstar \hyphenpenalty10000 \parindent0pt} \def\fin{\par\bigskip\CJrule\medbreak \parfillskip0pt plus1fil \def\note##1##2.{\hfil\penalty-1\hfilneg\quad{\eightrm##1 ##2.}} \def\U{\note{Used in}} \def\:{\par\hangindent 2em}\let\*=*} \let\con=\par \parskip=0pt \expandafter\prune\input primes \endgroup \beginsection D. HOW THE EXAMPLE WAS SPECIFIED Everything reproduced above, from the table of contents preceding the program to the indexes of identifiers and section names at the end, was generated by applying the program {\tt WEAVE} to a source file {\tt PRIMES.WEB} written in the \WEB\ language. Let us now look at that file {\tt PRIMES.WEB}, in order to get an idea of what a \WEB\ user actually types. There's no need to show very much of {\tt PRIMES.WEB}, however, because that file is reflected quite faithfully by the formatted output. Figure~2 contains enough of the \WEB\ source to indicate the general flavor; a reader who is familiar with the rudiments of \TeX\ will be able to reconstruct all of {\tt PRIMES.WEB} by looking only at the formatted output and Figure~2. \leftfloat \ttverbatim /hrule /medskip \font\ninerm=cmr9 \let\mc=\ninerm % medium caps \def\WEB{{\tt WEB}} \def\PASCAL{{\mc PASCAL}} \def\[{\ifhmode\ \fi$[\mkern-2mu[$} \def\]{$]\mkern-2mu]$\ } /cvdots \hyphenation{Dijk-stra} /medskip @* Printing primes: An example of \WEB. The following program is essentially the same as Edsger Dijkstra's @^Dijkstra, Edsger@> ``first example of step-wise program composition,'' found on pages 26--39 of his {\sl Notes on Structured Programming},$^\Dijk$ but it has been translated into the \WEB\ language. @.WEB@> /medskip \[Double brackets will be used in what follows to enclose comments relating to \WEB\ /cvdots an informal top-level description.\] /medskip @p @<Program to print the first thousand prime numbers@> /endverbatim \medskip \caption 2a. The beginning of {\tt PRIMES.WEB}. \medskip \hrule \endfloat Figure 2a starts with \TeX\ commands (not shown in full) that make it convenient to typeset double brackets $[\mkern-2mu[\ldots]\mkern-2mu]$ and to give special typographic treatment to names like `\WEB' and `\PASCAL'. A \WEB\ user generally begins by declaring such special aspects of the document format; for example, if nonstandard fonts of type are needed, they are usually stated first. It may also be necessary to specify the correct hyphenation of non-English words that appear in the document. Then comes `{\tt@*}', which starts the program proper. \WEB\ uses the symbol `{\tt@}' as an escape character for special instructions to the {\tt WEAVE} and {\tt TANGLE} processors. Everything between such special commands is either expressed in \TeX\ language or in \PASCAL\ language, depending on the context. Each section of the program begins either with `{\tt@ }' (i.e., at-sign and space) or `{\tt@*}' (i.e., at-sign and asterisk); \WEB\ supplies the section numbers automatically. The latter case, `{\tt@*}', denotes a {\it major section\/} of the program, for which a special title is given. This title will appear in boldface type, and it will also appear in the table of contents, and as a running headline on all pages of the woven documentation until another major section begins. Each major section starts at the top of a page. (Such page beginnings have been indicated by horizontal lines in our example, because \WEB's normal output format has been adapted to the format of this journal. The output of {\tt WEAVE} usually has a lot more white space, and the individual lines of text are usually quite a bit wider.) The lines that follow in Figure~2a show a few more \WEB\ instructions: `{\tt@\char`^}' marks the beginning of an index entry to be set in roman type; `{\tt@>}' marks the end of an argument to a \WEB\ command; `{\tt@.}'\ marks the beginning of an index entry to be set in typewriter type; `{\tt@p}' marks the beginning of the \PASCAL\ program; and `{\tt@<}' marks the beginning of a top-level description, i.e., of a section name in the \WEB\ program. \rightfloat \ttverbatim /hrule /medskip @ This program has no input, because we want to keep it rather simple. The result of the program will be to produce a list of the first thousand prime numbers, and this list will appear on the |output| file. /medskip Since there is no input, we declare the value |m=1000| as a compile-time constant. The program itself is capable of generating the first |m| prime numbers for any positive |m|, as long as the computer's finite limitations are not exceeded. /medskip \[The program text below specifies the ``expanded meaning'' of `\X2:Program to print $\ldots$ numbers\X'; notice that it involves the top-level descriptions of three other sections. When those top-level descriptions are replaced by their expanded meanings, a syntactically correct \PASCAL\ program will be obtained.\] /medskip @<Program to print...@>= program print_primes(output); const @!m=1000; @<Other constants of the program@>@; var @<Variables of the program@>@; begin @<Print the first |m| prime numbers@>; end. /endverbatim \medskip \caption 2b. The \WEB\ code that generated \sec2. \ttverbatim /bigskip /hrule /medskip @ In order to keep this program reasonably free of notations that are uniquely \PASCAL esque, \[and in order to illustrate /cvdots The first three macro definitions here are parametric; the other two are simple.\] /medskip @d print_string(#)==write(#) {put a given string into the |output| file} @d print_integer(#)==write(#:1) {put a given integer into the |output| file, in decimal notation, using only as many digit positions as necessary} @d print_entry(#)==write(#:ww) {like |print_integer|, but |ww| character positions are filled, inserting blanks at the left} @d new_line==write_ln {advance to a new line in the |output| file} @d new_page==page {advance to a new page in the |output| file} /endverbatim \medskip \caption 2c. The \WEB\ code that generated \sec6. \medskip \hrule \endfloat Figure 2b immediately follows Figure~2a in the \WEB\ file. This material is what generated \sec2 of the documentation, and it illustrates the bilingual nature of \WEB: The commentary at the beginning of each section is typed in \TeX\ language, and the program text at the end is typed in \PASCAL\ language. Language-switching between \TeX\ and \PASCAL\ is occasionally desirable. For example, when you refer to technical details about the program, you usually want to describe them in \PASCAL, hence you want {\tt WEAVE} to format them with the typographic conventions it uses for \PASCAL\ programs. Conversely, when you put comments in a \PASCAL\ program, you want the text of those comments to be formatted by \TeX\ in the normal way. \WEB\ files use vertical bars to introduce \PASCAL\ formatting in the midst of \TeX\ formatting; for example, Figure~2b says `{\tt the |output| file}' in order to typeset `the \\{output} file'. The program text in Figure~2b begins with `{\tt@<}' instead of with the `{\tt@p}' command used in Figure~2a, because the program text in~\sec2 is the expansion of a specific top-level description. Notice that the top-level description has been abbreviated to `{\tt@<Program to print...@>}'. Since the names of sections tend to be rather long, it is a nuisance to type them in full each time; \WEB\ allows you to type `{\tt...}'\ after you have given enough text to identify the remainder uniquely. The `{\tt@!}'\ operation in the program text of Figure~2b governs the underlining of index entries. The `{\tt@;}'\ specifies an invisible symbol that has the effect of a semicolon in \PASCAL\ syntax. Commands such as these are comparatively unimportant, but they are available for polishing up the final documentation when you want to maintain fine control. Figure 2c shows key portions of the \WEB\ text that generated \sec6. Notice that the command `{\tt@d}' introduces a macro definition. All features of \WEB\ that appear in our example program are illustrated in Figures 2a, 2b, and~2c; the remainder of {\tt PRIMES.WEB} simply uses the same conventions again and again. In fact, most of the \WEB\ file is much simpler than the examples shown here; Figure~2 has illustrated only the difficult parts. \beginsection E. THE TANGLED OUTPUT Figure 3 shows the \PASCAL\ program {\tt PRIMES.PAS} that results when {\tt TANGLE} is applied to {\tt PRIMES.WEB}. This program is not intended for human consumption---it's only supposed to be readable by a \PASCAL\ compiler---so {\tt TANGLE} does not go to great pains to produce a beautiful format. Notice that underlines have been removed from the identifier names, and that all of the letters have been converted to uppercase (except in strings); {\tt TANGLE} tries to produce a format that will be acceptable to a standard \PASCAL\ compiler. {\tt TANGLE} removes all of the commentary in the \WEB\ file, but it inserts new comments of its own. If for some reason you need to correlate the tangled \PASCAL\ code with the woven documentation, you can find the program text for, say, \sec8 by looking between the comments `{\tt\char`\{8:\char`\}}' and `{\tt\char`\{:8\char`\}}'. A comparison of Figure~3 to Figure~2 should make it clear why the {\tt TANGLE} processor has acquired its name. \rightfloat \ttverbatim /hrule /medskip {1:}{2:}PROGRAM PRINTPRIMES(OUTPUT); CONST M=1000;{5:}RR=50;CC=4;WW=10;{:5}{19:} ORDMAX=30;{:19}VAR{4:} P:ARRAY[1..M]OF INTEGER;{:4}{7:} PAGENUMBER:INTEGER;PAGEOFFSET:INTEGER; ROWOFFSET:INTEGER;C:0..CC;{:7}{12:}J:INTEGER; K:0..M;{:12}{15:}JPRIME:BOOLEAN;{:15}{17:} ORD:2..ORDMAX;SQUARE:INTEGER;{:17}{23:} N:2..ORDMAX;{:23}{24:} MULT:ARRAY[2..ORDMAX]OF INTEGER;{:24} BEGIN{3:}{11:}{16:}J:=1;K:=1;P[1]:=2;{:16} {18:}ORD:=2;SQUARE:=9;{:18}; WHILE K<M DO BEGIN{14:}REPEAT J:=J+2;{20:} IF J=SQUARE THEN BEGIN ORD:=ORD+1;{21:} SQUARE:=P[ORD]*P[ORD];{:21}{25:} MULT[ORD-1]:=J;{:25};END{:20};{22:}N:=2; JPRIME:=TRUE; WHILE(N<ORD)AND JPRIME DO BEGIN{26:} WHILE MULT[N]<J DO MULT[N]:=MULT[N]+P[N]+P[N] ;IF MULT[N]=J THEN JPRIME:=FALSE{:26};N:=N+1; END{:22};UNTIL JPRIME{:14};K:=K+1;P[K]:=J; END{:11};{8:}BEGIN PAGENUMBER:=1; PAGEOFFSET:=1; WHILE PAGEOFFSET<=M DO BEGIN{9:} BEGIN WRITE('The First ');WRITE(M:1); WRITE(' Prime Numbers --- Page '); WRITE(PAGENUMBER:1);WRITELN;WRITELN; FOR ROWOFFSET:=PAGEOFFSET TO PAGEOFFSET+RR-1 DO{10:} BEGIN FOR C:=0 TO CC-1 DO IF ROWOFFSET+C*RR<= M THEN WRITE(P[ROWOFFSET+C*RR]:WW);WRITELN; END{:10};PAGE;END{:9}; PAGENUMBER:=PAGENUMBER+1; PAGEOFFSET:=PAGEOFFSET+RR*CC;END;END{:8}{:3}; END.{:2}{:1} /endverbatim \medskip \caption 3. PASCAL program generated from the \WEB\ file. \medskip \hrule \endfloat \beginsection F. THE WOVEN OUTPUT I mentioned earlier that {\tt WEAVE} is a program that converts a file like {\tt PRIMES.WEB} into a file {\tt PRIMES.TEX} that is a syntactically correct source file for \TeX. Figure~4 gives a sampling of {\tt PRIMES.TEX}, which is even more unreadable than {\tt PRIMES.PAS}. The instructions that cause \TeX\ to produce formatted \PASCAL\ programs, with appropriate typefaces and indentation, etc., are somewhat complex because they are supposed to give decent results regardless of the page size. There is no need to discuss Figure~4 further in the present paper, because the details of ``pretty printing'' are not relevant to my main theme. I have shown this much of {\tt PRIMES.TEX} only to make the point that it is nice to have a program like {\tt WEAVE} to do all the formatting; computer programs are not easy to typeset. \leftfloat \ttverbatim /hrule /medskip \input webmac \font\ninerm=amr9 /cvdots syntactically correct \PASCAL\ program will be obtained.\] /medskip \Y\P$\4\X2:Program to print the first thousand prime numbers\X\S$\6 \4\&{program}\1\ \37$\\{print\_primes}(% \\{output})$;\6 \4\&{const} \37$\|m=1000$;\5 \X5:Other constants of the program\X\6 \4\&{var} \37\X4:Variables of the program\X\6 \&{begin} \37\X3:Print the first \|m prime numbers\X;\6 \&{end}.\par \U section~1.\fi /cvdots The first three macro definitions here are parametric; the other two are simple.\] /medskip \Y\P\D \37$\\{print\_string}(\#)\S\\{write}(% \#)$\C{put a given string into the % \\{output} file}\par /cvdots \inx \:{Bertrand, Joseph, postulate}, 21. \:\\{boolean}, 15. /cvdots \:\.{WEB}, 1. \:\\{write}, 6. \:\\{write\_ln}, 6. \:\\{ww}, \[5], 6. \fin /cvdots \:\X4, 7, 12, 15, 17, 23, 24:Variables of the program\X \U section~2. \con /endverbatim \medskip \caption 4. \TeX\ program generated from the \WEB\ file. \medskip \hrule \endfloat \beginsection G. ADDITIONAL BELLS AND WHISTLES A system like \WEB\ can be successful only if it is capable of handling large programs as well as small ones, and only if it is complete enough to take care of all the practical requirements that arise when many different kinds of programs are considered. A small example like {\tt PRIMES.WEB} is a satisfactory vehicle for illustrating the general ideas, but it cannot be convincing as a demonstration of \WEB's ability to produce quality software in the ``real world.'' My original design of \WEB\ in September, 1981, was followed by a year of extensive experiments, so that by the time Version~1 was released in September, 1982, I could be fairly confident that the language was reasonably complete. Since then only one or two small extensions have proved to be necessary; and although numerous enhancements can easily be imagined, I believe that a useful stopping point for a working system called {\tt WEB83} has been reached. A full description of {\tt WEB83} appears in a Stanford report,\ref\WEBman% {D. E. Knuth, {\sl The \WEB\kern-2pt\ System of Structured Documentation}. Stanford Computer Science Report CS980 (September 1983).} which also contains the complete \WEB\ programs for {\tt WEAVE} and {\tt TANGLE}. The full language contains only a few features that do not show up in the {\tt PRIMES} example considered above: \def\nindent#1{\noindent\hbox to\parindent{#1)\hfil}\ignorespaces} \smallskip \nindent1 There are facilities to override {\tt WEAVE}'s automatic formatting of \PASCAL\ programs. For example, it is possible to force a statement to begin on a new line, or to force several statements to appear on the same line, or to suggest a desirable breakpoint in the middle of a long expression. In unusual cases, {\tt WEAVE} must parse program fragments that are not syntactically complete---for example, there may be a {\bf begin} without a matching {\bf end}---so a \WEB\ user must be given a chance to control the results. Furthermore there is a facility for changing {\tt WEAVE}'s formatting rules by declaring that a certain identifier should be treated as a certain \PASCAL\ reserved word, or by declaring that a certain reserved word should be treated as an ordinary identifier. \smallskip \nindent2 There is a way to force {\tt TANGLE} to omit a space between two adjacent pieces of text, so that a name like `\\{x3}' can be manufactured from `\|x' and `\\3'. Similarly, there is a way to pass an arbitrary sequence of characters through {\tt TANGLE} so that the same sequence will appear ``verbatim'' in the \PASCAL\ file; and there is a way to force beginning-of-line in that file. The latter extensions have proved to be necessary to deal with various nonstandard conventions of different \PASCAL\ compilers. When a comment in braces is sent to the \PASCAL\ file, {\tt TANGLE} is careful not to introduce further braces inside the comment. \smallskip \nindent3 There are facilities for octal and hexadecimal constants in \WEB\ thees. {\tt TANGLE} converts such constants to decimal form; {\tt WEAVE} gives them an appropriate typographic treatment. \smallskip \nindent4 There is a facility for dealing with alphabetic constants. When a program contains a double-quoted character like {\tt"A"}, {\tt TANGLE} converts this to an integer between 0 and~127 that equals the corresponding {\mc ASCII} code (in this case 65). The use of {\mc ASCII} code facilitates the construction of software that is readily portable from one machine to another, independent of the actual character set in use. \smallskip \nindent5 Furthermore, if a double-quoted constant is a string of several characters, like {\tt"cat"}, {\tt TANGLE} converts it into a unique integer that is 128 or more. A special {\it string pool file\/} is written, containing all of the strings that have been specially encoded in this way. I have used this general mechanism only in large programs, but experience has shown that it makes quite a nice substitute for the string-processing capabilities that \PASCAL\ lacks. (Incidentally, I noticed after several months that a program needs to have some indication that the string-pool file it is reading contains the same strings that {\tt TANGLE} generated when the program itself was tangled. Therefore a ``check sum'' is included in the string pool file; each program is able to refer to its own check sum and to compare it with the value in the file. This check-sum extension was one of the last features to be added to \WEB.) \smallskip \nindent6 The {\tt PRIMES} example illustrates macros with parameters and macros without parameters. \WEB\ also allows ``numeric'' macros, which are small integer constants; {\tt TANGLE} is capable of doing simple arithmetic on such constants. This feature of \WEB\ was introduced specifically to overcome \PASCAL's unfortunate inability to do compile-time arithmetic. For example, it is impossible to have a \PASCAL\ array whose bounds are `$0\to n-1$', or to write `$20+3:$' as the label of one of the cases in `{\bf case} $x+y$'; \WEB's numeric macros make it possible for {\tt TANGLE} to preprocess such constants. \beginsection H. OCCAM'S RAZOR I would also like to mention several things that were intentionally left out of \WEB, since I have tried to keep the language as simple as I could. There are no ``conditional macros,'' nor does {\tt TANGLE} evaluate Boolean expressions that might influence the output. I~found that everything I needed could be done satisfactorily by commenting out the optional code. For example, a system program is often designed to gather statistics about its own operation, but such statistics-gathering is pointless unless someone is actually going to use the results. In order to make the instrumentation code optional, I include the word `{\bf stat}' just before any special code for statistics, and `{\bf tats}' just after such code; and I tell {\tt WEAVE} to regard {\bf stat} and {\bf tats} as if they were {\bf begin} and {\bf end}. But {\bf stat} and {\bf tats} are actually simple macros. When I do want to gather the statistics, I define {\bf stat} and {\bf tats} to be null; but in a production version of the software, I make {\bf stat} expand to~`{\tt@\char`\{}' and {\bf tats} expand to~`{\tt@\char`\}}', where {\tt@\char`\{} and {\tt@\char`\}} are special braces that {\tt TANGLE} does not remove. Thus the optional code appears as a harmless comment in the \PASCAL\ program. \WEB's macros are allowed to have at most one parameter. Again, I did this in the interests of simplicity, because I noticed that most applications of multiple parameters could in fact be reduced to the one-parameter case. For example, suppose that you want to define something like $$\hbox{\tt mac(\#1,\#2) == m[\#1*r+\#2]}$$ which \WEB\ doesn't permit. You can get essentially the same result with two one-parameter macros $$\vbox{\halign{\tt#\hfil\cr mac\char`\_tail(\#) == \#]\cr mac(\#) == m[\#*r+mac\char`\_tail\cr}}$$ since, e.g., `{\tt mac(a)(b)}' will expand into `{\tt m[a*r+b]}'. Here is another example that indicates some of the surprising generality of one-parameter macros: Consider the two definitions $$\vbox{\halign{\tt#\hfil\cr define two\char`\_cases(\#)==case j of\cr \ \ \ \ \ \ \ \ \ \ \ \ \ 1:\#(1); 2:\#(2); end\cr define reset\char`\_file(\#)==reset(file@\&\#)\cr}}$$ where `{\tt@\char`\&}' in the second definition is the concatenation operation that pastes two texts together. You can now say $$\hbox{\tt two\char`\_cases(reset\char`\_file)}$$ and the resulting \PASCAL\ output will be $$\vbox{\halign{\tt#\hfil\cr case j of\cr 1:reset(file1);\cr 2:reset(file2);\cr end\cr}}$$ In other words, the name of one macro can usefully be a parameter to another macro. This particular trick makes it possible to live with \PASCAL\ compilers that do not allow arrays of files. \beginsection I. PORTABILITY One of the goals of my \TeX\ research has been to produce portable software, and the {\tt WEB} system has been extremely helpful in this respect. Although my own work is done on a DEC-10 computer with Stanford's one-of-a-kind operating system, the software developed with \WEB\ has already been transported successfully to a wide variety of computers made by other manufacturers (including IBM, Control Data, XEROX, Hewlett-Packard), and to a variety of different operating systems for those machines. To my knowledge, no other software of such complexity has ever been transported to so many different machines. It seems likely that \TeX\ will soon be operating on all but the smallest of the world's computer systems. To my surprise, the main bottleneck to portability of the \TeX ware has been the lack of suitable \PASCAL\ compilers, because \PASCAL\ has often been implemented without system programming in mind. Anybody who has a decent \PASCAL\ compiler can install \WEB\ (and all programs written in \WEB) without great difficulty, essentially as follows: \smallskip \item{1)} Start with the three files {\tt WEAVE.WEB}, {\tt TANGLE.WEB}, and {\tt TANGLE.PAS}. (The programs have not been copyrighted, so these files are not difficult to obtain.) \item{2)} Run {\tt TANGLE.PAS} through your \PASCAL\ compiler to get a working {\tt TANGLE} program. \item{3)} Check your {\tt TANGLE} by applying it to {\tt TANGLE.WEB}; your output file should match {\tt TANGLE.PAS}. \item{4)} Apply your {\tt TANGLE} to the file {\tt WEAVE.WEB}, obtaining {\tt WEAVE.PAS}; then apply \PASCAL\ to {\tt WEAVE.PAS} and you'll have a working {\tt WEAVE} system. \item{5)} The same process applies to any software written in \WEB, notably to \TeX\ itself. (However, you need fonts and suitable output equipment in order to make proper use of \TeX; that may be another bottleneck.) Once you have \TeX\ working, you can apply {\tt WEAVE} and \TeX\ to your \WEB\ files, thereby getting program documents as illustrated above. \smallskip\noindent Notice that a {\tt TANGLE.PAS} file is needed in order to get this ``bootstrapping'' process started. If you have just {\tt WEAVE.WEB} and {\tt TANGLE.WEB}, you can't do the first step. However, anybody who has looked seriously into the question of software portability will realize that my comments in the preceding paragraphs have been oversimplified. I have glossed over some serious problems that arise: Character sets are different; file naming conventions are different; special conventions are needed to interact with a user's terminal; data is packed differently on different machines; floating-point arithmetic is always nonstandard and sometimes nonexistent; users want ``friendly'' interaction with existing programs for editing and spooling; etc., etc. Furthermore, many of the world's \PASCAL\ compilers are incredibly bizarre. Therefore it is quite na\"\i ve to believe that a single program {\tt TANGLE.PAS} could actually work on very many different machines, or even that one single source file {\tt TANGLE.WEB} could be adequate; some system-dependent\kern-.5pt\kern.5pt\ changes are inevitable. The \WEB\ system caters to system-dependent changes in a simple but surprisingly effective way that I neglected to mention when I listed its other features. Both {\tt TANGLE} and {\tt WEAVE} are designed to work with {\it two\/} input files, not just one: In addition to a \WEB\ source file like {\tt TEX.WEB}, there is also a ``change file'' {\tt TEX.CH} that contains whatever changes are needed to customize \TeX\ for a particular system. (Similarly, the source files {\tt WEAVE.WEB} and {\tt TANGLE.WEB} are accompanied by {\tt WEAVE.CH} and {\tt TANGLE.CH}.) Here's how change files work: Each change has the form ``replace $x_1\ldots x_m$ by $y_1\ldots y_n$,'' for some $m\ge 1$ and $n\ge0$; here $x_i$ and~$y_j$ represent lines in the change file. The {\tt WEAVE} and {\tt TANGLE} programs read data from the \WEB\ input file until finding a line that matches $x_1$; this line, and the $m-1$ following lines, are replaced by $y_1\ldots y_n$. An error message is given if the $m$ lines replaced did not match $x_1\ldots x_m$ perfectly. For example, the program {\tt PRIMES.WEB} invokes a \\{page} procedure to begin a new page; but \\{page} was not pres\-ent in Wirth's original \PASCAL\ and it is defined rather vaguely in the \PASCAL\ standard. Therefore a system-dependent change may be needed here. A change file {\tt PRIMES.CH} could be made by copying the line $$\hbox{\tt @d new\char`\_page==page}$$ from Figure~2c and specifying one or more appropriate replacement lines. The program {\tt TANGLE} itself contains about 190 sections, and a typical installation will have to change about 15 of these. If you want to transport {\tt TANGLE} to a new environment, you therefore need to create a suitable file {\tt TANGLE.CH} that modifies 15~or~so parts of {\tt TANGLE.WEB}. (Examples of {\tt TANGLE.CH} are provided to all people who receive {\tt TANGLE.WEB}, so that each implementor has a model of what to do.) You need to insert your changes by hand into {\tt TANGLE.PAS}, until you have a {\tt TANGLE} program that works sufficiently well to support further bootstrapping. But you never actually change the master file {\tt TANGLE.WEB}. This approach has two important advantages. First, the same master file {\tt TANGLE.WEB} is used by everybody, and it contains the basic logic of {\tt TANGLE} that really defines the essence of tangling. The system-dependent changes do not affect any of the subtle parts of {\tt TANGLE}'s control structures or data structures. Second, when the official {\tt TANGLE} has been upgraded to a newer version, a brand new {\tt TANGLE.WEB} will almost always work with the old {\tt TANGLE.CH}, since changes are rarely made to the system-dependent parts. In other words, this dual-input-file scheme works when the \WEB\ file is constant and the {\tt CH} file is modified, and it also works when the {\tt CH} file is constant but the \WEB\ file is modified. Change files were added to \WEB\ about three months after the system was initially designed, based on our initial experiences with people who had volunteered to participate in portability experiments. We realized about a year later that {\tt WEAVE} could be modified so that only the changed parts of a program would (optionally) be printed; thus, it's now possible to document the changes by listing only the sections that are actually affected by the {\tt CH} file that {\tt WEAVE} has processed. We also generalized the original format of {\tt CH} files, which permitted only changes that extended to the end of a section. These two important ideas were among the final enhancements incorporated into {\tt WEB83}. \beginsection J. PROGRAMS AS WEBS When I first began to work with the ideas that eventually became the \WEB\ system, I thought that I would be designing a language for ``top-down'' programming, where a top-level description is given first and successively refined. On the other hand I knew that I often created major parts of programs in a ``bottom-up'' fashion, starting with the definitions of basic procedures and data structures and gradually building more and more powerful subroutines. I had the feeling that top-down and bottom-up were opposing methodologies: one more suitable for program exposition and the other more suitable for program creation. But after gaining experience with \WEB, I have come to realize that there is no need to choose once and for all between top-down and bottom-up, because a program is best thought of as a web instead of a tree. A hierarchical structure is present, but the most important thing about a program is its structural relationships. A complex piece of software consists of simple parts and simple relations between those parts; the programmer's task is to state those parts and those relationships, in whatever order is best for human comprehension---not in some rigidly determined order like top-down or bottom-up. When I'm writing a longish program like {\tt TANGLE.WEB} or {\tt WEAVE.WEB} or {\tt TEX.WEB}, I invariably have strong feelings about what part of the whole should be tackled next. For example, I'll come to a point where I need to define a major data structure and its conventions, before I'll feel happy about going further. My experiences have led me to believe that a person reading a program is, likewise, ready to comprehend it by learning its various parts in approximately the order in which it was written. The {\tt PRIMES.WEB} example illustrates this principle on a small scale; the decisions that Dijkstra made as he composed the original program$^\Dijk$ appear in the \WEB\ documentation in the same order. Top-down programming gives you a strong idea of where you are going, but it forces you to keep a lot of plans in your head; suspense builds up because nothing is really nailed down until the end. Bottom-up programming has the advantage that you continually wield a more and more powerful pencil, as more and more subroutines have been constructed; but it forces you to postpone the overall program organization until the last minute, so you might flounder aimlessly. When I tear up the first draft of a program and start over, my second draft usually considers things in almost the same order as the first one did. Sometimes the ``correct'' order is top-down, sometimes it is bottom-up, and sometimes it's a mixture; but always it's an order that makes sense on expository grounds. Thus the \WEB\ language allows a person to express programs in a ``stream of consciousness'' order. {\tt TANGLE} is able to scramble everything up into the arrangement that a \PASCAL\ compiler demands. This feature of \WEB\ is perhaps its greatest asset; it makes a \WEB-written program much more readable than the same program written purely in \PASCAL, even if the latter program is well commented. And the fact that there's no need to be hung up on the question of top-down versus bottom-up---since a programmer can now view a large program as a web, to be explored in a psychologically correct order---is perhaps the greatest lesson I have learned from my recent experiences. Another surprising thing that I learned while using \WEB\ was that traditional programming languages had been causing me to write inferior programs, although I hadn't realized what I was doing. My original idea was that \WEB\ would be merely a tool for documentation, but I actually found that my \WEB\ programs were better than the programs I had been writing in other languages. How could this be? Well, imagine that you are writing a small subroutine that updates part of a data structure, and suppose that the updating takes only one or two lines of code. In practical programs, there's often something that can go wrong, if the user's input is incorrect, so the subroutine has to check that the input is correct before doing the update. Thus, the subroutine has the general form $$\vbox{\halign{#\hfil\cr \&{procedure} \\{update};\cr \&{begin if} \<input data is invalid\> \&{then}\cr \quad \<Issue an error message and try to recover\>;\cr \<Update the data structure\>;\cr \&{end}.\cr}}$$ A subtle phenomenon occurs in traditional programming languages: While writing the program for `\<Issue an error message and try to recover\>', a programmer subconsciously tries to get by with the fewest possible lines of code, since the program for `\<Update the data structure\>' is quite short. If an extensive error recovery is actually programmed, the subroutine will appear to have error-message printing as its main purpose. But the programmer knows that the error is really an exceptional case that arises only rarely; therefore a lengthy error recovery doesn't look right, and most programmers will minimize it (without realizing that they are doing so) in order to make the subroutine's appearance match its intended behavior. On the other hand when the same task is programmed with \WEB, the purpose of \\{update} can be shown quite clearly, and the possibility of error recovery can be reduced to a mere mention when \\{update} is defined. When another section entitled `\<Issue an error message and try to recover\>' is subsequently written, the whole point of that section is to do the best error recovery, and it becomes quite natural to write a better program as a result. This fact---that \WEB\ allows you to let each part of the program have its appropriate size, without distorting the readability of other parts---means that good programmers find their \WEB\ programs better than their \PASCAL\ programs, even though their \PASCAL\ programs once looked like the work of an expert. \beginsection K. STYLISTIC ISSUES I found that my style of using \WEB\ evolved quite a bit during the first year. The general format, in which each section beings with commentary and ends with a formal program fragment, is extremely versatile; you have the freedom to say anything you want, yet you must make a decision about how you'll do it. I imagine that different programmers will converge to quite different styles, but I would like to note down some of the things that have seemed to work best for me. Consider first the question of macros versus section names. A named section, like `\<Issue an error message and try to recover\>', is essentially the same as a parameterless macro; \WEB\ provides both. I prefer to use parameterless macros for ``small'' things that can be embodied in a word or two, but named sections for longer portions of the program that merit a fuller description. I usually start the name of a section with an imperative verb, but I give a declarative commentary at the beginning of a section. Thus, {\tt PRIMES.WEB} says `{\bf 8.}~Now that appropriate $\ldots$ \X8:Print table $p$\X$\;\S\;$\dots\thinspace'; I wouldn't do the opposite and say `{\bf8.}~Print the table. \X8:Code for printing\X$\;\S\;$\dots'. The name of a section (enclosed in angle brackets) should be long enough to encapsulate the essential characteristics of the code in that section, but it should not be too verbose. I found very early that it would be a mistake to include all of the assumptions about local and global variables in the name of each section, even though such information would strictly be necessary to isolate that section as an independent module. The trick is to find a balance between formal and informal exposition so that a reader can grasp what is happening without being overwhelmed with detail.\ref\Naur% {P. Naur, Formalization in program development. {\sl BIT\/ \bf22}, 437--453 (1982).} Another lesson I learned early in the game was that the name of a section should explicitly mention any nonstandard control structures, even though its data structures can often be left implied. Furthermore, if the control flow is properly explained, you can avoid the usual errors associated with \&{goto} statements; such statements can safely be introduced in a restrained but natural manner. For example, \sec14 of the prime-printing example could be reprogrammed as follows, using `\&{loop}' as a macro abbreviation for `\&{while} \\{true} \&{do}': $$\vbox{\halign{\hbox to\hsize{#\hfil}\cr \X14:Increase $j$ until it is the next prime number\X$\;\S$\cr \quad\&{loop begin} $j\K j+2$;\cr \qquad\X20:Update variables that depend on $j$\X;\cr \qquad\X22:If $j$ is prime, \&{goto} \\{found}\X;\cr \qquad\&{end};\cr \\{found}:\cr}}$$ With this change, \sec22 could become $$\vbox{\halign{\hbox to\hsize{#\hfil}\cr \X22:If $j$ is prime, \&{goto} \\{found}\X$\;\S$\cr \quad$n\K2$;\cr \quad\&{while} $n<\\{ord}$ \&{do}\cr \qquad\&{begin} \X26:If $p[n]$ is a factor of $j$, \&{goto} \\{not\_found}\X;\cr \qquad$n\K n+1$;\cr \qquad\&{end};\cr \quad\&{goto} \\{found};\cr \\{not\_found}:\cr}}$$ if \sec26 changes in the obvious way. The resulting program will be more efficient on most machines; and I believe that it is actually easier to read and to write, in spite of the fact that two \&{goto} statements appear, because the labels have been used with appropriate interpretations of their abstract significance. Of course, \PASCAL\ makes it difficult to use \&{goto} statements, because Wirth decided that labels should be numeric, and that they should be declared in advance. If I were to introduce the \&{goto} statements as suggested, I would have to define numeric macros \\{found} and \\{not\_found}, and I would have to insert `\&{label} \\{found}, \\{not\_found}' into the program at the right place. Such extra work is a bit of a nuisance, but it can be done in \WEB\ without spoiling the exposition. \PASCAL\ has a few other misfeatures that prove to be inconvenient with respect to \WEB\ exposition. The worst of these is the inability to declare local variables in the midst of a program or procedure. For example, a programmer often finds it most natural to define an integer variable when a \&{for} loop is introduced, but the rules of \PASCAL\ insist that such a variable be declared rather far away from that \&{for} loop. My \WEB\ programs overcome this problem by having sections like `\<Local variables for \\{xyzzy}\>' whenever there's a rather lengthy procedure `\\{xyzzy}' whose local variables should not be declared all at once. But when a procedure is short, say only half a dozen sections long, there's usually no harm in declaring its local variables in \PASCAL\ style, because the entire text of the procedure will tend to appear on one or two adjacent pages of the documentation. Another slightly awkward aspect of \PASCAL\ is its treatment of semicolons. If you look closely at the prime-number example, you'll see that I had to be a bit careful about where I put semicolons; sometimes they occur at the end of the expanded text of a section, but usually they don't. With a little self discipline, a person can learn to do this quite satisfactorily, but it is a nuisance until you get used to it. \beginsection L. ECONOMIC ISSUES What does it cost to use \WEB? Let's look first at the lowest level, where computer costs are considered, because it is easy to make quantitative statements at this level. The running time to {\tt TANGLE} a \WEB\ file is approximately the same as the time needed to compile the resulting \PASCAL\ program; hence the extra preprocessing does not cost much. Similarly, {\tt WEAVE} doesn't take long to produce a file for \TeX. However, \TeX\ needs a comparatively large amount of time to typeset the final document. For example, if we assume that each page requires four seconds, it will take four minutes to produce a 60-page document. The running time for {\tt WEAVE}-plus-\TeX\ is quite reasonable when you consider that your program is effectively being converted into a fairly substantial booklet; but the costs are sufficiently large to discourage remaking and reprinting such a booklet more than once or twice a day. When a new program is being developed, it is therefore customary to work with hardcopy documentation that is slightly obsolete, and to read the \WEB\ source file itself when up-to-date information is required; the source file is sufficiently easy to read for such purposes. The costs of \WEB\ are more difficult to estimate at higher levels, but I have found to my surprise that the total time of writing and debugging a \WEB\ program is no greater than the total time of writing and debugging an {\mc ALGOL} or {\mc PASCAL} program, even though my \WEB\ programs are much better, and even though I am putting substantially more documentation into the programs. Therefore I have lately been using \WEB\ for all of my programming, even for one-off jobs that I write ``for my eyes only'' just to explore occasional problems. The extra time I spend in preparing additional commentary is regained because the debugging time is reduced. In retrospect, the fact that a ``literate'' program takes much less time to debug is not surprising, because the \WEB\ language encourages a discipline that I was previously unwilling to impose on myself. I had known for a long time that the programs I construct for publication in a book, or the programs that I construct in front of a class, have tended to be comparatively free of errors, because I am forced to clarify my thoughts as I do the programming. By contrast, when writing for myself alone, I have often taken shortcuts that proved later to be dreadful mistakes. It's harder for me to fool myself in such ways when I'm writing a \WEB\ program, because I'm in ``expository mode'' (analogous to classroom lecturing) whenever a \WEB\ is being spun. Ergo, less debugging time. Now that I am writing all my programs in \WEB, an unforeseen problem has, however, arisen: I suddenly have a collection of programs that seem quite beautiful in my own eyes, and I have a compelling urge to publish all of them so that everybody can admire these works of art. A nice little 10-page program can easily be written and debugged in an afternoon and evening; if I keep accumulating such gems, I'll soon run out of storage space, and my office will be encrusted with webs of my own making. There is no telling what will happen if lots of other people catch \WEB\ fever and start foisting their creations on each other. I can already envision the appearance of a new journal, to be entitled {\sl Webs}, for the publication of literate programs; I imagine that it will have a large backlog and a large group of dedicated editors and referees. \beginsection M. RELATED WORK Nothing about \WEB\ is really new; I have simply combined a bunch of ideas that have been in the air for a long time. I would like to summarize in the next few paragraphs the things that had the greatest influence on my thinking as I put those pieces together. George Forsythe wrote in 1966 that ``A useful algorithm is a substantial contribution to knowledge. Its publication constitutes an important piece of schol\-ar\-ship.''\ref\GEF{G. E. Forsythe, Algorithms for scientific computation. {\sl Communications of the ACM\/ \bf9}, 255--256 (1966).} His comments have always inspired me to strive for excellence in programming, and they have played a major r\^^Dole in shaping my present view that it is worthwhile to consider {\it every\/} program as a work of literature. The design of \WEB\ was influenced primarily by the pioneering work of Pierre-Arnoul de Marneffe,\ref\deM{P. A. de Marneffe, {\sl Holon Programming}. Univ.~de Liege, Service D'Informatique (December, 1973).}$^,$% \ref\deMR{P. A. de Marneffe and D. Ribbens, Holon Programming, in A. G\"unther et al.\ (eds.), {\sl International Computing Symposium 1973\/}, Amsterdam, North-Holland (1974).} whose research on what he called ``Holon Programming'' has not received the attention it deserves. His work was, in turn, inspired by Arthur Koestler's excellent treatise on the structure of complex systems and organisms;\ref\Koest{A. Koestler, {\sl The Ghost in the Machine}. New York, Macmillan (1968).} thus we have another connection between programming and literature. A somewhat similar system was independently created by Edwin Towster.\ref\Tow% {E. Towster, A convention for explicit declaration of environments and top-down refinement of data. {\sl IEEE Transactions on Software Engineering\/ \bf SE--5}, 374--386 (1979).} I owe a great debt to Edsger Dijkstra, Tony Hoare, Ole-Johan Dahl, and Niklaus Wirth for opening my eyes to the importance of abstraction in the reading and writing of programs, and to Peter Naur for stressing the importance of a balance between formal and informal methods. Tony Hoare provided a special impetus for \WEB\ when he suggested in 1978 that I should publish my program for \TeX. Since very few large-scale software systems were available in the literature, he had been trying to promote the publication of well-written programs. Hoare's suggestion was actually rather terrifying to me, and I'm sure he knew that he was posing quite a challenge. As a professor of computer science, I was quite comfortable publishing papers about toy problems that could be polished up nicely and presented in an elegant manner; but I had no idea how to take a piece of real software, with all the compromises necessary to make it useful to a large class of people on a wide variety of systems, and to open it up to public scrutiny. How could a supposedly respectable academic, like me, reveal the way he actually writes large programs? And could a large program be made intelligible? My previous attempts along these lines\ref\CF{D. E. Knuth, Computer-drawn flow charts. {\sl Communications of the ACM\/ \bf 6}, 555--563 (1963).} were by now hopelessly out of date. I decided that this would be a good time to try out de Marneffe's ideas; furthermore, the \TeX\ system itself provided me with new tools for printing and format control, so I suspected that it would be possible to obtain state-of-the-art documentation by making proper use of typography. It is interesting to reread some of the comments that Tony made ten years ago in his keynote address to the first ACM symposium on Principles of Programming Languages:\ref\Hoare{C. A. R. Hoare, {\sl Hints on Programming Language Design}. Stanford Computer Science Report CS403 (October 1973).} \smallskip {\narrower\noindent Documentation must be regarded as an integral part of the process of design and coding. A good programming language will encourage and assist the programmer to write clear, self-documenting code, and even perhaps to develop and display a pleasant style of writing. \smallskip} \noindent He foresaw many future trends, but not the impending improvements in typesetting quality: \smallskip {\narrower\noindent It is of course possible for a compiler or service program to expand the abbreviations, fill in the defaults, and make explicit the assumptions. But in practice, experience shows that it is very unlikely that the output of a computer will ever be more readable than its input, except in such trivial but important aspects as improved indentation. \smallskip} Typographic formatting of computer programs has a long tradition, originating with {\mc ALGOL} and its immediate precursors. I'm not sure who made the first experiments, but I believe that the lion's share of the credit for developing excellent programming-language typography belongs to two people: Peter Naur, who edited the {\mc ALGOL~60} report\ref\Alg{P. Naur (ed.)~et al., Report on the algorithmic language ALGOL 60. {\sl Communications of the ACM\/ \bf3}, 299--314.} and gave special care to its presentation; and Myrtle Kellington, who served for many years as executive editor of ACM publications and set the standards that have been adopted by other journals. The computing profession owes much to these people, who made published programs so much more readable than they would otherwise have been; the magnitude of their contribution can only be appreciated by people who submit computer programs to journals like {\sl Acta Arithmetica\/} whose editors are unfamiliar with computer science. Bill McKeeman called attention to formatting issues when he published Algorithm~268, ``{\mc ALGOL~60} reference language editor,'' in 1965.\ref\McK{W. M. McKeeman, Algorithm 268. {\sl Communications of the ACM\/ \bf8}, 667--668 (1965).} There has been a flowering of such algorithms in recent years; the papers by Oppen\ref\DO{D. Oppen, Prettyprinting. {\sl ACM Transactions on Programming Languages and Systems\/ \bf2}, 465--483 (1980).} and by Rose and Welsh\ref\RW{G. A. Rose and J. Welsh, Formatted programming languages. {\sl Software---% Practice \char'46\ Experience\/ \bf11}, 651--669 (1981).} are particularly noteworthy. I began to design \WEB\ in the spring of 1979, when I constructed a prototype system that was called {\tt DOC}. Luis Trabb~Pardo helped me to develop a suitable style of exposition at that time; then Ignacio Zabala~Salelles gave a {\tt DOC} a thorough test when he prepared a full implementation of \TeX\ in \PASCAL. Zabala's implementation was successfully transported to many different computers,\ref\Z{I. Zabala and L. Trabb Pardo, The status of the PASCAL implementation of \TeX. {\sl TUGboat\/ \bf1}, 16--17 (1980).}\silentref\ZZ{I. Zabala, \TeX-PASCAL and PASCAL compilers. {\sl TUGboat\/ \bf2} (1), 11--12 (1981).}\silentref\ZZZ{I. Zabala, Some feedback from PTEX installations. {\sl TUGboat\/ \bf2} (2), 16--19 (1981).}$^-$\ref\ZZZZ{I. A. Zabala, How portable is PASCAL? Draft of paper in preparation (1982).} and this experience was of immense value to me when I cast \WEB\ into its present form in 1981. Since then many significant improvements have been suggested by my colleague David R. Fuchs, and I have also benefited from the experiences of a large number of outstanding people who volunteered to be guinea pigs for pre-released versions of \TeX. It's impossible for me to name everyone who has helped, but I would like to give special thanks to Arthur Samuel, Howard Trickey, Joe Weening, and Pierre MacKay for important contributions. I'm fortunate indeed to share a working environment with such stimulating people. When I originally designed the \WEB\ system, I spent about six weeks preparing the files {\tt TANGLE.WEB} and {\tt WEAVE.WEB}, during which time I was continually changing the language and trying different styles of exposition. (The programs were neither long nor complicated, but this was rather intensive work, so I didn't get much else done during those six weeks. The first two weeks were actually spent drafting the first ten per cent of what is now {\tt TEX.WEB}.) Then I spent about six tedious hours with a text editor, hand-simulating the behavior of {\tt TANGLE} on {\tt TANGLE.WEB}, so that I had a program {\tt TANGLE.PAS} that was ripe for debugging. At first I had to correct errors both in {\tt TANGLE.WEB} and {\tt TANGLE.PAS}, but soon {\tt TANGLE} was working well enough that I needed only {\tt TANGLE.WEB} as a source file. Then {\tt WEAVE.WEB} could be tangled and debugged too. The total time to create ``Version~0'' of the \WEB\ system, including the language design and the time to debug the programs and write a brief manual for users, was about eight weeks; then enhancements were added at the rate of about one per month for the next 18 months. As a result of this experience I think it's reasonable to state that a {\tt WEB}-like system can be created from scratch in a fairly short time, for some other pair of languages besides \TeX\ and \PASCAL, by an expert system programmer who is conversant with both languages. Indeed, I spoke about \WEB\ on a recent visit to London and one of the people in the audience decided to test this hypothesis; shortly afterwards I received an elegant report from Harold Thimbleby, who had just constructed an excellent system called {\tt Cweb}, based on Troff/Nroff and {\mc C} instead of \TeX\ and \PASCAL.\ref\Thim{H. Thimbleby, {\sl Cweb}. Preprint, University of York (August 1983).} \beginsection N. RETROSPECT AND PROSPECTS Enthusiastic reports about new computer languages, by the authors of those languages, are commonplace. Hence I'm well aware of the fact that my own experiences cannot be extrapolated too far. I also realize that, whenever I have encountered a problem with \WEB, I've simply changed the system; other users of \WEB\ cannot operate under the same ground rules. However, I believe that I have stumbled on a way of programming that produces better programs that are more port\-able and more easily understood and maintained; furthermore, the system seems to work with large programs as well as with small ones. I'm pleased that my work on typography, which began as an application of computers to another field, has come full circle and become an application of typography to the heart of computer science; I like to think of \WEB\ as a neat ``spinoff'' of my research on \TeX. However, all of my experiences with this system have been highly colored by my own tastes, and only time will tell if a large number of other people will find \WEB\ to be equally attractive and useful. I made a conscious decision not to design a language that would be suitable for everybody. My goal was to provide a tool for system programmers, not for high school students or for hobbyists. I don't have anything against high school students and hobbyists, but I don't believe every computer language should attempt to offer all things to all people. A user of \WEB\ needs to be good enough at computer science that he or she is comfortable dealing with several languages simultaneously. Since \WEB\ combines \TeX\ and \PASCAL\ with a few rules of its own, \WEB\ programs can contain \WEB\ syntax errors, \TeX\ syntax errors, \PASCAL\ syntax errors, and algorithmic errors; in practice, all four types of errors occur, and a bit of sophistication is needed to sort out which is which. Computer scientists tend to be better at such things than other people. I have found that \WEB\ programs can be debugged rapidly in spite of the profusion of languages, but I'm sure that many other intelligent people will find such a task difficult. In other words, \WEB\ seems to be specifically for the peculiar breed of people who are called computer scientists. And I'm pretty sure that there are also a lot of computer scientists who will not enjoy using \WEB; some of us are glad that traditional programming languages have comparatively primitive capabilities for inserted comments, because such difficulties provide a good excuse for not documenting programs well. Thus, \WEB\ may be only for the subset of computer scientists who like to write and to explain what they are doing. My hope is that the ability to make explanations more natural will cause more programmers to discover the joys of literate programming, because I believe it's quite a pleasure to combine verbal and mathematical skills; but perhaps I'm hoping for too much. The fact that at least one paper has been written that is a syntactically correct {\mc ALGOL 68} program\ref\ft{C. H. Lindsey, ALGOL 68 with fewer tears. {\sl The Computer Journal\/ \bf15}, 176--188 (1972).} encourages me to persevere in my hopes for the future. Perhaps we will even one day find Pulitzer prizes awarded to computer programs. And what about the future of \WEB? If the next year or so of trial use shows that a lot of other people besides myself become ``hooked'' on this method of programming, there will be many ways to incorporate the \WEB\ philosophy into a really effective programming environment. For example, it will be worthwhile to produce a unified system that does both tangling and compiling, instead of using separate programs as in Figure~1; and it will also be worthwhile to carry the unification one step further, so that run-time debugging as well as syntactic debugging can be done entirely in terms of the \WEB\ source language. Furthermore, a \WEB-like system could be designed to incorporate additional modularization, so that it would be easier to compile different parts of a program independently. The new generation of graphic workstations makes it desirable to display selected program sections on demand, by using \TeX\ only on the sections that are of current interest, instead of producing hardcopy for an entire document. And so on; a considerable amount of additional research and development will be appropriate if the idea of literate programming catches on. \bigskip\leftline{\bf Acknowledgements} \smallskip {\eightrm\baselineskip9pt \noindent The preparation of this paper was supported in part by the National Science Foundation under grants IST-8201926 and MCS-8300984, and by the System Development Foundation. `\TeX' is a trademark of the American Mathematical Society.\par} \enddoublecolumns % prepare for the references \bigskip\bigskip \hbox to\pagewidth{\hss\bf REFERENCES\hss\strut} \CJrule width\pagewidth \bigskip \begindoublecolumns \let\rm=\eightss \let\sl=\eightssi \let\bf=\eightssb \rm \baselineskip=9pt \tolerance=1000 \references \bigskip \noindent Received September 1983 \enddoublecolumns \kern6mm \CJrule width\pagewidth \bye