DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - metrics - download
Index: T t

⟦dbd2fe7ee⟧ TextFile

    Length: 4160 (0x1040)
    Types: TextFile
    Names: »tokenize.c«

Derivation

└─⟦b20c6495f⟧ Bits:30007238 EUUGD18: Wien-båndet, efterår 1987
    └─⟦this⟧ »EUUGD18/General/Corewars/tokenize.c« 

TextFile

/*	Copyrighted (C) 1989 by Na Choon Piaw.  All rights reserved       */



/*	This program and documentation is Public Domain, and may be	  */
/*	distributed and copied by everyone provided this header		  */
/*	remains intact							  */

/* tokenize ---- tokenizer for the assembler.  It splits up all of the
   tokens in the given input file, generate a linked list of such tokens,
   and outputs the pointer to that linked list.
  11/14                 ----- NCP                                    */
#include <stdio.h>
#include <ctype.h>
#include <strings.h>
#include <malloc.h>
#include "assem.h"

/* tokenize function:
   separate input stream (infile) into tokens.
   algorithm:
	1)	call nextoken to get next token.
	2)	if "null" then return the first pointer
	3)	otherwise, add the newtoken to the token list
	4)	go back to 1.
*/
tokenlist *tokenize(infile)
FILE	*infile;
{
	tokenlist	*head, *tail, *newtail;
	char		*nextoken(), *newtoken;

	head = tail = NULL;
	while ((newtoken = nextoken(infile)) != NULL)
	{
		if (!(newtail = (tokenlist *) 
		                 malloc((unsigned) sizeof(tokenlist))))
		{
			printf("ran out of space for tokens: %s\n", newtoken);
			printf("------  tokenize\n");
			exit(1);
		}

		/* otherwise , set old stuff to this and move tail one up*/
		newtail -> token = newtoken;
		newtail -> next = NULL;
		if (tail)			/* tail already defined */
		{
			tail -> next = newtail;		/* set previous ptr */
			tail = tail -> next;		/* move up list */
		}
		else
			head = tail = newtail;
	}	/* end while */

	return (head);	/* return function value */
}

/* function next token:
   return next token in the file.
   Algorithm:
	1)	read until start of next token or EOF
	2)	if EOF then return NULL pointer
	3)	read new token into buffer
	4)	malloc new string
	5)	put next token into new string
	6)	change all characters into upper case
	7)	return pointer to string created in (4)      */
/* BUG NOTE:
   Due to the way it is written, the assembler will not process things like
   "mov0 1" correctly, and neither will "mov 0 1;imp" work.
   this is because the tokenize breaks everything down that isn't a
   delimiter, and a ";" is not a delimiter, even though it's not in
   the instruction set.                                   NCP - 11/15/88 */
char	*nextoken(infile)
FILE	*infile;
{
	char	buffer[MAXBUFFER],	/* string buffer */
		*newtoken;		/* new token pointer */

	int	c,			/* character we read in one by one */
		i = 0;			/* integer counter */

	while ((c = fgetc(infile)) != EOF)
	{
		if (!isspace(c))
			break;		/* not space, so process */
	}

	if (c == EOF)
		return(NULL);		/* no more!! */

	if (c == COMMENT)		/* handle comments */
	{
		while (((c = fgetc(infile)) != '\n') && c != EOF)
			;		/* read until end of the line */

		if (c == EOF)
			return(NULL);

		/* process the rest of the file:
		   actually, we could have done this by using a goto
		   the beginning of this function, but I think this
		   is a lot more elegant                     --- CP */
		return(nextoken(infile));
	}

	while ((!isspace(c)) && (c != EOF))	/* read until next space */
	{
		if (i >= MAXBUFFER)	/* buffer out */
		{
			printf("buffer over extended\n");	
			printf("--- nextoken\n");
			exit(1);
		}

		buffer[i++] = c;

		c = fgetc(infile);
	}	/* end while */
	buffer[i] = '\0';		/* terminate with a null */

	if (!(newtoken = (char *) malloc( (unsigned) strlen(buffer) + 1)))
	{
		printf("not enough memory for token %s\n", buffer);
		printf(" ------- nextoken\n");
		exit(1);
	}

	strcpy(newtoken, buffer);

	upcase(newtoken);

	return(newtoken);
}	/* end of nextoken */

/* upcase function -- translate string to upper case (don't trust toupper) */
upcase(str)
char	*str;
{
	while (*str)
	{
		if ((*str <= 'z') && (*str >= 'a'))
			*str -= 'a' - 'A';
		str++;
	}
}

/* special main to test the above */
/* section commented out because code has been fully debugged
   and given a clean bill of health			CP - 11/14/88 
main(argc,argv)
int	argc;
char	*argv[];
{
	FILE	*f;
	tokenlist	*head;

	f = fopen(argv[1],"r");
	head = tokenize(f);
	while (head != NULL)
	{
		printf("%s\n", head -> token);
		head = head -> next;
	}
} */