DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - metrics - download
Index: T f

⟦d8792e3fb⟧ TextFile

    Length: 10332 (0x285c)
    Types: TextFile
    Names: »field.c«

Derivation

└─⟦a05ed705a⟧ Bits:30007078 DKUUG GNU 2/12/89
    └─⟦f68d31fd9⟧ »./gawk-2.11.tar.Z« 
        └─⟦2fc192871⟧ 
            └─⟦this⟧ »gawk-2.11/field.c« 

TextFile

/*
 * field.c - routines for dealing with fields and record parsing
 */

/* 
 * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
 * 
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 * 
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "awk.h"

extern void assoc_clear();
extern int a_get_three();
extern int get_rs();

static char *get_fs();
static int re_split();
static int parse_fields();
static void set_element();

char *line_buf = NULL;	/* holds current input line */

static char *parse_extent;	/* marks where to restart parse of record */
static int parse_high_water=0;	/* field number that we have parsed so far */
static char f_empty[] = "";
static char *save_fs = " ";	/* save current value of FS when line is read,
				 * to be used in deferred parsing
				 */


NODE **fields_arr;		/* array of pointers to the field nodes */
NODE node0;			/* node for $0 which never gets free'd */
int node0_valid = 1;		/* $(>0) has not been changed yet */

void
init_fields()
{
	emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
	node0.type = Node_val;
	node0.stref = 0;
	node0.stptr = "";
	node0.flags = (STR|PERM);	/* never free buf */
	fields_arr[0] = &node0;
}

/*
 * Danger!  Must only be called for fields we know have just been blanked, or
 * fields we know don't exist yet.  
 */

/*ARGSUSED*/
static void
set_field(num, str, len, dummy)
int num;
char *str;
int len;
NODE *dummy;	/* not used -- just to make interface same as set_element */
{
	NODE *n;
	int t;
	static int nf_high_water = 0;

	if (num > nf_high_water) {
		erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
		nf_high_water = num;
	}
	/* fill in fields that don't exist */
	for (t = parse_high_water + 1; t < num; t++)
		fields_arr[t] = Nnull_string;
	n = make_string(str, len);
	(void) force_number(n);
	fields_arr[num] = n;
	parse_high_water = num;
}

/* Someone assigned a value to $(something).  Fix up $0 to be right */
static void
rebuild_record()
{
	register int tlen;
	register NODE *tmp;
	NODE *ofs;
	char *ops;
	register char *cops;
	register NODE **ptr;
	register int ofslen;

	tlen = 0;
	ofs = force_string(OFS_node->var_value);
	ofslen = ofs->stlen;
	ptr = &fields_arr[parse_high_water];
	while (ptr > &fields_arr[0]) {
		tmp = force_string(*ptr);
		tlen += tmp->stlen;
		ptr--;
	}
	tlen += (parse_high_water - 1) * ofslen;
	emalloc(ops, char *, tlen + 1, "fix_fields");
	cops = ops;
	ops[0] = '\0';
	for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) {
		tmp = *ptr;
		if (tmp->stlen == 1)
			*cops++ = tmp->stptr[0];
		else if (tmp->stlen != 0) {
			memcpy(cops, tmp->stptr, tmp->stlen);
			cops += tmp->stlen;
		}
		if (ptr != &fields_arr[parse_high_water]) {
			if (ofslen == 1)
				*cops++ = ofs->stptr[0];
			else if (ofslen != 0) {
				memcpy(cops, ofs->stptr, ofslen);
				cops += ofslen;
			}
		}
	}
	tmp = make_string(ops, tlen);
	free(ops);
	deref = fields_arr[0];
	do_deref();
	fields_arr[0] = tmp;
}

/*
 * setup $0, but defer parsing rest of line until reference is made to $(>0)
 * or to NF.  At that point, parse only as much as necessary.
 */
void
set_record(buf, cnt)
char *buf;
int cnt;
{
	register int i;

	assign_number(&NF_node->var_value, (AWKNUM)-1);
	for (i = 1; i <= parse_high_water; i++) {
		deref = fields_arr[i];
		do_deref();
	}
	parse_high_water = 0;
	node0_valid = 1;
	if (buf == line_buf) {
		deref = fields_arr[0];
		do_deref();
		save_fs = get_fs();
		node0.type = Node_val;
		node0.stptr = buf;
		node0.stlen = cnt;
		node0.stref = 1;
		node0.flags = (STR|PERM);	/* never free buf */
		fields_arr[0] = &node0;
	}
}

NODE **
get_field(num, assign)
int num;
int assign;	/* this field is on the LHS of an assign */
{
	int n;

	/*
	 * if requesting whole line but some other field has been altered,
	 * then the whole line must be rebuilt
	 */
	if (num == 0 && (node0_valid == 0 || assign)) {
		/* first, parse remainder of input record */
		if (NF_node->var_value->numbr == -1) {
			if (parse_high_water == 0)
				parse_extent = node0.stptr;
			n = parse_fields(HUGE-1, &parse_extent,
		    		node0.stlen - (parse_extent - node0.stptr),
		    		save_fs, set_field, (NODE *)NULL);
			assign_number(&NF_node->var_value, (AWKNUM)n);
		}
		if (node0_valid == 0)
			rebuild_record();
		return &fields_arr[0];
	}
	if (num > 0 && assign)
		node0_valid = 0;
	if (num <= parse_high_water)	/* we have already parsed this field */
		return &fields_arr[num];
	if (parse_high_water == 0 && num > 0)	/* starting at the beginning */
		parse_extent = fields_arr[0]->stptr;
	/*
	 * parse up to num fields, calling set_field() for each, and saving
	 * in parse_extent the point where the parse left off
	 */
	n = parse_fields(num, &parse_extent,
		fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
		save_fs, set_field, (NODE *)NULL);
	if (num == HUGE-1)
		num = n;
	if (n < num) {	/* requested field number beyond end of record;
			 * set_field will just extend the number of fields,
			 * with empty fields
			 */
		set_field(num, f_empty, 0, (NODE *) NULL);
		/*
		 * if this field is onthe LHS of an assignment, then we want to
		 * set NF to this value, below
		 */
		if (assign)
			n = num;
	}
	/*
	 * if we reached the end of the record, set NF to the number of fields
	 * so far.  Note that num might actually refer to a field that
	 * is beyond the end of the record, but we won't set NF to that value at
	 * this point, since this is only a reference to the field and NF
	 * only gets set if the field is assigned to -- in this case n has
	 * been set to num above
	 */
	if (*parse_extent == '\0')
		assign_number(&NF_node->var_value, (AWKNUM)n);

	return &fields_arr[num];
}

/*
 * this is called both from get_field() and from do_split()
 */
static int
parse_fields(up_to, buf, len, fs, set, n)
int up_to;	/* parse only up to this field number */
char **buf;	/* on input: string to parse; on output: point to start next */
int len;
register char *fs;
void (*set) ();	/* routine to set the value of the parsed field */
NODE *n;
{
	char *s = *buf;
	register char *field;
	register char *scan;
	register char *end = s + len;
	int NF = parse_high_water;
	char rs = get_rs();


	if (up_to == HUGE)
		NF = 0;
	if (*fs && *(fs + 1) != '\0') {	/* fs is a regexp */
		struct re_registers reregs;

		scan = s;
		if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
			while ((*scan == '\n' || *scan == ' ' || *scan == '\t')
			    && scan < end)
				scan++;
		}
		while (re_split(scan, (int)(end - scan), fs, &reregs) != -1 &&
		    NF < up_to) {
			(*set)(++NF, scan, reregs.start[0], n);
			scan += reregs.end[0];
		}
		if (NF != up_to && scan <= end) {
			if (!(rs == 0 && scan == end)) {
				(*set)(++NF, scan, (int)(end - scan), n);
				scan = end;
			}
		}
		*buf = scan;
		return (NF);
	}
	for (scan = s; scan < end && NF < up_to; scan++) {
		/*
		 * special case:  fs is single space, strip leading
		 * whitespace 
		 */
		if (*fs == ' ') {
			while ((*scan == ' ' || *scan == '\t') && scan < end)
				scan++;
			if (scan >= end)
				break;
		}
		field = scan;
		if (*fs == ' ')
			while (*scan != ' ' && *scan != '\t' && scan < end)
				scan++;
		else {
			while (*scan != *fs && scan < end)
				scan++;
			if (rs && scan == end-1 && *scan == *fs) {
				(*set)(++NF, field, (int)(scan - field), n);
				field = scan;
			}
		}
		(*set)(++NF, field, (int)(scan - field), n);
		if (scan == end)
			break;
	}
	*buf = scan;
	return NF;
}

static int
re_split(buf, len, fs, reregsp)
char *buf, *fs;
int len;
struct re_registers *reregsp;
{
	typedef struct re_pattern_buffer RPAT;
	static RPAT *rp;
	static char *last_fs = NULL;

	if ((last_fs != NULL && !STREQ(fs, last_fs))
	    || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0)
			 ^ (rp->translate != NULL))))
	{
		/* fs has changed or IGNORECASE has changed */
		free(rp->buffer);
		free(rp->fastmap);
		free((char *) rp);
		free(last_fs);
		last_fs = NULL;
	}
	if (last_fs == NULL) {	/* first time */
		emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
		memset((char *) rp, 0, sizeof(RPAT));
		emalloc(rp->buffer, char *, 8, "re_split");
		rp->allocated = 8;
		emalloc(rp->fastmap, char *, 256, "re_split");
		emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
		(void) strcpy(last_fs, fs);
		if (! strict && IGNORECASE_node->var_value->numbr != 0.0)
			rp->translate = casetable;
		else
			rp->translate = NULL;
		if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
			fatal("illegal regular expression for FS: `%s'", fs);
	}
	return re_search(rp, buf, len, 0, len, reregsp);
}

NODE *
do_split(tree)
NODE *tree;
{
	NODE *t1, *t2, *t3;
	register char *splitc;
	char *s;
	NODE *n;

	if (a_get_three(tree, &t1, &t2, &t3) < 3)
		splitc = get_fs();
	else
		splitc = force_string(t3)->stptr;

	n = t2;
	if (t2->type == Node_param_list)
		n = stack_ptr[t2->param_cnt];
	if (n->type != Node_var && n->type != Node_var_array)
		fatal("second argument of split is not a variable");
	assoc_clear(n);

	tree = force_string(t1);

	s = tree->stptr;
	return tmp_number((AWKNUM)
		parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
}

static char *
get_fs()
{
	register NODE *tmp;
	static char buf[10];

	tmp = force_string(FS_node->var_value);
	if (get_rs() == 0) {
		if (tmp->stlen == 1) {
			if (tmp->stptr[0] == ' ')
				(void) strcpy(buf, "[ 	\n]+");
			else
				sprintf(buf, "[%c\n]", tmp->stptr[0]);
		} else if (tmp->stlen == 0) {
			buf[0] = '\n';
			buf[1] = '\0';
		} else
			return tmp->stptr;
		return buf;
	}
	return tmp->stptr;
}

static void
set_element(num, s, len, n)
int num;
char *s;
int len;
NODE *n;
{
	*assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len);
}