|
DataMuseum.dk — Presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T f
Length: 10555 (0x293b) Types: TextFile Names: »field.c«
└─⟦9ae75bfbd⟧ Bits:30007242 EUUGD3: Starter Kit └─⟦6dcdebfcf⟧ »EurOpenD3/gnu/gawk/gawk-2.11.1.tar.Z« └─⟦a05ed705a⟧ Bits:30007078 DKUUG GNU 2/12/89 └─⟦6dcdebfcf⟧ »./gawk-2.11.1.tar.Z« └─⟦3c42ca21a⟧ └─⟦this⟧ »gawk-2.11/field.c«
/*
 * field.c - routines for dealing with fields and record parsing
 */

/*
 * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "awk.h"

extern void assoc_clear();
extern int a_get_three();
extern int get_rs();

static char *get_fs();
static int re_split();
static int parse_fields();
static void set_element();

char *line_buf = NULL;	/* holds current input line */

static char *parse_extent;	/* marks where to restart parse of record */
static int parse_high_water=0;	/* field number that we have parsed so far */
static char f_empty[] = "";
static char *save_fs = " ";	/* save current value of FS when line is read,
				 * to be used in deferred parsing
				 */

NODE **fields_arr;		/* array of pointers to the field nodes */
NODE node0;			/* node for $0 which never gets free'd */
int node0_valid = 1;		/* $(>0) has not been changed yet */

/*
 * init_fields - allocate fields_arr with room for a single pointer and
 * install node0 (an empty, permanent string value) as $0.
 */
void
init_fields()
{
	emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
	node0.type = Node_val;
	node0.stref = 0;
	node0.stptr = "";
	node0.flags = (STR|PERM);	/* never free buf */
	fields_arr[0] = &node0;
}

/*
 * set_field - store the text str[0..len) as field number num.
 *
 * Danger!  Must only be called for fields we know have just been blanked, or
 * fields we know don't exist yet.
 *
 * fields_arr is grown whenever num exceeds the largest index allocated so
 * far.  Any fields between the previous parse_high_water and num are set
 * to Nnull_string, and parse_high_water becomes num.
 */

/*ARGSUSED*/
static void
set_field(num, str, len, dummy)
int num;
char *str;
int len;
NODE *dummy;	/* not used -- just to make interface same as set_element */
{
	NODE *n;
	int t;
	static int nf_high_water = 0;	/* largest index fields_arr can hold */

	if (num > nf_high_water) {
		erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
		nf_high_water = num;
	}
	/* fill in fields that don't exist */
	for (t = parse_high_water + 1; t < num; t++)
		fields_arr[t] = Nnull_string;
	n = make_string(str, len);
	(void) force_number(n);
	fields_arr[num] = n;
	parse_high_water = num;
}

/*
 * rebuild_record - someone assigned a value to $(something); fix up $0
 * to be right.
 *
 * The new $0 is fields 1..parse_high_water joined by the string value of
 * OFS.  The old fields_arr[0] is released through deref/do_deref() and
 * replaced with the newly built string node.
 */
static void
rebuild_record()
{
	register int tlen;
	register NODE *tmp;
	NODE *ofs;
	char *ops;
	register char *cops;
	register NODE **ptr;
	register int ofslen;

	/* first pass: force every field to a string and total the lengths */
	tlen = 0;
	ofs = force_string(OFS_node->var_value);
	ofslen = ofs->stlen;
	ptr = &fields_arr[parse_high_water];
	while (ptr > &fields_arr[0]) {
		tmp = force_string(*ptr);
		tlen += tmp->stlen;
		ptr--;
	}
	/* one separator between each pair of adjacent fields */
	tlen += (parse_high_water - 1) * ofslen;
	emalloc(ops, char *, tlen + 1, "rebuild_record");
	cops = ops;
	ops[0] = '\0';
	/* second pass: copy fields and separators into the new buffer */
	for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) {
		tmp = *ptr;
		if (tmp->stlen == 1)
			*cops++ = tmp->stptr[0];
		else if (tmp->stlen != 0) {
			memcpy(cops, tmp->stptr, tmp->stlen);
			cops += tmp->stlen;
		}
		/* no separator after the last field */
		if (ptr != &fields_arr[parse_high_water]) {
			if (ofslen == 1)
				*cops++ = ofs->stptr[0];
			else if (ofslen != 0) {
				memcpy(cops, ofs->stptr, ofslen);
				cops += ofslen;
			}
		}
	}
	tmp = make_string(ops, tlen);
	free(ops);
	deref = fields_arr[0];
	do_deref();
	fields_arr[0] = tmp;
}

/*
 * set_record - setup $0, but defer parsing rest of line until reference
 * is made to $(>0) or to NF.  At that point, parse only as much as
 * necessary.
 *
 * NF is set to -1, the value get_field() tests for to decide that the
 * record has not been completely parsed.  All previously parsed fields
 * are released.  Only when buf is line_buf is $0 itself repointed: node0
 * then refers directly to buf (cnt bytes) and the current field separator
 * is saved for the deferred parse.
 */
void
set_record(buf, cnt)
char *buf;
int cnt;
{
	register int i;

	assign_number(&NF_node->var_value, (AWKNUM)-1);
	for (i = 1; i <= parse_high_water; i++) {
		deref = fields_arr[i];
		do_deref();
	}
	parse_high_water = 0;
	node0_valid = 1;
	if (buf == line_buf) {
		deref = fields_arr[0];
		do_deref();
		save_fs = get_fs();
		node0.type = Node_val;
		node0.stptr = buf;
		node0.stlen = cnt;
		node0.stref = 1;
		node0.flags = (STR|PERM);	/* never free buf */
		fields_arr[0] = &node0;
	}
}

/*
 * get_field - return the address of the fields_arr slot for field num,
 * parsing more of the record first if that field has not been reached.
 *
 * A nonzero assign means the field is the target of an assignment: for
 * num > 0 this marks $0 as stale (node0_valid = 0); for num == 0 it
 * forces the remainder of the record to be parsed before returning.
 */
NODE **
get_field(num, assign)
int num;
int assign;	/* this field is on the LHS of an assign */
{
	int n;

	/*
	 * if requesting whole line but some other field has been altered,
	 * then the whole line must be rebuilt
	 */
	if (num == 0 && (node0_valid == 0 || assign)) {
		/* first, parse remainder of input record */
		if (NF_node->var_value->numbr == -1) {
			if (parse_high_water == 0)
				parse_extent = node0.stptr;
			/*
			 * parse_fields() has no prototype in scope, so the
			 * length is cast to the int its definition expects
			 */
			n = parse_fields(HUGE-1, &parse_extent,
			    (int)(node0.stlen - (parse_extent - node0.stptr)),
			    save_fs, set_field, (NODE *)NULL);
			assign_number(&NF_node->var_value, (AWKNUM)n);
		}
		if (node0_valid == 0)
			rebuild_record();
		return &fields_arr[0];
	}
	if (num > 0 && assign)
		node0_valid = 0;
	if (num <= parse_high_water)	/* we have already parsed this field */
		return &fields_arr[num];
	if (parse_high_water == 0 && num > 0)	/* starting at the beginning */
		parse_extent = fields_arr[0]->stptr;
	/*
	 * parse up to num fields, calling set_field() for each, and saving
	 * in parse_extent the point where the parse left off
	 */
	n = parse_fields(num, &parse_extent,
		(int)(fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr)),
		save_fs, set_field, (NODE *)NULL);
	if (num == HUGE-1)
		num = n;
	if (n < num) {	/* requested field number beyond end of record;
			 * set_field will just extend the number of fields,
			 * with empty fields
			 */
		set_field(num, f_empty, 0, (NODE *) NULL);
		/*
		 * if this field is on the LHS of an assignment, then we want to
		 * set NF to this value, below
		 */
		if (assign)
			n = num;
	}
	/*
	 * if we reached the end of the record, set NF to the number of fields
	 * so far.  Note that num might actually refer to a field that
	 * is beyond the end of the record, but we won't set NF to that value at
	 * this point, since this is only a reference to the field and NF
	 * only gets set if the field is assigned to -- in this case n has
	 * been set to num above
	 */
	if (*parse_extent == '\0')
		assign_number(&NF_node->var_value, (AWKNUM)n);

	return &fields_arr[num];
}

/*
 * parse_fields - split the text at *buf (len bytes) into fields.
 *
 * this is called both from get_field() and from do_split()
 *
 * Each field found is reported through (*set)(field_number, start,
 * length, n).  Numbering continues from parse_high_water, unless up_to
 * is HUGE, in which case it starts again from zero.  On return *buf
 * points at the place where scanning stopped, and the function value is
 * the number of the last field set.
 *
 * An fs longer than one character is treated as a regular expression and
 * matched with re_split(); otherwise fs is a single separator character,
 * with ' ' meaning runs of blanks and tabs.
 */
static int
parse_fields(up_to, buf, len, fs, set, n)
int up_to;	/* parse only up to this field number */
char **buf;	/* on input: string to parse; on output: point to start next */
int len;
register char *fs;
void (*set) ();	/* routine to set the value of the parsed field */
NODE *n;
{
	char *s = *buf;
	register char *field;
	register char *scan;
	register char *end = s + len;
	int NF = parse_high_water;
	char rs = get_rs();

	if (up_to == HUGE)
		NF = 0;
	if (*fs && *(fs + 1) != '\0') {	/* fs is a regexp */
		struct re_registers reregs;

		scan = s;
		if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
			/* test the bound first so *end is never read */
			while (scan < end
			    && (*scan == '\n' || *scan == ' ' || *scan == '\t'))
				scan++;
		}
		s = scan;
		while (scan < end
		       && re_split(scan, (int)(end - scan), fs, &reregs) != -1
		       && NF < up_to) {
			if (reregs.end[0] == 0) {	/* null match */
				scan++;
				if (scan == end) {
					(*set)(++NF, s, (int)(scan - s), n);
					up_to = NF;
					break;
				}
				continue;
			}
			(*set)(++NF, s, (int)(scan - s + reregs.start[0]), n);
			scan += reregs.end[0];
			s = scan;
		}
		/* whatever follows the last separator is the final field */
		if (NF != up_to && scan <= end) {
			if (!(rs == 0 && scan == end)) {
				(*set)(++NF, scan, (int)(end - scan), n);
				scan = end;
			}
		}
		*buf = scan;
		return (NF);
	}
	for (scan = s; scan < end && NF < up_to; scan++) {
		/*
		 * special case:  fs is single space, strip leading
		 * whitespace
		 */
		if (*fs == ' ') {
			while (scan < end && (*scan == ' ' || *scan == '\t'))
				scan++;
			if (scan >= end)
				break;
		}
		field = scan;
		if (*fs == ' ')
			while (scan < end && *scan != ' ' && *scan != '\t')
				scan++;
		else {
			while (scan < end && *scan != *fs)
				scan++;
			/*
			 * separator is the very last character: report the
			 * field before it here; the call below then reports
			 * an empty trailing field
			 */
			if (rs && scan == end-1 && *scan == *fs) {
				(*set)(++NF, field, (int)(scan - field), n);
				field = scan;
			}
		}
		(*set)(++NF, field, (int)(scan - field), n);
		if (scan == end)
			break;
	}
	*buf = scan;
	return NF;
}

/*
 * re_split - search buf[0..len) for the regular expression fs.
 *
 * The compiled pattern is cached in a static buffer together with a copy
 * of fs.  It is thrown away and recompiled when fs differs from the
 * cached copy, or when (outside strict mode) the IGNORECASE setting no
 * longer agrees with whether the cached pattern has a translate table.
 *
 * Returns whatever re_search() returns; match offsets are left in
 * *reregsp.  An fs that fails to compile is a fatal error.
 */
static int
re_split(buf, len, fs, reregsp)
char *buf, *fs;
int len;
struct re_registers *reregsp;
{
	typedef struct re_pattern_buffer RPAT;
	static RPAT *rp;
	static char *last_fs = NULL;

	if ((last_fs != NULL && !STREQ(fs, last_fs))
	    || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0)
			 ^ (rp->translate != NULL))))
	{
		/* fs has changed or IGNORECASE has changed */
		free(rp->buffer);
		free(rp->fastmap);
		free((char *) rp);
		free(last_fs);
		last_fs = NULL;
	}
	if (last_fs == NULL) {	/* first time */
		emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
		memset((char *) rp, 0, sizeof(RPAT));
		emalloc(rp->buffer, char *, 8, "re_split");
		rp->allocated = 8;
		emalloc(rp->fastmap, char *, 256, "re_split");
		emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
		(void) strcpy(last_fs, fs);
		if (! strict && IGNORECASE_node->var_value->numbr != 0.0)
			rp->translate = casetable;
		else
			rp->translate = NULL;
		if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
			fatal("illegal regular expression for FS: `%s'", fs);
	}
	return re_search(rp, buf, len, 0, len, reregsp);
}

/*
 * do_split - implementation of split(string, array [, separator]).
 *
 * When fewer than three arguments are supplied the separator comes from
 * get_fs().  The target array is cleared and then filled, one element per
 * field, via set_element().  Returns a temporary number node holding the
 * value returned by parse_fields().
 */
NODE *
do_split(tree)
NODE *tree;
{
	NODE *t1, *t2, *t3;
	register char *splitc;
	char *s;
	NODE *n;

	if (a_get_three(tree, &t1, &t2, &t3) < 3)
		splitc = get_fs();
	else
		splitc = force_string(t3)->stptr;

	n = t2;
	/* a function parameter: use the node it refers to on the stack */
	if (t2->type == Node_param_list)
		n = stack_ptr[t2->param_cnt];
	if (n->type != Node_var && n->type != Node_var_array)
		fatal("second argument of split is not a variable");
	assoc_clear(n);

	tree = force_string(t1);

	s = tree->stptr;
	return tmp_number((AWKNUM)
		parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
}

/*
 * get_fs - return the separator string to use for field splitting.
 *
 * When get_rs() returns 0, newline must also separate fields, so a
 * single-character or empty FS is rewritten into a static buffer:
 *	FS == " "	-> "[ \n]+"
 *	FS == "c"	-> "[c\n]"
 *	FS == ""	-> "\n"
 * In every other case the string value of FS is returned unchanged.
 * The result points either at the static buffer or into the FS node,
 * so callers must not free it.
 */
static char *
get_fs()
{
	register NODE *tmp;
	static char buf[10];

	tmp = force_string(FS_node->var_value);
	if (get_rs() == 0) {
		if (tmp->stlen == 1) {
			if (tmp->stptr[0] == ' ')
				(void) strcpy(buf, "[ 	\n]+");
			else
				sprintf(buf, "[%c\n]", tmp->stptr[0]);
		} else if (tmp->stlen == 0) {
			buf[0] = '\n';
			buf[1] = '\0';
		} else
			return tmp->stptr;
		return buf;
	}
	return tmp->stptr;
}

/*
 * set_element - store the text s[0..len) in array n under the numeric
 * subscript num.  Has the same interface as set_field() so that either
 * can be handed to parse_fields().
 */
static void
set_element(num, s, len, n)
int num;
char *s;
int len;
NODE *n;
{
	*assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len);
}