|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T m
Length: 13699 (0x3583)
Types: TextFile
Names: »mdmat.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/mdmat.c«
/* Copyright 1985 Gary Perlman */
#include "stat.h"
FUN(mdmat,MultiDimensional Matrix Routines,5.1,02/03/87)
/*
mdmat: routines for reading and manipulating multidimensional arrays
These routines assume each input datum is on a line by itself,
preceded by strings encoding conditions under which it was obtained.
The total of the combinations of conditions is used to create an
array with software to support simulation of a multidimensional array.
Accessible through global variables are the data, the names/number of
factors, the names/number of levels of factors. All these that are larger
than a scalar are dynamically allocated, but not freed.
Access into the mdarray is done with the functions mdaddr and mdnext.
A level array is one of positive integers used as indexes into dimensions
in the mdarray. For example, if we had a 3d-array, we could access
elements with a[i][j][k], but with these mdarrays, we do:
level[0] = i; level[1] = j; level[2] = k;
a[mdaddr (level)];
mdaddr does all the multiplying necessary.
Iteration through the array is somewhat automated with the mdnext
function. If we start with a level vector with 0's, successive
calls to mdnext will count through each factor (dimension).
For example, if factor 0 has 3 levels and factor 1 has 2, then we count:
0 0, 0 1, 1 0, 1 1, 2 0, 2 1
There is some subtlety in the parameters to mdnext because of the
development use of mdmat: data analysis. In data analysis, it is
common to summarize a crossing of some factors, averaging or summing
over all others. This is done by providing a "source" parameter
that tells mdnext the names of the factors of interest. This
is conceptually a set, but implemented as bits, them abstraction is
hidden mostly in macros. The last parameter to mdnext tells
whether the next level of a factor of interest or one not of interest
should be provided. The expected use is to (1) cycle through all
possible combinations of factors (called sources) and for each source,
(2) cycle through all the levels of source factors, (3) summarizing
over non-source factors. This is coded as (T==1):
for (source = 0; source < nsources; source++)
do
{
do
summarize over nonsources (e.g., sum += data[mdaddr(level)];)
while (mdnext (level, source, F));
report/store summary of this combination of source factor levels
}
while (mdnext (level, source, T));
*/
\f
#ifndef R_DATA
#define I_DATA
#endif
#include "mdmat.h"
Posint mdfill (); /* reads in data from temporary file */
char *mdlevels (); /* reads levels of factors into data */
char *calloc ();
#ifndef MSDOS /* signal processing to remove temp file */
#include <signal.h>
static char *MDtmpfile;
/* FUNCTION mdonint: remove temporary md file on interrupt */
mdonint ()
{
VOID signal (SIGINT, SIG_IGN);
WARNING (...interrupted...removing tempfile)
VOID unlink (MDtmpfile);
exit (1);
}
#endif MSDOS
\f
/* Global Data */
Posint Nfactors; /* total number of factors */
char **Factname; /* names of factors + data in last */
Posint *Nlevels; /* number of levels of each factor */
char ***Levelname; /* level names */
DATUM *Datax; /* will hold all the data */
short *Nreplics; /* number of replications in each cell */
Posint Maxlev = MAXLEV; /* maximum number of levels */
Posint NAcount = 0; /* number of missing points */
\f
static
ncmp (sp1, sp2)
char **sp1, **sp2;
{
return (numcmp (*sp1, *sp2));
}
static
sortnames (vec, n)
char **vec;
int n;
{
int i;
int ncmp ();
for (i = 0; i < n; i++)
if (!number (vec[i]))
return;
qsort ((char *) vec, n, sizeof (char *), ncmp);
}
\f
/*FUNCTION mdread: read multidimensional matrix */
Posint
mdread (argc, argv, firstname)
char **argv;
{
char *tmpdata;
int i;
Posint ncells;
tmpdata = mdlevels (argc, argv, firstname);
for (i = 0; i < Nfactors; i++)
sortnames (Levelname[i], Nlevels[i]);
ncells = mdfill (tmpdata);
return (ncells);
}
\f
/*FUNCTION mdlevels: finds the number of levels of each factor */
/*
For each line, it reads in the levels of each factor.
It assumes that the number of levels equals the maximum levelnumber.
The data is read from the stdin but is copied for further use.
Returns the name of a temp file where data are stored.
*/
static
char *
mdlevels (argc, argv, firstop)
char **argv;
int firstop; /* first operand (factor names) */
{
register int factor; /* looping variable */
register int level; /* looping variable */
char line[BUFSIZ]; /* each data line read in here */
char *column[MAXFACT+2]; /* data line separated in cols */
char *ptr;
int ncols; /* number of columns in line */
static char tmpname[100]; /* temporary file */
FILE *datafile; /* pointer to temporary file */
#ifndef MSDOS
int mdonint ();
MDtmpfile = tmpname;
VOID signal (SIGINT, mdonint);
#endif MSDOS
VOID tmpfile (argv[0], tmpname);
if ((datafile = fopen (tmpname, "w")) == NULL)
ERROPEN ("temporary file")
while (fgets (line, BUFSIZ, stdin))
{
fputs (line, datafile); /* save data for next pass */
ncols = parselin (line, column, MAXFACT+2);
if (ncols == 0)
continue;
if (Nfactors == 0) /* initialize */
{
Nfactors = ncols - 1;
if (Nfactors < 1 || Nfactors > MAXFACT)
ERRMSG1 (must have between one and %d factors, MAXFACT)
if (argc - firstop > Nfactors + 1)
ERRMANY (factor names,Nfactors)
Factname = myalloc (char *, Nfactors+1);
if (Factname == NULL)
ERRSPACE (factor names)
Factname[Nfactors] = "DATA"; /* data name */
for (factor = firstop; factor < argc; factor++)
Factname[factor-firstop] = argv[factor];
for (factor = factor-firstop; factor < Nfactors; factor++)
{
Factname[factor] = myalloc (char, 2);
Factname[factor][0] = factor + 'A';
Factname[factor][1] = '\0';
}
Nlevels = (Posint *) calloc (Nfactors, sizeof (Posint));
if (Nlevels == NULL)
ERRSPACE (numbers of levels of factors)
Levelname = myalloc (char **, Nfactors);
if (Levelname == NULL)
ERRSPACE (level names)
for (factor = 0; factor < Nfactors; factor++)
{
Levelname[factor] = myalloc (char *, Maxlev);
if (Levelname[factor] == NULL)
ERRSPACE (level names)
}
}
if (ncols != Nfactors+1)
ERRRAGGED
/* check for new factor name */
for (factor = 0; factor < Nfactors; factor++)
{
for (level = 0; level < Nlevels[factor]; level++)
if (!strcmp (Levelname[factor][level], column[factor]))
break;
if (level == Maxlev)
ERRMANY (levels, Maxlev)
if (level == Nlevels[factor]) /* a new level */
Levelname[factor][Nlevels[factor]++] = strdup (column[factor]);
}
if (isna (column[Nfactors]))
{
NAcount++;
continue;
}
#ifdef R_DATA /* input must be numerical */
if (!number (column[Nfactors]))
ERRNUM (column[Nfactors],data value)
#endif R_DATA
#ifdef I_DATA /* input must be a frequency count */
for (ptr = column[Nfactors]; isdigit (*ptr); ptr++)
continue;
if (*ptr) /* non digit -> not a frequency count */
ERRMSG1 (datum (%s) is not a frequency count, column[Nfactors])
#endif I_DATA
}
if (Nfactors == 0)
ERRDATA
for (factor = 0; factor < Nfactors; factor++)
if (Nlevels[factor] < 2)
ERRMSG1 (factor %s must have at least two levels, Factname[factor])
VOID fclose (datafile);
return (tmpname);
}
\f
/*FUNCTION mdaddr: return unique index for each combination factor levels */
Posint
mdaddr (level)
Posint *level; /* levels (>= 0) of each factor */
{
register int factor; /* looping variable */
int aindex; /* level of each factor read in here */
int coeff = 1; /* aindex multiplied by coeff */
aindex = level[Nfactors-1];
for (factor = Nfactors-2; factor >= 0; factor--)
{
coeff *= Nlevels[factor+1];
aindex += coeff * level[factor];
}
return (aindex);
}
\f
/*FUNCTION mdfill: read data from datafile and store it in data array */
/*
Space is allocated for the data array and the number of replics per cell.
For each line, it reads the levels of each factor and finds the location
where the data is to be stored in data by calling mdaddr with the level
numbers stored in the array called level. Any space not used in data
(because of nested design, for example) has nreplics == 0.
Finally, it removes the temporary data file.
returns the number of cells allocated.
*/
static
Posint
mdfill (tmpname)
char *tmpname;
{
register Posint address; /* where data will be added */
register int factor; /* looping variable */
Posint level[MAXFACT]; /* level of each factor */
char line[BUFSIZ]; /* each data input line read in here */
char *column[MAXFACT+1]; /* data line in columns */
FILE *datafile;
Posint ncells = 1;
for (factor = 0; factor < Nfactors; factor++)
ncells *= Nlevels[factor];
if ((Datax = (DATUM *) calloc (ncells, sizeof (*Datax))) == NULL)
ERRSPACE (data)
#ifdef R_DATA /* allocate space for replications */
if ((Nreplics = (short *) calloc (ncells, sizeof (*Nreplics))) == NULL)
ERRSPACE (data)
#endif R_DATA
if ((datafile = fopen (tmpname, "r")) == NULL)
ERROPEN ("temporary file");
while (fgets (line, BUFSIZ, datafile))
{
if (parselin (line, column, MAXFACT+1) == 0) /* blank line */
continue;
for (factor = 0; factor < Nfactors; factor++)
{
level[factor] = 0;
while (strcmp (column[factor], Levelname[factor][level[factor]]))
level[factor]++;
}
address = mdaddr (level);
#ifdef R_DATA
Nreplics[address]++;
#endif R_DATA
Datax[address] += CONV (column[Nfactors]);
}
#ifdef R_DATA /* average all cells by number of replications */
for (address = 0; address < ncells; address++)
if (Nreplics[address] > 1)
Datax[address] /= Nreplics[address];
#endif R_DATA
VOID fclose (datafile);
#ifndef MSDOS
VOID signal (SIGINT, SIG_DFL); /* really, this should reset to previous */
#endif MSDOS
VOID unlink (tmpname);
return (ncells);
}
\f
/*FUNCTION mdnext: simulate a counting system based on Nlevels[factors] */
Boole
mdnext (level, source, sourceflag)/* returns whether there are more levels */
Posint level[MAXFACT]; /* the current levels of each factor */
Posint source; /* bit array of factors to (not) increment */
Boole sourceflag; /* incr source factor if TRUE, else non-source */
{
register int factor;
for (factor = Nfactors-1; factor >= 0; factor--)
if (sourceflag == member (factor, source))
if (++level[factor] < Nlevels[factor])
return (TRUE);
else /* go to next `decimal' place */
level[factor] = 0;
return (FALSE);
}
\f
/*FUNCTION printeffect: print cell summary of an effect */
#ifdef TRACE
printeffect ()
{
Posint source;
int factor;
Posint level[MAXFACT];
Boole sources, nonsources;
DATUM sum;
Posint count;
Posint address;
Posint nsources = (1 << Nfactors);
for (source = 0; source < nsources; source++)
{
for (factor = 0; factor < Nfactors; factor++)
{
level[factor] = 0;
printf ("%s\t", Factname[factor]);
}
putchar ('\n');
for (sources = TRUE; sources; sources = mdnext (level, source, TRUE))
{
sum = ZERO;
count = 0;
for (nonsources = TRUE; nonsources;
nonsources = mdnext (level, source, FALSE))
{
address = mdaddr (level);
#ifdef R_DATA /* only include cells with data in them */
if (Nreplics[address])
{
#endif R_DATA
sum += Datax[address];
count++;
#ifdef R_DATA
}
#endif R_DATA
}
if (count)
{
for (factor = 0; factor < Nfactors; factor++)
if (member (factor, source))
printf ("%s ", Levelname[factor][level[factor]]);
else
putchar ('\t');
printf (FORMAT, sum);
printf ("\t%d\n", count);
}
}
}
}
#endif TRACE
\f
/*FUNCTION printlevels: print the levels of the factors */
#ifdef TRACE
printlevels ()
{
int maxlev = 0;
int factor, level;
puts ("Levels of Factors:");
for (maxlev = factor = 0; factor < Nfactors; factor++)
{
if (Nlevels[factor] > maxlev)
maxlev = Nlevels[factor];
printf ("%-7.7s%c",
Factname[factor],
factor == Nfactors-1 ? '\n' : '\t');
}
for (level = 0; level < maxlev; level++)
for (factor = 0; factor < Nfactors; factor++)
{
printf ("%-7.7s%c",
Nlevels[factor] > level ? Levelname[factor][level] : "",
factor == Nfactors-1 ? '\n' : '\t');
}
}
#endif TRACE
\f
/*FUNCTION setsize: return the number of sources (bits) in a set */
int
setsize (set, maxsize)
Posint set;
Posint maxsize;
{
int size = 0;
Posint bit;
for (bit = 0; bit < maxsize; bit++)
if (member (bit, set))
size++;
return (size);
}
\f
/*FUNCTION printsource: print the names of factors in source */
Posint
printsource (source)
Posint source;
{
int factor;
int size = 0;
printf ("SOURCE: ");
for (factor = 0; factor < Nfactors; factor++)
if (member (factor, source))
{
printf ("%s ", Factname[factor]);
size++;
}
putchar ('\n');
return (size);
}
\f
/*FUNCTION printdesign: print names and levels of factors */
VOID
printdesign (ndata)
Posint ndata;
{
int factor;
char *sformat = "%10.10s ";
char *dformat = "%10d ";
printf ("FACTOR: ");
for (factor = 0; factor <= Nfactors; factor++)
printf (sformat, Factname[factor]);
putchar ('\n');
printf ("LEVELS: ");
for (factor = 0; factor < Nfactors; factor++)
printf (dformat, Nlevels[factor]);
printf (dformat, ndata);
putchar ('\n');
if (NAcount)
{
printf ("NA : ");
for (factor = 0; factor < Nfactors; factor++)
printf (sformat, "");
printf (dformat, NAcount);
putchar ('\n');
}
}