|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T o
Length: 12022 (0x2ef6)
Types: TextFile
Names: »oneway.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/oneway.c«
/* Copyright 1985 Gary Perlman */
/* use splitter as string group labels */
#include "stat.h"
PGM(oneway,One-Way Between Groups t-Test/ANOVA,5.5,1/20/87)
main (argc, argv) char **argv;
{
ARGV0;
initial (argc, argv);
checkstdin ();
readdata ();
printstats ();
doplot ();
oneway ();
exit (0);
}
#define MAXGROUP 20
#define MAXCHARS 50 /* maximum number of chars in words */
char *Name[MAXGROUP]; /* names of the different groups */
int Ngroups = 0; /* number of groups of data */
int Count[MAXGROUP]; /* number of cells in each group */
int NAcount[MAXGROUP]; /* count of NA values by group */
double Sum[MAXGROUP]; /* sum of all values in the group */
double Sumsq[MAXGROUP]; /* sums of squares */
double Mins[MAXGROUP]; /* min value in the group */
double Maxs[MAXGROUP]; /* max value in the group */
double Mean[MAXGROUP]; /* mean of each group */
double Standev[MAXGROUP]; /* standard deviation of each group */
/* OPTIONS */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
double Splitter = -1.0; /* used to indicate group differences */
Boole Ttest = FALSE; /* Ttest format for two group case */
Boole Unweighted; /* type of solution */
Boole Plot = FALSE; /* should plot be printed? */
int Plotwidth = 60; /* width of error bar plot */
initial (argc, argv) char **argv;
{
extern char *optarg;
extern int optind;
int errflg = 0;
int C;
int group;
while ((C = getopt (argc, argv, "ptuw:s:LOV")) != EOF)
switch (C)
{
case 'O': InfoOptions = TRUE; break;
case 'V': InfoVersion = TRUE; break;
case 'L': InfoLimits = TRUE; break;
case 'p':
Plot = TRUE;
break;
case 's':
if (setreal (Argv0, 's', optarg, &Splitter))
errflg++;
break;
case 'u': /* unweighted means solution */
Unweighted = TRUE;
break;
case 'w':
if (setint (Argv0, 'w', optarg, &Plotwidth, 10, 100))
errflg++;
Plot = TRUE;
break;
case 't': /* t-test format */
Ttest = TRUE;
break;
default: errflg++; break;
}
if (errflg)
USAGE ([-ptu] [-w plotwidth] [-s splitter] [names])
usinfo ();
for (group = 0; optind + group < argc; group++)
Name[group] = argv[optind+group];
}
char *
newname (group)
int group;
{
char *name = myalloc (char, 10);
VOID sprintf (name, "Group-%d", group);
return (name);
}
readdata ()
{
char word[MAXCHARS]; /* string input data read in here */
double datum; /* each data value */
int grindex = 0; /* group index == Ngroups - 1 */
while (getword (word, stdin))
{
if (isna (word))
{
NAcount[grindex]++;
continue;
}
if (!number (word))
ERRNUM (word,input data)
datum = atof (word);
if (datum == Splitter) /* new group */
{
if (Count[grindex] != 0) /* until some data read for this group */
grindex = Ngroups++;
}
else /* real datum */
{
if (grindex >= MAXGROUP)
ERRMANY (groups,MAXGROUP)
if (Count[grindex] == 0)
{
if (Ngroups == 0)
Ngroups = 1;
Mins[grindex] = Maxs[grindex] = datum;
}
else
{
if (datum < Mins[grindex])
Mins[grindex] = datum;
else if (datum > Maxs[grindex])
Maxs[grindex] = datum;
}
Sum[grindex] += datum;
Sumsq[grindex] += datum * datum;
if (Name[grindex] == NULL)
Name[grindex] = newname (grindex+1);
Count[grindex]++;
}
}
if (Count[grindex] == 0) /* ignore empty last group */
Ngroups--;
if (Ngroups > MAXGROUP)
ERRMANY (groups,MAXGROUP)
if (Ngroups == 0)
ERRDATA
}
#define var(count,sum,ss) (count<=1) ? 0.0 : ((ss-sum*sum/count)/(count-1))
printstats ()
{
int group;
int tcount = 0; /* total count */
int tnacount = 0; /* count of all NA's */
double tsum = 0.0; /* total sum */
double tss = 0.0; /* total sum of squares */
double tmin = Mins[0]; /* overall min */
double tmax = Maxs[0]; /* overall max */
char *numfmt = "%8.3f ";
for (group = 0; group < Ngroups; group++)
tnacount += NAcount[group];
if (Ngroups)
{
printf ("%-9.9s ", "Name");
printf ("%5s ", "N");
if (tnacount)
printf ("%5s ","NA");
printf ("%8s ", "Mean");
printf ("%8s ", "SD");
printf ("%8s ", "Min");
printf ("%8s ", "Max");
putchar ('\n');
}
for (group = 0; group < Ngroups; group++)
{
printf ("%-9.9s ", Name[group]);
printf ("%5d ", Count[group]);
if (tnacount)
printf ("%5d ", NAcount[group]);
printf (numfmt, Mean[group] = Sum[group]/Count[group]);
Standev[group] = sqrt (var (Count[group], Sum[group], Sumsq[group]));
printf (numfmt, Standev[group]);
printf (numfmt, Mins[group]);
printf (numfmt, Maxs[group]);
putchar ('\n');
}
if (Ngroups > 1) /* print totals over groups */
{
for (group = 0; group < Ngroups; group++)
{
tcount += Count[group];
tsum += Sum[group];
tss += Sumsq[group];
if (Mins[group] < tmin)
tmin = Mins[group];
if (Maxs[group] > tmax)
tmax = Maxs[group];
}
printf ("%-9.9s ", "Total");
printf ("%5d ", tcount);
if (tnacount)
printf ("%5d ", tnacount);
printf (numfmt, tsum/tcount);
printf (numfmt, sqrt (var (tcount, tsum, tss)));
printf (numfmt, tmin);
printf (numfmt, tmax);
putchar ('\n');
}
}
/*
compute one-way anova based on Guilford & Fruchter (5th Ed) p. 239
unweighted solution based on Keppel p. 351
*/
oneway ()
{
double Wsum; /* weighted solution grand sum */
double Wsumsq; /* weighted solution grand sum squares (SStotal) */
int Wcount; /* weighted solution N total */
double Ucount; /* unweighted sample size */
double SSn; /* will be SUM(i) Sum[i]*Sum[i]/Count[i] */
int dfbetween; /* degrees of freedom numerator */
int dfwithin; /* degrees of freedom denominator */
double WSSbetween; /* weighted solution sum of squares between groups */
double USSbetween; /* unweighted ss between groups */
double MeanGsq; /* sum of squares of group means */
double MeanSum; /* sum of all means */
double SSwithin; /* sum of squares within groups */
int group; /* loop index */
if (Ngroups < 2)
ERRMSG0 (You need at least two groups of data for a comparison)
Wsum = Wsumsq = 0.0;
Ucount = 0.0;
MeanSum = 0.0;
MeanGsq = 0.0;
Wcount = 0;
for (group = 0; group < Ngroups; group++)
{
Wsum += Sum[group];
Wsumsq += Sumsq[group];
Wcount += Count[group];
Ucount += 1.0 / Count[group];
MeanGsq += Mean[group]*Mean[group];
MeanSum += Mean[group];
}
Ucount = Ngroups / Ucount;
MeanSum = MeanSum * MeanSum / Ngroups;
if (Wcount <= Ngroups)
ERRMSG0 (You need more than one datum per group for comparison)
for (SSn = 0.0, group = 0; group < Ngroups; group++)
SSn += (Sum[group] * Sum[group]) / Count[group];
WSSbetween = SSn - Wsum*Wsum/Wcount;
USSbetween = Ucount * (MeanGsq - MeanSum);
SSwithin = Wsumsq - SSn;
dfbetween = Ngroups - 1;
dfwithin = Wcount - Ngroups;
if (Unweighted)
ftable ("Unweighted", USSbetween, dfbetween, SSwithin, dfwithin);
else
ftable ("Weighted", WSSbetween, dfbetween, SSwithin, dfwithin);
}
ftable (solution, ssbetween, dfbetween, sswithin, dfwithin)
char *solution; /* name of the solution analysis type */
double ssbetween; /* ss between groups */
int dfbetween; /* degrees of freedom numerator */
double sswithin; /* ss within groups (error) */
int dfwithin; /* degrees of freedom numerator */
{
double msbetween; /* mean square effect */
double mswithin; /* mean square errror */
double F; /* F ratio */
double p, pof (); /* probability of F */
if (dfbetween <= 0 || dfwithin <= 0)
{
WARNING (invalid degrees of freedom)
return;
}
if (sswithin < FZERO)
{
WARNING (zero error term implies infinite F or t statistic);
return;
}
msbetween = ssbetween / dfbetween;
mswithin = sswithin / dfwithin;
if (fzero (mswithin))
{
F = 99999.999;
p = 0.0;
}
else
{
F = msbetween / mswithin;
p = pof (F, dfbetween, dfwithin);
}
putchar ('\n');
printf ("%s Means Analysis:\n", solution);
if (Ngroups == 2 && Ttest == TRUE)
printf ("t(%d) = %.3f p = %.3f\n", dfwithin, sqrt (F), p);
else
{
printf ("%-8s %10s %5s %10s %8s %5s\n",
"Source", "SS", "df", "MS", "F", "p");
printf ("%-8s ", "Between");
printf ("%10.3f ", ssbetween);
printf ("%5d ", dfbetween);
printf ("%10.3f ", msbetween);
printf ("%8.3f ", F);
printf ("%5.3f ", p);
if (p < .001) putchar ('*');
if (p < .01) putchar ('*');
if (p < .05) putchar ('*');
putchar ('\n');
printf ("%-8s ", "Within");
printf ("%10.3f ", sswithin);
printf ("%5d ", dfwithin);
printf ("%10.3f ", mswithin);
putchar ('\n');
}
}
doplot ()
{
char *linegraph ();
double minval = Mins[0];
double maxval = Maxs[0];
int group;
if (Plot == FALSE)
return;
for (group = 1; group < Ngroups; group++)
{
if (Mins[group] < minval)
minval = Mins[group];
if (Maxs[group] > maxval)
maxval = Maxs[group];
}
putchar ('\n');
for (group = 0; group < Ngroups; group++)
printf ("%-9.9s |%s|\n", Name[group],
linegraph (group, minval, maxval));
printf ("%-9.9s ", "");
numline (minval, maxval, Plotwidth);
}
#define EPSILON (.000000001)
#define scale(x,minx,maxx) ((int) (Plotwidth*((x)-(minx))/((maxx)-(minx)+EPSILON)))
#define addch(line,i,c) (line[(i)] = (c))
#define MINCHAR '<'
#define MAXCHAR '>'
#define MEANCHAR '#'
#define MINSE '('
#define MAXSE ')'
#define STANDEV '='
#define TWOSTANDEV '-'
char *
linegraph (group, minval, maxval)
int group;
double minval; /* global minimum */
double maxval; /* global maximum */
{
static char line[BUFSIZ];
int i;
int low, high; /* range values in graph buffer */
int minlow, maxhigh; /* scaled versions of group extrema */
double sqrt ();
double se = Standev[group] / sqrt ((double) Count[group]);
for (i = 0; i < Plotwidth; i++)
line[i] = ' ';
line[Plotwidth] = '\0';
minlow = scale (Mins[group], minval, maxval);
maxhigh = scale (Maxs[group], minval, maxval);
/* two standard deviations above and below the mean */
low = scale (Mean[group]-2.0*Standev[group], minval, maxval);
high = scale (Mean[group]+2.0*Standev[group], minval, maxval);
if (low < minlow)
low = minlow;
if (high > maxhigh)
high = maxhigh;
for (i = low; i <= high; i++)
addch (line, i, TWOSTANDEV);
/* one standard deviation above and below the mean */
low = scale (Mean[group]-Standev[group], minval, maxval);
high = scale (Mean[group]+Standev[group], minval, maxval);
if (low < minlow)
low = minlow;
if (high > maxhigh)
high = maxhigh;
for (i = low; i <= high; i++)
addch (line, i, STANDEV);
/* one standard error around the mean */
low = scale (Mean[group] - se, minval, maxval);
high = scale (Mean[group] + se, minval, maxval);
if (low < minlow)
low = minlow;
if (high > maxhigh)
high = maxhigh;
addch (line, low, MINSE);
addch (line, high, MAXSE);
/* write in minimum and maximum, then mean */
addch (line, minlow, MINCHAR);
addch (line, maxhigh, MAXCHAR);
i = scale (Mean[group], minval, maxval);
addch (line, i, MEANCHAR);
return (line);
}
usinfo ()
{
if (InfoVersion)
pver (Version);
if (InfoLimits)
{
plim (Argv0);
const (MAXGROUP, "maximum number of groups");
const (MAXCHARS, "maximum number of characters in input words");
}
if (InfoOptions)
{
ppgm (Argv0, Purpose);
lopt ('p', "print error bar plots", Plot);
ropt ('s', "splitter", "group membership separator value", Splitter);
lopt ('t', "print significance test in t-test format", Ttest);
lopt ('u', "use unweighted means analysis", Unweighted);
iopt ('w', "plotwidth", "plot width", Plotwidth);
oper ("[names]", "group names", "Group-1 Group-2 ...");
}
if (InfoVersion || InfoLimits || InfoOptions)
exit (0);
}