|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T o
Length: 12022 (0x2ef6) Types: TextFile Names: »oneway.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki └─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/oneway.c«
/* Copyright 1985 Gary Perlman */ /* use splitter as string group labels */ #include "stat.h" PGM(oneway,One-Way Between Groups t-Test/ANOVA,5.5,1/20/87) main (argc, argv) char **argv; { ARGV0; initial (argc, argv); checkstdin (); readdata (); printstats (); doplot (); oneway (); exit (0); } #define MAXGROUP 20 #define MAXCHARS 50 /* maximum number of chars in words */ char *Name[MAXGROUP]; /* names of the different groups */ int Ngroups = 0; /* number of groups of data */ int Count[MAXGROUP]; /* number of cells in each group */ int NAcount[MAXGROUP]; /* count of NA values by group */ double Sum[MAXGROUP]; /* sum of all values in the group */ double Sumsq[MAXGROUP]; /* sums of squares */ double Mins[MAXGROUP]; /* min value in the group */ double Maxs[MAXGROUP]; /* max value in the group */ double Mean[MAXGROUP]; /* mean of each group */ double Standev[MAXGROUP]; /* standard deviation of each group */ /* OPTIONS */ Boole InfoVersion; /* print version information */ Boole InfoLimits; /* print program limits */ Boole InfoOptions; /* print usage information */ double Splitter = -1.0; /* used to indicate group differences */ Boole Ttest = FALSE; /* Ttest format for two group case */ Boole Unweighted; /* type of solution */ Boole Plot = FALSE; /* should plot be printed? */ int Plotwidth = 60; /* width of error bar plot */ initial (argc, argv) char **argv; { extern char *optarg; extern int optind; int errflg = 0; int C; int group; while ((C = getopt (argc, argv, "ptuw:s:LOV")) != EOF) switch (C) { case 'O': InfoOptions = TRUE; break; case 'V': InfoVersion = TRUE; break; case 'L': InfoLimits = TRUE; break; case 'p': Plot = TRUE; break; case 's': if (setreal (Argv0, 's', optarg, &Splitter)) errflg++; break; case 'u': /* unweighted means solution */ Unweighted = TRUE; break; case 'w': if (setint (Argv0, 'w', optarg, &Plotwidth, 10, 100)) errflg++; Plot = TRUE; break; case 't': /* t-test format */ Ttest = TRUE; break; default: errflg++; break; } if (errflg) USAGE ([-ptu] [-w plotwidth] [-s splitter] [names]) usinfo (); for (group = 0; optind + group < argc; group++) Name[group] = argv[optind+group]; } char * newname (group) int group; { char *name = myalloc (char, 10); VOID sprintf (name, "Group-%d", group); return (name); } readdata () { char word[MAXCHARS]; /* string input data read in here */ double datum; /* each data value */ int grindex = 0; /* group index == Ngroups - 1 */ while (getword (word, stdin)) { if (isna (word)) { NAcount[grindex]++; continue; } if (!number (word)) ERRNUM (word,input data) datum = atof (word); if (datum == Splitter) /* new group */ { if (Count[grindex] != 0) /* until some data read for this group */ grindex = Ngroups++; } else /* real datum */ { if (grindex >= MAXGROUP) ERRMANY (groups,MAXGROUP) if (Count[grindex] == 0) { if (Ngroups == 0) Ngroups = 1; Mins[grindex] = Maxs[grindex] = datum; } else { if (datum < Mins[grindex]) Mins[grindex] = datum; else if (datum > Maxs[grindex]) Maxs[grindex] = datum; } Sum[grindex] += datum; Sumsq[grindex] += datum * datum; if (Name[grindex] == NULL) Name[grindex] = newname (grindex+1); Count[grindex]++; } } if (Count[grindex] == 0) /* ignore empty last group */ Ngroups--; if (Ngroups > MAXGROUP) ERRMANY (groups,MAXGROUP) if (Ngroups == 0) ERRDATA } #define var(count,sum,ss) (count<=1) ? 0.0 : ((ss-sum*sum/count)/(count-1)) printstats () { int group; int tcount = 0; /* total count */ int tnacount = 0; /* count of all NA's */ double tsum = 0.0; /* total sum */ double tss = 0.0; /* total sum of squares */ double tmin = Mins[0]; /* overall min */ double tmax = Maxs[0]; /* overall max */ char *numfmt = "%8.3f "; for (group = 0; group < Ngroups; group++) tnacount += NAcount[group]; if (Ngroups) { printf ("%-9.9s ", "Name"); printf ("%5s ", "N"); if (tnacount) printf ("%5s ","NA"); printf ("%8s ", "Mean"); printf ("%8s ", "SD"); printf ("%8s ", "Min"); printf ("%8s ", "Max"); putchar ('\n'); } for (group = 0; group < Ngroups; group++) { printf ("%-9.9s ", Name[group]); printf ("%5d ", Count[group]); if (tnacount) printf ("%5d ", NAcount[group]); printf (numfmt, Mean[group] = Sum[group]/Count[group]); Standev[group] = sqrt (var (Count[group], Sum[group], Sumsq[group])); printf (numfmt, Standev[group]); printf (numfmt, Mins[group]); printf (numfmt, Maxs[group]); putchar ('\n'); } if (Ngroups > 1) /* print totals over groups */ { for (group = 0; group < Ngroups; group++) { tcount += Count[group]; tsum += Sum[group]; tss += Sumsq[group]; if (Mins[group] < tmin) tmin = Mins[group]; if (Maxs[group] > tmax) tmax = Maxs[group]; } printf ("%-9.9s ", "Total"); printf ("%5d ", tcount); if (tnacount) printf ("%5d ", tnacount); printf (numfmt, tsum/tcount); printf (numfmt, sqrt (var (tcount, tsum, tss))); printf (numfmt, tmin); printf (numfmt, tmax); putchar ('\n'); } } /* compute one-way anova based on Guilford & Fruchter (5th Ed) p. 239 unweighted solution based on Keppel p. 351 */ oneway () { double Wsum; /* weighted solution grand sum */ double Wsumsq; /* weighted solution grand sum squares (SStotal) */ int Wcount; /* weighted solution N total */ double Ucount; /* unweighted sample size */ double SSn; /* will be SUM(i) Sum[i]*Sum[i]/Count[i] */ int dfbetween; /* degrees of freedom numerator */ int dfwithin; /* degrees of freedom denominator */ double WSSbetween; /* weighted solution sum of squares between groups */ double USSbetween; /* unweighted ss between groups */ double MeanGsq; /* sum of squares of group means */ double MeanSum; /* sum of all means */ double SSwithin; /* sum of squares within groups */ int group; /* loop index */ if (Ngroups < 2) ERRMSG0 (You need at least two groups of data for a comparison) Wsum = Wsumsq = 0.0; Ucount = 0.0; MeanSum = 0.0; MeanGsq = 0.0; Wcount = 0; for (group = 0; group < Ngroups; group++) { Wsum += Sum[group]; Wsumsq += Sumsq[group]; Wcount += Count[group]; Ucount += 1.0 / Count[group]; MeanGsq += Mean[group]*Mean[group]; MeanSum += Mean[group]; } Ucount = Ngroups / Ucount; MeanSum = MeanSum * MeanSum / Ngroups; if (Wcount <= Ngroups) ERRMSG0 (You need more than one datum per group for comparison) for (SSn = 0.0, group = 0; group < Ngroups; group++) SSn += (Sum[group] * Sum[group]) / Count[group]; WSSbetween = SSn - Wsum*Wsum/Wcount; USSbetween = Ucount * (MeanGsq - MeanSum); SSwithin = Wsumsq - SSn; dfbetween = Ngroups - 1; dfwithin = Wcount - Ngroups; if (Unweighted) ftable ("Unweighted", USSbetween, dfbetween, SSwithin, dfwithin); else ftable ("Weighted", WSSbetween, dfbetween, SSwithin, dfwithin); } ftable (solution, ssbetween, dfbetween, sswithin, dfwithin) char *solution; /* name of the solution analysis type */ double ssbetween; /* ss between groups */ int dfbetween; /* degrees of freedom numerator */ double sswithin; /* ss within groups (error) */ int dfwithin; /* degrees of freedom numerator */ { double msbetween; /* mean square effect */ double mswithin; /* mean square errror */ double F; /* F ratio */ double p, pof (); /* probability of F */ if (dfbetween <= 0 || dfwithin <= 0) { WARNING (invalid degrees of freedom) return; } if (sswithin < FZERO) { WARNING (zero error term implies infinite F or t statistic); return; } msbetween = ssbetween / dfbetween; mswithin = sswithin / dfwithin; if (fzero (mswithin)) { F = 99999.999; p = 0.0; } else { F = msbetween / mswithin; p = pof (F, dfbetween, dfwithin); } putchar ('\n'); printf ("%s Means Analysis:\n", solution); if (Ngroups == 2 && Ttest == TRUE) printf ("t(%d) = %.3f p = %.3f\n", dfwithin, sqrt (F), p); else { printf ("%-8s %10s %5s %10s %8s %5s\n", "Source", "SS", "df", "MS", "F", "p"); printf ("%-8s ", "Between"); printf ("%10.3f ", ssbetween); printf ("%5d ", dfbetween); printf ("%10.3f ", msbetween); printf ("%8.3f ", F); printf ("%5.3f ", p); if (p < .001) putchar ('*'); if (p < .01) putchar ('*'); if (p < .05) putchar ('*'); putchar ('\n'); printf ("%-8s ", "Within"); printf ("%10.3f ", sswithin); printf ("%5d ", dfwithin); printf ("%10.3f ", mswithin); putchar ('\n'); } } doplot () { char *linegraph (); double minval = Mins[0]; double maxval = Maxs[0]; int group; if (Plot == FALSE) return; for (group = 1; group < Ngroups; group++) { if (Mins[group] < minval) minval = Mins[group]; if (Maxs[group] > maxval) maxval = Maxs[group]; } putchar ('\n'); for (group = 0; group < Ngroups; group++) printf ("%-9.9s |%s|\n", Name[group], linegraph (group, minval, maxval)); printf ("%-9.9s ", ""); numline (minval, maxval, Plotwidth); } #define EPSILON (.000000001) #define scale(x,minx,maxx) ((int) (Plotwidth*((x)-(minx))/((maxx)-(minx)+EPSILON))) #define addch(line,i,c) (line[(i)] = (c)) #define MINCHAR '<' #define MAXCHAR '>' #define MEANCHAR '#' #define MINSE '(' #define MAXSE ')' #define STANDEV '=' #define TWOSTANDEV '-' char * linegraph (group, minval, maxval) int group; double minval; /* global minimum */ double maxval; /* global maximum */ { static char line[BUFSIZ]; int i; int low, high; /* range values in graph buffer */ int minlow, maxhigh; /* scaled versions of group extrema */ double sqrt (); double se = Standev[group] / sqrt ((double) Count[group]); for (i = 0; i < Plotwidth; i++) line[i] = ' '; line[Plotwidth] = '\0'; minlow = scale (Mins[group], minval, maxval); maxhigh = scale (Maxs[group], minval, maxval); /* two standard deviations above and below the mean */ low = scale (Mean[group]-2.0*Standev[group], minval, maxval); high = scale (Mean[group]+2.0*Standev[group], minval, maxval); if (low < minlow) low = minlow; if (high > maxhigh) high = maxhigh; for (i = low; i <= high; i++) addch (line, i, TWOSTANDEV); /* one standard deviation above and below the mean */ low = scale (Mean[group]-Standev[group], minval, maxval); high = scale (Mean[group]+Standev[group], minval, maxval); if (low < minlow) low = minlow; if (high > maxhigh) high = maxhigh; for (i = low; i <= high; i++) addch (line, i, STANDEV); /* one standard error around the mean */ low = scale (Mean[group] - se, minval, maxval); high = scale (Mean[group] + se, minval, maxval); if (low < minlow) low = minlow; if (high > maxhigh) high = maxhigh; addch (line, low, MINSE); addch (line, high, MAXSE); /* write in minimum and maximum, then mean */ addch (line, minlow, MINCHAR); addch (line, maxhigh, MAXCHAR); i = scale (Mean[group], minval, maxval); addch (line, i, MEANCHAR); return (line); } usinfo () { if (InfoVersion) pver (Version); if (InfoLimits) { plim (Argv0); const (MAXGROUP, "maximum number of groups"); const (MAXCHARS, "maximum number of characters in input words"); } if (InfoOptions) { ppgm (Argv0, Purpose); lopt ('p', "print error bar plots", Plot); ropt ('s', "splitter", "group membership separator value", Splitter); lopt ('t', "print significance test in t-test format", Ttest); lopt ('u', "use unweighted means analysis", Unweighted); iopt ('w', "plotwidth", "plot width", Plotwidth); oper ("[names]", "group names", "Group-1 Group-2 ..."); } if (InfoVersion || InfoLimits || InfoOptions) exit (0); }