|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T r
Length: 12090 (0x2f3a) Types: TextFile Names: »rankind.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki └─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/rankind.c«
/* Copyright 1987 Gary Perlman */ #include "stat.h" PGM(rankind,Rank Order Analysis for Independent Conditions,5.3,02/04/87) /* rankind getword centile getopt setreal number fisher2x2 prodlist \ primes chisq z setint fiveplot numline ranksort ordstat chitest */ #define MAXCOND 20 /* maximum number of conditions */ #define MAXCHARS 50 /* maximum number of chars in words */ #define MAXDATA 1000 /* max number of input data points */ /* critical N's over which to use approximations */ #define NMANNWHITNEY 20 /* according to Siegel */ #define N1KRUSKALWALLICE 5 /* for k = 3, any n > 5 --> not (small) */ #define N2KRUSKALWALLICE 20 /* any n > 20 --> not (small) */ #define NFISHER 100 /* largest total cell freq for exact test */ #define SMALLCELL 5.0 /* anything smaller should be noted */ char *Condname[MAXCOND]; /* condition names */ int Condno; /* condition number */ int Nconds; /* number of conditions == Condno + 1 */ int Count[MAXCOND]; /* number of data in each condition */ int NAcount[MAXCOND]; /* number of NA in each condition */ float Mins[MAXCOND]; float Quart1[MAXCOND]; float Median[MAXCOND]; /* fivenums */ float Quart3[MAXCOND]; float Maxs[MAXCOND]; float Alldata[MAXDATA]; /* all the data stored in here */ int Allcount; /* total number of data points */ float *Condat[MAXCOND]; /* pointers into Alldata for each condition */ float Grandmin; /* min of all the data */ float Grandmedian; /* median of all the data */ float Grandmax; /* max of all the data */ /* OPTIONS */ Boole InfoVersion; /* print version information */ Boole InfoLimits; /* print program limits */ Boole InfoOptions; /* print usage information */ double Splitter = (-1.0); /* value used to separate condition */ Boole Rankmeans; /* print average ranks? */ Boole Plot; /* should we plot? */ Boole Plotwidth = 60; /* width of plot */ Boole Yates = TRUE; /* apply Yates' correction for continuity */ #define statheader(header) printf ("\n%s:\n", header) \f main (argc, argv) char **argv; { int cond; float rank[MAXDATA]; float sorted[MAXDATA]; double sumrank[MAXCOND]; ARGV0; initial (argc, argv); checkstdin (); readdata (); summarize (); if (Plot) doplot (); if (Nconds > 1) { domedian (); for (cond = 0; cond < Allcount; cond++) rank[cond] = Alldata[cond]; if (ranksort (rank, sorted, NULL, Allcount)) ERRMSG0 (could not rank data for rank order tests) averanks (rank, sumrank, Rankmeans); if (Nconds == 2) /* two conditions */ domannwhitney (sorted, sumrank); dokruskalwallice (sorted, sumrank); } exit (SUCCESS); } \f initial (argc, argv) char **argv; { extern char *optarg; extern int optind; int errflg = 0; int C; int cond; while ((C = getopt (argc, argv, "prs:w:yLOV")) != EOF) switch (C) { case 'O': InfoOptions = TRUE; break; case 'V': InfoVersion = TRUE; break; case 'L': InfoLimits = TRUE; break; case 'p': Plot = TRUE; break; case 'r': Rankmeans = TRUE; break; case 's': if (setreal (Argv0, 's', optarg, &Splitter)) errflg++; break; case 'w': if (setint (Argv0, 'P', optarg, &Plotwidth, 10, 100)) errflg++; Plot = TRUE; break; case 'y': Yates = FALSE; break; default: errflg++; break; } if (errflg) USAGE ([-pry] [-w plotwidth] [-s splitter] [names]) usinfo (); for (cond = 0; optind + cond < argc; cond++) { if (cond >= MAXCOND) ERRMANY (condition names,MAXCOND) Condname[cond] = argv[optind+cond]; } } \f usinfo () { if (InfoVersion) pver (Version); if (InfoLimits) { plim (Argv0); const (MAXCOND, "maximum number of conditions"); const (MAXDATA, "maximum number of input data points"); const (MAXCHARS, "maximum number of characters in input words"); } if (InfoOptions) { ppgm (Argv0, Purpose); lopt ('p', "print fivenum plots", Plot); lopt ('r', "print average ranks for conditions", Rankmeans); ropt ('s', "splitter", "condition separator value", Splitter); iopt ('w', "width", "plot width", Plotwidth); lopt ('y', "use Yates' correction for continuity", Yates); oper ("[names]", "condition names", "Cond-1 Cond-2 ..."); } if (InfoVersion || InfoLimits || InfoOptions) exit (SUCCESS); } \f char * newname (cond) int cond; { char *name = myalloc (char, 10); VOID sprintf (name, "Cond-%d", cond); return (name); } \f checktable (stat, parname, parval) char *stat; char *parname; int parval; { printf ("\tCheck a table for %s", stat); if (parname && *parname) printf (" with %s = %d", parname, parval); putchar ('\n'); } readdata () { char word[MAXCHARS]; /* string input data read in here */ double datum; /* each data value */ int cond; /* loop index for conditions */ Condat[0] = &Alldata[0]; while (getword (word, stdin)) { if (isna (word)) { NAcount[Condno]++; continue; } if (!number (word)) ERRNUM (word,input data) datum = atof (word); if (datum == Splitter) /* new condition */ { if (Count[Condno] != 0) /* ignore superfluous leading splitter(s) */ { Condno++; Condat[Condno] = &Alldata[Allcount]; } } else /* real datum */ { if (Allcount >= MAXDATA) ERRMANY (data points,MAXDATA) if (Condno >= MAXCOND) ERRMANY (conditions,MAXCOND) Alldata[Allcount] = datum; Count[Condno]++; Allcount++; } } if (Count[Condno] == 0) Condno--; Nconds = Condno + 1; for (cond = 0; cond < Nconds; cond++) if (Condname[cond] == NULL || *Condname[cond] == '\0') Condname[cond] = newname (cond+1); } \f doplot () { char *fiveplot (); int cond; putchar ('\n'); for (cond = 0; cond < Nconds; cond++) { printf ("%-9.9s |%s|\n", Condname[cond], fiveplot (Mins[cond], Quart1[cond], Median[cond], Quart3[cond], Maxs[cond], Plotwidth, Grandmin, Grandmax)); } printf ("%-9.9s ", ""); numline (Grandmin, Grandmax, Plotwidth); } \f domedian () { int above[MAXCOND]; /* count of values above Grandmedian */ int below[MAXCOND]; /* count of values below Grandmedian */ int *matrix[2]; /* matrix of above/below */ char *rowname[2]; /* names of above/below rows */ int tabove = 0; /* total above Grandmedian */ int tbelow = 0; /* total below Grandmedian */ int n; /* total scores not tied */ int i; int cond; float condval; /* a value in a condition */ statheader ("Median-Test"); for (cond = 0; cond < Nconds; cond++) { above[cond] = 0; below[cond] = 0; for (i = 0; i < Count[cond]; i++) { condval = Condat[cond][i]; if (condval < Grandmedian) below[cond]++; else if (condval > Grandmedian) above[cond]++; } tabove += above[cond]; tbelow += below[cond]; } n = tabove + tbelow; /* 2 x 2 design, use Fisher exact test if N <= NFISHER */ if (n == 0) { printf ("\tHmmm, all these numbers seem to equal the median\n"); return; } if (Nconds == 2 && n <= NFISHER) fishtest (above[0], above[1], below[0], below[1]); matrix[0] = above; matrix[1] = below; rowname[0] = "above"; rowname[1] = "below"; chitest (matrix, rowname, Condname, 2, Nconds, Yates); } \f domannwhitney (sorted, sumrank) float *sorted; /* sorted data */ double *sumrank; /* sum of all ranks in conditions */ { int n1 = Count[0]; int n2 = Count[1]; int bign = max (n1, n2); int n1n2 = n1*n2; double R1 = sumrank[0]; double U; /* U statistic for Ux, x is larger group */ double mean = n1n2*0.5; /* expected mean for U */ double sqrt (); double sd; /* expected sd (will be corrected with T) */ double T, tiecorrect (); /* correction factor for ties */ double poz (); double z; /* Z statistic for U */ statheader ("Mann-Whitney U"); U = n1n2 + n1*(n1+1)*0.5 - R1; if (U < mean) U = n1n2 - U; tprint ("U", U); tprint ("U'", n1n2 - U); /* normal approximation */ T = sqrt (tiecorrect (sorted, Allcount)); if (fzero (T)) { printf ("\tHmmm, all these numbers seem tied\n"); return; } sd = sqrt ( T * n1n2 * (n1+n2+1) / 12.0 ); z = (U - mean) / sd; tprint ("z(U) (corrected for ties)", z); tprint ("One tailed p(z(U))", 1.0 - poz (z)); if (bign <= NMANNWHITNEY) checktable ("U", "n", bign); } \f dokruskalwallice (sorted, sumrank) float *sorted; /* sorted data */ double *sumrank; /* sum of all ranks in conditions */ { int cond; double H; /* computed statistic */ double T, tiecorrect (); /* correction factor for ties */ double pochisq (); Boole smallsample = TRUE; /* send the analyst to the tables */ statheader ("Kruskal-Wallis"); #ifdef NEVER /* This operation from BMD '79, page 612, did not work */ if (Nconds == 2) /* report Mann-Whitney U */ { double U; U = sumrank[0] - Count[0] * (Count[0]+1.0) / 2.0; tprint ("U (not corrected for ties)", U); } #endif NEVER H = 0.0; for (cond = 0; cond < Nconds; cond++) H += sumrank[cond] * sumrank[cond] / Count[cond]; H *= 12.0 / (Allcount * (Allcount + 1.0)); H -= 3.0 * (Allcount + 1); tprint ("H (not corrected for ties)", H); T = tiecorrect (sorted, Allcount); if (fzero (T)) { printf ("\tHmmm, all these numbers seem tied\n"); return; } tprint ("Tie correction factor", T); H /= T; tprint ("H (corrected for ties)", H); if (Nconds > 3) smallsample = FALSE; else if (Nconds == 3) { for (cond = 0; cond < Nconds; cond++) if (Count[cond] > N1KRUSKALWALLICE) smallsample = FALSE; } else for (cond = 0; cond < Nconds; cond++) if (Count[cond] > N2KRUSKALWALLICE) smallsample = FALSE; chiprint (H, Nconds-1, pochisq (H, Nconds - 1)); if (smallsample) checktable ("Kruskal-Wallis H", NULL, 0); } \f printvec (v, n) float *v; int n; { int i; for (i = 0; i < n; i++) printf ("%g ", v[i]); putchar ('\n'); } \f /*FUNCTION averanks: report average sum ranks */ averanks (rank, sumrank, display) float *rank; /* input ranks */ double *sumrank; /* output sums of ranks */ Boole display; { int cond; int i, j; i = 0; for (cond = 0; cond < Nconds; cond++) { sumrank[cond] = 0.0; for (j = 0; j < Count[cond]; j++) sumrank[cond] += rank[i++]; } if (display) { statheader ("Average Ranks"); for (cond = 0; cond < Nconds; cond++) { printf ("\t\t%-12s %4d %10.2f\n", Condname[cond], Count[cond], sumrank[cond]/Count[cond]); } } } \f /*FUNCTION tiecorrect: compute tie correction dividing factor for U and H */ double tiecorrect (sorted, n) float *sorted; /* vector of sorted values */ int n; /* number of values in sorted vector */ { double T = 0.0; /* correction factor for ties */ int nties; /* number of ties at a particular rank */ int i; if (n <= 1) return (1.0); for (i = 0; i < n-1; i++) { if (sorted[i] == sorted[i+1]) { nties = 1; while (i < n-1 && sorted[i] == sorted[i+1]) { nties++; i++; } T += (double) nties * (double) nties * (double) nties - nties; } } T /= (double) n * (double) n * (double) n - n; T = 1.0 - T; return (T); } \f summarize () { int cond; double *ordstat (); /* returns fivenums in vector */ double *fivenum; /* returned by ordstat */ int tnacount = 0; /* total of NA counts */ for (cond = 0; cond < Nconds; cond++) tnacount += NAcount[cond]; for (cond = 0; cond < Nconds; cond++) { fivenum = ordstat (Condat[cond], Count[cond], cond, Condname[cond], NAcount[cond]); Mins[cond] = fivenum[0]; Quart1[cond] = fivenum[1]; Median[cond] = fivenum[2]; Quart3[cond] = fivenum[3]; Maxs[cond] = fivenum[4]; } if (Nconds > 1) fivenum = ordstat (Alldata, Allcount, Nconds, "Total", tnacount); Grandmin = fivenum[0]; Grandmedian = fivenum[2]; Grandmax = fivenum[4]; }