|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T r
Length: 12090 (0x2f3a)
Types: TextFile
Names: »rankind.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/rankind.c«
/* Copyright 1987 Gary Perlman */
#include "stat.h"
PGM(rankind,Rank Order Analysis for Independent Conditions,5.3,02/04/87)
/*
rankind getword centile getopt setreal number fisher2x2 prodlist \
primes chisq z setint fiveplot numline ranksort ordstat chitest
*/
#define MAXCOND 20 /* maximum number of conditions */
#define MAXCHARS 50 /* maximum number of chars in words */
#define MAXDATA 1000 /* max number of input data points */
/* critical N's over which to use approximations */
#define NMANNWHITNEY 20 /* according to Siegel */
#define N1KRUSKALWALLICE 5 /* for k = 3, any n > 5 --> not (small) */
#define N2KRUSKALWALLICE 20 /* any n > 20 --> not (small) */
#define NFISHER 100 /* largest total cell freq for exact test */
#define SMALLCELL 5.0 /* anything smaller should be noted */
char *Condname[MAXCOND]; /* condition names */
int Condno; /* condition number */
int Nconds; /* number of conditions == Condno + 1 */
int Count[MAXCOND]; /* number of data in each condition */
int NAcount[MAXCOND]; /* number of NA in each condition */
float Mins[MAXCOND];
float Quart1[MAXCOND];
float Median[MAXCOND]; /* fivenums */
float Quart3[MAXCOND];
float Maxs[MAXCOND];
float Alldata[MAXDATA]; /* all the data stored in here */
int Allcount; /* total number of data points */
float *Condat[MAXCOND]; /* pointers into Alldata for each condition */
float Grandmin; /* min of all the data */
float Grandmedian; /* median of all the data */
float Grandmax; /* max of all the data */
/* OPTIONS */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
double Splitter = (-1.0); /* value used to separate condition */
Boole Rankmeans; /* print average ranks? */
Boole Plot; /* should we plot? */
Boole Plotwidth = 60; /* width of plot */
Boole Yates = TRUE; /* apply Yates' correction for continuity */
#define statheader(header) printf ("\n%s:\n", header)
\f
main (argc, argv) char **argv;
{
int cond;
float rank[MAXDATA];
float sorted[MAXDATA];
double sumrank[MAXCOND];
ARGV0;
initial (argc, argv);
checkstdin ();
readdata ();
summarize ();
if (Plot)
doplot ();
if (Nconds > 1)
{
domedian ();
for (cond = 0; cond < Allcount; cond++)
rank[cond] = Alldata[cond];
if (ranksort (rank, sorted, NULL, Allcount))
ERRMSG0 (could not rank data for rank order tests)
averanks (rank, sumrank, Rankmeans);
if (Nconds == 2) /* two conditions */
domannwhitney (sorted, sumrank);
dokruskalwallice (sorted, sumrank);
}
exit (SUCCESS);
}
\f
initial (argc, argv) char **argv;
{
extern char *optarg;
extern int optind;
int errflg = 0;
int C;
int cond;
while ((C = getopt (argc, argv, "prs:w:yLOV")) != EOF)
switch (C)
{
case 'O': InfoOptions = TRUE; break;
case 'V': InfoVersion = TRUE; break;
case 'L': InfoLimits = TRUE; break;
case 'p':
Plot = TRUE;
break;
case 'r':
Rankmeans = TRUE;
break;
case 's':
if (setreal (Argv0, 's', optarg, &Splitter))
errflg++;
break;
case 'w':
if (setint (Argv0, 'P', optarg, &Plotwidth, 10, 100))
errflg++;
Plot = TRUE;
break;
case 'y': Yates = FALSE; break;
default: errflg++; break;
}
if (errflg)
USAGE ([-pry] [-w plotwidth] [-s splitter] [names])
usinfo ();
for (cond = 0; optind + cond < argc; cond++)
{
if (cond >= MAXCOND)
ERRMANY (condition names,MAXCOND)
Condname[cond] = argv[optind+cond];
}
}
\f
usinfo ()
{
if (InfoVersion)
pver (Version);
if (InfoLimits)
{
plim (Argv0);
const (MAXCOND, "maximum number of conditions");
const (MAXDATA, "maximum number of input data points");
const (MAXCHARS, "maximum number of characters in input words");
}
if (InfoOptions)
{
ppgm (Argv0, Purpose);
lopt ('p', "print fivenum plots", Plot);
lopt ('r', "print average ranks for conditions", Rankmeans);
ropt ('s', "splitter", "condition separator value", Splitter);
iopt ('w', "width", "plot width", Plotwidth);
lopt ('y', "use Yates' correction for continuity", Yates);
oper ("[names]", "condition names", "Cond-1 Cond-2 ...");
}
if (InfoVersion || InfoLimits || InfoOptions)
exit (SUCCESS);
}
\f
char *
newname (cond)
int cond;
{
char *name = myalloc (char, 10);
VOID sprintf (name, "Cond-%d", cond);
return (name);
}
\f
checktable (stat, parname, parval)
char *stat;
char *parname;
int parval;
{
printf ("\tCheck a table for %s", stat);
if (parname && *parname)
printf (" with %s = %d", parname, parval);
putchar ('\n');
}
readdata ()
{
char word[MAXCHARS]; /* string input data read in here */
double datum; /* each data value */
int cond; /* loop index for conditions */
Condat[0] = &Alldata[0];
while (getword (word, stdin))
{
if (isna (word))
{
NAcount[Condno]++;
continue;
}
if (!number (word))
ERRNUM (word,input data)
datum = atof (word);
if (datum == Splitter) /* new condition */
{
if (Count[Condno] != 0) /* ignore superfluous leading splitter(s) */
{
Condno++;
Condat[Condno] = &Alldata[Allcount];
}
}
else /* real datum */
{
if (Allcount >= MAXDATA)
ERRMANY (data points,MAXDATA)
if (Condno >= MAXCOND)
ERRMANY (conditions,MAXCOND)
Alldata[Allcount] = datum;
Count[Condno]++;
Allcount++;
}
}
if (Count[Condno] == 0)
Condno--;
Nconds = Condno + 1;
for (cond = 0; cond < Nconds; cond++)
if (Condname[cond] == NULL || *Condname[cond] == '\0')
Condname[cond] = newname (cond+1);
}
\f
doplot ()
{
char *fiveplot ();
int cond;
putchar ('\n');
for (cond = 0; cond < Nconds; cond++)
{
printf ("%-9.9s |%s|\n", Condname[cond],
fiveplot (Mins[cond], Quart1[cond], Median[cond],
Quart3[cond], Maxs[cond], Plotwidth, Grandmin, Grandmax));
}
printf ("%-9.9s ", "");
numline (Grandmin, Grandmax, Plotwidth);
}
\f
domedian ()
{
int above[MAXCOND]; /* count of values above Grandmedian */
int below[MAXCOND]; /* count of values below Grandmedian */
int *matrix[2]; /* matrix of above/below */
char *rowname[2]; /* names of above/below rows */
int tabove = 0; /* total above Grandmedian */
int tbelow = 0; /* total below Grandmedian */
int n; /* total scores not tied */
int i;
int cond;
float condval; /* a value in a condition */
statheader ("Median-Test");
for (cond = 0; cond < Nconds; cond++)
{
above[cond] = 0;
below[cond] = 0;
for (i = 0; i < Count[cond]; i++)
{
condval = Condat[cond][i];
if (condval < Grandmedian)
below[cond]++;
else if (condval > Grandmedian)
above[cond]++;
}
tabove += above[cond];
tbelow += below[cond];
}
n = tabove + tbelow;
/* 2 x 2 design, use Fisher exact test if N <= NFISHER */
if (n == 0)
{
printf ("\tHmmm, all these numbers seem to equal the median\n");
return;
}
if (Nconds == 2 && n <= NFISHER)
fishtest (above[0], above[1], below[0], below[1]);
matrix[0] = above;
matrix[1] = below;
rowname[0] = "above";
rowname[1] = "below";
chitest (matrix, rowname, Condname, 2, Nconds, Yates);
}
\f
domannwhitney (sorted, sumrank)
float *sorted; /* sorted data */
double *sumrank; /* sum of all ranks in conditions */
{
int n1 = Count[0];
int n2 = Count[1];
int bign = max (n1, n2);
int n1n2 = n1*n2;
double R1 = sumrank[0];
double U; /* U statistic for Ux, x is larger group */
double mean = n1n2*0.5; /* expected mean for U */
double sqrt ();
double sd; /* expected sd (will be corrected with T) */
double T, tiecorrect (); /* correction factor for ties */
double poz ();
double z; /* Z statistic for U */
statheader ("Mann-Whitney U");
U = n1n2 + n1*(n1+1)*0.5 - R1;
if (U < mean)
U = n1n2 - U;
tprint ("U", U);
tprint ("U'", n1n2 - U);
/* normal approximation */
T = sqrt (tiecorrect (sorted, Allcount));
if (fzero (T))
{
printf ("\tHmmm, all these numbers seem tied\n");
return;
}
sd = sqrt ( T * n1n2 * (n1+n2+1) / 12.0 );
z = (U - mean) / sd;
tprint ("z(U) (corrected for ties)", z);
tprint ("One tailed p(z(U))", 1.0 - poz (z));
if (bign <= NMANNWHITNEY)
checktable ("U", "n", bign);
}
\f
dokruskalwallice (sorted, sumrank)
float *sorted; /* sorted data */
double *sumrank; /* sum of all ranks in conditions */
{
int cond;
double H; /* computed statistic */
double T, tiecorrect (); /* correction factor for ties */
double pochisq ();
Boole smallsample = TRUE; /* send the analyst to the tables */
statheader ("Kruskal-Wallis");
#ifdef NEVER
/* This operation from BMD '79, page 612, did not work */
if (Nconds == 2) /* report Mann-Whitney U */
{
double U;
U = sumrank[0] - Count[0] * (Count[0]+1.0) / 2.0;
tprint ("U (not corrected for ties)", U);
}
#endif NEVER
H = 0.0;
for (cond = 0; cond < Nconds; cond++)
H += sumrank[cond] * sumrank[cond] / Count[cond];
H *= 12.0 / (Allcount * (Allcount + 1.0));
H -= 3.0 * (Allcount + 1);
tprint ("H (not corrected for ties)", H);
T = tiecorrect (sorted, Allcount);
if (fzero (T))
{
printf ("\tHmmm, all these numbers seem tied\n");
return;
}
tprint ("Tie correction factor", T);
H /= T;
tprint ("H (corrected for ties)", H);
if (Nconds > 3)
smallsample = FALSE;
else if (Nconds == 3)
{
for (cond = 0; cond < Nconds; cond++)
if (Count[cond] > N1KRUSKALWALLICE)
smallsample = FALSE;
}
else
for (cond = 0; cond < Nconds; cond++)
if (Count[cond] > N2KRUSKALWALLICE)
smallsample = FALSE;
chiprint (H, Nconds-1, pochisq (H, Nconds - 1));
if (smallsample)
checktable ("Kruskal-Wallis H", NULL, 0);
}
\f
printvec (v, n)
float *v;
int n;
{
int i;
for (i = 0; i < n; i++)
printf ("%g ", v[i]);
putchar ('\n');
}
\f
/*FUNCTION averanks: report average sum ranks */
averanks (rank, sumrank, display)
float *rank; /* input ranks */
double *sumrank; /* output sums of ranks */
Boole display;
{
int cond;
int i, j;
i = 0;
for (cond = 0; cond < Nconds; cond++)
{
sumrank[cond] = 0.0;
for (j = 0; j < Count[cond]; j++)
sumrank[cond] += rank[i++];
}
if (display)
{
statheader ("Average Ranks");
for (cond = 0; cond < Nconds; cond++)
{
printf ("\t\t%-12s %4d %10.2f\n",
Condname[cond], Count[cond], sumrank[cond]/Count[cond]);
}
}
}
\f
/*FUNCTION tiecorrect: compute tie correction dividing factor for U and H */
double
tiecorrect (sorted, n)
float *sorted; /* vector of sorted values */
int n; /* number of values in sorted vector */
{
double T = 0.0; /* correction factor for ties */
int nties; /* number of ties at a particular rank */
int i;
if (n <= 1)
return (1.0);
for (i = 0; i < n-1; i++)
{
if (sorted[i] == sorted[i+1])
{
nties = 1;
while (i < n-1 && sorted[i] == sorted[i+1])
{
nties++;
i++;
}
T += (double) nties * (double) nties * (double) nties - nties;
}
}
T /= (double) n * (double) n * (double) n - n;
T = 1.0 - T;
return (T);
}
\f
summarize ()
{
int cond;
double *ordstat (); /* returns fivenums in vector */
double *fivenum; /* returned by ordstat */
int tnacount = 0; /* total of NA counts */
for (cond = 0; cond < Nconds; cond++)
tnacount += NAcount[cond];
for (cond = 0; cond < Nconds; cond++)
{
fivenum = ordstat (Condat[cond], Count[cond], cond, Condname[cond], NAcount[cond]);
Mins[cond] = fivenum[0];
Quart1[cond] = fivenum[1];
Median[cond] = fivenum[2];
Quart3[cond] = fivenum[3];
Maxs[cond] = fivenum[4];
}
if (Nconds > 1)
fivenum = ordstat (Alldata, Allcount, Nconds, "Total", tnacount);
Grandmin = fivenum[0];
Grandmedian = fivenum[2];
Grandmax = fivenum[4];
}