|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - downloadIndex: ┃ T r ┃
Length: 13317 (0x3405) Types: TextFile Names: »rankrel.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki └─ ⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/rankrel.c«
/* Copyright 1987 Gary Perlman */ #include "stat.h" PGM(rankrel,Rank Order Analysis for Related Conditions,5.2,02/04/87) /* matched groups median test */ /* rankrel centile getopt setreal number prodlist \ primes chisq z setint ranksort ordstat */ #define PFVEC(vec,count) \ { \ int ind; \ for(ind=0;ind<count;ind++) \ printf("%g ", vec[ind]); \ printf ("\n"); \ } /* critical N's over which to use approximations */ #define NWILCOXON 16 /* according to Snedecor & Cochran */ #define NSIGNTEST 25 /* according to Siegel */ #define NSPEARMAN 10 /* according to Siegel */ #define MAXCOND 20 /* maximum number of conditions */ #define MAXCHARS BUFSIZ /* maximum number of chars in lines */ #define MAXDATA 100 /* max number of input data cases */ int Maxdata = MAXDATA; char *Condname[MAXCOND]; /* condition names */ float *Condat[MAXCOND]; /* pointers into all data for each condition */ int Nconds; /* number of conditions == ncols */ int Count; /* number of data in each condition */ int NAcount[MAXCOND]; /* number of NA missing values */ int TNAcount; /* number of missing cases */ /* OPTIONS */ Boole InfoVersion; /* print version information */ Boole InfoLimits; /* print program limits */ Boole InfoOptions; /* print usage information */ Boole Rankmeans; /* print average ranks for conditions? */ Boole Yates = TRUE; /* apply Yates' correction for continuity */ Boole Docomps = TRUE; /* do comparison tests? **/ #define noteyates() printf("\tNOTE: Yates' correction for continuity applied\n") #define statheader(header) printf ("\n%s:\n", header) \f /*FUNCTION main: rankrel */ main (argc, argv) char **argv; { ARGV0; initial (argc, argv); checkstdin (); readdata (); summarize (); if (Nconds > 1) { if (Docomps) { if (Nconds == 2) { dosigntest (0, 1); dowilcoxon (0, 1); } dofriedman (); } dospearman (); } exit (SUCCESS); } \f /*FUNCTION initial: set options and condition names */ initial (argc, argv) char **argv; { extern char *optarg; extern int optind; int errflg = 0; int C; int cond; while ((C = getopt (argc, argv, "c:rsyLOV")) != EOF) switch (C) { case 'O': InfoOptions = TRUE; break; case 'V': InfoVersion = TRUE; break; case 'L': InfoLimits = TRUE; break; case 'c': if (setint (Argv0, 'c', optarg, &Maxdata, 1, MAXINT)) errflg++; break; case 'r': Rankmeans = TRUE; break; case 's': Docomps = FALSE; break; case 'y': Yates = FALSE; break; default: errflg++; break; } if (errflg) USAGE ([-ry] [-c maxcases] [names]) usinfo (); for (cond = 0; optind + cond < argc; cond++) { if (cond >= MAXCOND) ERRMANY (condition names,MAXCOND) Condname[cond] = argv[optind+cond]; } } \f /*FUNCTION usinfo: online help */ usinfo () { if (InfoVersion) pver (Version); if (InfoLimits) { plim (Argv0); const (MAXCOND, "maximum number of conditions"); const (Maxdata, "maximum number of input data points"); const (MAXCHARS, "maximum number of characters in input lines"); } if (InfoOptions) { ppgm (Argv0, Purpose); iopt ('c', "maxcases", "maximum number of input cases", Maxdata); lopt ('r', "print average ranks for conditions", Rankmeans); lopt ('s', "run comparative significance tests", Docomps); lopt ('y', "use Yates' correction for continuity", Yates); oper ("[names]", "condition names", "Cond-1 Cond-2 ..."); } if (InfoVersion || InfoLimits || InfoOptions) exit (SUCCESS); } \f /*FUNCTION newname: make a new default name for a condition */ char * newname (cond) int cond; { char *name = myalloc (char, 10); VOID sprintf (name, "Cond-%d", cond); return (name); } \f /*FUNCTION checktable: tell analyst to look for value in a table */ checktable (stat, parname, parval) char *stat; char *parname; int parval; { printf ("\tCheck a table for %s", stat); if (parname && *parname) printf (" with %s = %d", parname, parval); putchar ('\n'); } \f /*FUNCTION: readdata: read in data into global arrays */ readdata () { char line[MAXCHARS]; char *sarray[MAXCOND]; int ncols; int cond; Boole dropcase; while (gets (line)) { ncols = parselin (line, sarray, MAXCOND); if (ncols == 0) continue; dropcase = FALSE; for (cond = 0; cond < ncols; cond++) if (isna (sarray[cond])) { NAcount[cond]++; dropcase = TRUE; } if (dropcase == TRUE) { TNAcount++; continue; } if (Nconds == 0) /* initialize */ { Nconds = ncols; if (Nconds > MAXCOND) ERRMANY (conditions,MAXCOND) for (cond = 0; cond < Nconds; cond++) { Condat[cond] = myalloc (float, Maxdata); if (Condat[cond] == NULL) ERRSPACE (data) } } if (Count == Maxdata) ERRMANY (cases,Maxdata) if (ncols != Nconds) ERRRAGGED for (cond = 0; cond < Nconds; cond++) if (number (sarray[cond])) Condat[cond][Count] = atof (sarray[cond]); else ERRNUM (sarray[cond],input value) Count++; } if (Count <= 1) ERRDATA for (cond = 0; cond < Nconds; cond++) if (Condname[cond] == NULL || *Condname[cond] == '\0') Condname[cond] = newname (cond+1); } \f /*FUNCTION summarize: print summary statistics */ summarize () { int cond; int i; int allcount = 0; float *alldata = myalloc (float, Nconds * Count); int tna = 0; if (TNAcount) printf ("Cases with missing data: %d ignored\n", TNAcount); for (cond = 0; cond < Nconds; cond++) tna += NAcount[cond]; for (cond = 0; cond < Nconds; cond++) { if (alldata) for (i = 0; i < Count; i++) alldata[allcount++] = Condat[cond][i]; ordstat (Condat[cond], Count, cond, Condname[cond], NAcount[cond]); } if (alldata != NULL) { if (Nconds > 1) ordstat (alldata, Nconds*Count, Nconds, "Total", tna); free ((char *) alldata); } else WARNING (not enough space to compute grand median) } \f /*FUNCTION dosigntest: compute sign test for two conditions */ /* ties are dropped and N is reduced if N > NSIGNTEST, then use normal approximation: apobin else use exact binomial probability */ dosigntest (cond1, cond2) int cond1, cond2; { float *data1 = Condat[cond1]; float *data2 = Condat[cond2]; int N = 0; /* number of cases in test */ int r = 0; /* number of cases first > second */ int i; double p; double apobin (), pobin (); statheader ("Binomial Sign Test"); for (i = 0; i < Count; i++) { if (data1[i] == data2[i]) continue; if (data1[i] > data2[i]) r++; N++; } printf ("\tNumber of cases %s is above %s: %3d\n", Condname[cond1], Condname[cond2], r); printf ("\tNumber of cases %s is below %s: %3d\n", Condname[cond1], Condname[cond2], N-r); if (r < N * 0.5) r = N - r; if (N > NSIGNTEST) { p = apobin (N, 1, 2, r); tprint ("One-tail probability approximation", p); noteyates (); } else { p = pobin (N, 1, 2, r); tprint ("One-tail probability (exact)", p); } } \f /*FUNCTION dowilcoxon: compute Wilcoxon test for two conditions */ /* T = min (sumposranks, sumnegranks) if N > NWILCOXON, use normal approximation (seems good even when N = 8) mean = N (N+1) / 4 se = sqrt (N (N+1) (2N+1) / 24 ) z = (T - U) / se (one tailed) */ dowilcoxon (cond1, cond2) int cond1, cond2; { float *data1 = Condat[cond1]; float *data2 = Condat[cond2]; double diff; /* data1 - data2 */ float *rank; /* absolute differences of conditions */ Boole *neg; /* is difference negative? */ int i; /* loop varioable for cases */ double sumnegranks = 0.0; /* sum of all negatively signed ranks */ double sumposranks = 0.0; /* sum of all positively signed ranks */ double T; /* Wilcoxin statistic */ int N = 0; /* number of non-zero differences */ double mean; double se; double z, poz (); rank = myalloc (float, Count); neg = myalloc (Boole, Count); if (rank == NULL || neg == NULL) ERRSPACE(Wilcoxon Test) statheader ("Wilcoxon Matched-Pairs Signed-Ranks Test"); printf (" Comparison of %s and %s\n", Condname[cond1], Condname[cond2]); for (i = 0; i < Count; i++) { diff = data1[i] - data2[i]; if (!fzero (diff)) { neg[N] = (diff < 0.0); rank[N] = fabs (diff); N++; } } if (N == 0) { printf ("\tHmmm, all these pairs seem to be the same.\n"); return; } if (ranksort (rank, NULL, NULL, N)) ERRMSG0 (could not rank data for Wilcoxon test) for (i = 0; i < N; i++) if (neg[i]) sumnegranks += rank[i]; else sumposranks += rank[i]; T = min (sumnegranks, sumposranks); tprint ("T (smaller ranksum of like signs)", T); tprint ("N (number of signed differences)", (double) N); mean = N * (N+1) * 0.25; se = sqrt (N * (N+1) * (2.0 * N + 1.0) / 24.0); z = fabs (T - mean); if (Yates) { z -= 0.5; if (z < 0.0) z = 0.0; } z /= se; tprint ("z", z); tprint ("One-tail probability approximation", 1.0 - poz (z)); if (Yates) noteyates (); if (N <= NWILCOXON) checktable ("T", "N", N); free ((char *) neg); free ((char *) rank); } \f /*FUNCTION dofriedman: multi-condition test */ dofriedman () { double R = 0.0; /* sum of squared sum of ranks in conds */ double chi_r; /* function of R and formula */ double pochisq (); int i, cond; float rank[MAXCOND]; /* ranks of each case computed here */ double sumrank[MAXCOND]; /* sum of ranks in a condition */ /* the following two vectors save allocation time for ranksort */ float sorted[MAXCOND]; /* sorted condition values */ int order[MAXCOND]; /* sorted order index numbers */ Boole needtable = FALSE; /* should we recommend looking at a table? */ for (cond = 0; cond < Nconds; cond++) sumrank[cond] = 0.0; for (i = 0; i < Count; i++) { for (cond = 0; cond < Nconds; cond++) rank[cond] = Condat[cond][i]; if (ranksort (rank, sorted, order, Nconds)) ERRMSG0 (could not rank data for Friedman test) for (cond = 0; cond < Nconds; cond++) sumrank[cond] += rank[cond]; } if (Rankmeans) { statheader ("Average Ranks"); for (cond = 0; cond < Nconds; cond++) printf ("\t\t%-12s %10.1f %10.2f\n", Condname[cond], sumrank[cond], sumrank[cond]/Count); } for (cond = 0; cond < Nconds; cond++) R += sumrank[cond] * sumrank[cond]; chi_r = 12.0 / (Count*Nconds*(Nconds+1)) * R - (3 * Count * (Nconds+1)); statheader ("Friedman Chi-Square Test for Ranks"); tprint ("Chi-square of ranks", chi_r); chiprint (chi_r, Nconds-1, pochisq (chi_r, Nconds - 1)); if (Nconds == 2) /* really, this is a two-tailed wilcoxon */ needtable = TRUE; if (Nconds == 3 && Count <= 9) needtable = TRUE; if (Nconds == 4 && Count <= 4) needtable = TRUE; if (needtable) checktable ("Friedman", "N", Count); } \f /*FUNCTION dospearman: compute & print rank-order correlation matrix */ /* WARNING: this procedure clobbers the original global data */ dospearman () { /* the following two vectors save allocation time for ranksort */ float *sorted; /* sorted condition values */ int *order; /* sorted order index numbers */ double cor (); double r; double pof (); /* prob of F ratio */ int cond; int cond2; order = myalloc (int, Count); sorted = myalloc (float, Count); if (order == NULL || sorted == NULL) ERRSPACE(Spearman Rho) for (cond = 0; cond < Nconds; cond++) if (ranksort (Condat[cond], sorted, order, Count)) ERRMSG0 (could not rank data for Spearman Rho) statheader ("Spearman Rank Correlation (rho) [corrected for ties]"); pcritrho (Count); if (Count < NSPEARMAN) checktable ("Spearman rho", "N", Count); if (Nconds == 2) { r = cor (Condat[0], Condat[1], Count); tprint ("rho", r); return; } #define LFORMAT "%-8.8s " #define SFORMAT "%8.8s " #define PFORMAT "%8.3f " printf (LFORMAT, ""); for (cond = 0; cond < Nconds; cond++) printf (SFORMAT, Condname[cond]); putchar ('\n'); for (cond = 0; cond < Nconds; cond++) { printf (SFORMAT, Condname[cond]); for (cond2 = 0; cond2 < Nconds; cond2++) { if (cond == cond2) printf (SFORMAT, ""); else { r = cor (Condat[cond], Condat[cond2], Count); printf (PFORMAT, r); } } putchar ('\n'); } printf (LFORMAT, ""); for (cond = 0; cond < Nconds; cond++) printf (SFORMAT, Condname[cond]); putchar ('\n'); free ((char *) sorted); free ((char *) order); } \f /*FUNCTION critcor: compute critical value for correlation */ /* t = r * sqrt (df / (1-r*r)); F = r*r * df / (1-r*r) r = sqrt (F / (df + F)) */ double critcor (n, p) int n; /* number of points */ double p; /* significance level */ { int df = n - 2; double critf (); /* critical F ratio */ double sqrt (); double r; double f; if (p <= 0.0 || p >= 1.0 || df < 1) return (0.0); f = critf (p, 1, df); r = sqrt (f / (df + f)); return (r); } pcritrho (n) int n; { double critcor (); /* compute critical correlation value */ double p05 = critcor (n, .05); double p01 = critcor (n, .01); tprint ("Critical r (.05) t approximation", p05); tprint ("Critical r (.01) t approximation", p01); }