DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - download
Index: ┃ T r

⟦6addfe169⟧ TextFile

    Length: 13317 (0x3405)
    Types: TextFile
    Names: »rankrel.c«

Derivation

└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
    └─ ⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/rankrel.c« 

TextFile

/* Copyright 1987 Gary Perlman */

#include "stat.h"

PGM(rankrel,Rank Order Analysis for Related Conditions,5.2,02/04/87)

/*
	matched groups median test
*/

/*
	rankrel centile getopt setreal number prodlist \
		primes chisq z setint ranksort ordstat
*/
#define	PFVEC(vec,count) \
	{ \
	int 	ind; \
	for(ind=0;ind<count;ind++) \
		printf("%g ", vec[ind]); \
	printf ("\n"); \
	}

/* critical N's over which to use approximations */
#define	NWILCOXON    16      /* according to Snedecor & Cochran */
#define	NSIGNTEST    25      /* according to Siegel */
#define	NSPEARMAN    10      /* according to Siegel */

#define	MAXCOND	     20       /* maximum number of conditions */
#define MAXCHARS BUFSIZ       /* maximum number of chars in lines */
#define	MAXDATA     100       /* max number of input data cases */
int 	Maxdata = MAXDATA;

char	*Condname[MAXCOND];   /* condition names */
float	*Condat[MAXCOND];     /* pointers into all data for each condition */
int 	Nconds;               /* number of conditions == ncols */
int 	Count;                /* number of data in each condition */
int 	NAcount[MAXCOND];     /* number of NA missing values */
int 	TNAcount;             /* number of missing cases */

/* OPTIONS */
Boole	InfoVersion;          /* print version information */
Boole	InfoLimits;           /* print program limits */
Boole	InfoOptions;          /* print usage information */
Boole	Rankmeans;            /* print average ranks for conditions? */
Boole	Yates = TRUE;         /* apply Yates' correction for continuity */
Boole	Docomps = TRUE;       /* do comparison tests? **/
#define	noteyates()	printf("\tNOTE: Yates' correction for continuity applied\n")

#define	statheader(header) printf ("\n%s:\n", header)

\f

/*FUNCTION main: rankrel */
main (argc, argv) char **argv;
	{
	ARGV0;
	initial (argc, argv);
	checkstdin ();
	readdata ();
	summarize ();
	if (Nconds > 1)
		{
		if (Docomps)
			{
			if (Nconds == 2)
				{
				dosigntest (0, 1);
				dowilcoxon (0, 1);
				}
			dofriedman ();
			}
		dospearman ();
		}
	exit (SUCCESS);
	}

\f

/*FUNCTION initial: set options and condition names */
initial (argc, argv) char **argv;
	{
	extern	char	*optarg;
	extern	int 	optind;
	int 	errflg = 0;
	int 	C;
	int 	cond;
	while ((C = getopt (argc, argv, "c:rsyLOV")) != EOF)
		switch (C)
			{
			case 'O': InfoOptions = TRUE; break;
			case 'V': InfoVersion = TRUE; break;
			case 'L': InfoLimits = TRUE; break;
			case 'c':
				if (setint (Argv0, 'c', optarg, &Maxdata, 1, MAXINT))
					errflg++;
				break;
			case 'r':
				Rankmeans = TRUE;
				break;
			case 's':
				Docomps = FALSE;
				break;
			case 'y':
				Yates = FALSE;
				break;
			default: errflg++; break;
			}
	if (errflg)
		USAGE ([-ry] [-c maxcases] [names])
	usinfo ();
	for (cond = 0; optind + cond < argc; cond++)
		{
		if (cond >= MAXCOND)
			ERRMANY (condition names,MAXCOND)
		Condname[cond] = argv[optind+cond];
		}
	}

\f

/*FUNCTION usinfo: online help */
usinfo ()
	{
	if (InfoVersion)
		pver (Version);
	if (InfoLimits)
		{
		plim (Argv0);
		const (MAXCOND,  "maximum number of conditions");
		const (Maxdata,  "maximum number of input data points");
		const (MAXCHARS, "maximum number of characters in input lines");
		}
	if (InfoOptions)
		{
		ppgm (Argv0, Purpose);
		iopt ('c', "maxcases", "maximum number of input cases", Maxdata);
		lopt ('r', "print average ranks for conditions", Rankmeans);
		lopt ('s', "run comparative significance tests", Docomps);
		lopt ('y', "use Yates' correction for continuity", Yates);
		oper ("[names]", "condition names", "Cond-1 Cond-2 ...");
		}
	if (InfoVersion || InfoLimits || InfoOptions)
		exit (SUCCESS);
	}

\f

/*FUNCTION newname: make a new default name for a condition */
char *
newname (cond)
int 	cond;
	{
	char	*name = myalloc (char, 10);
	VOID sprintf (name, "Cond-%d", cond);
	return (name);
	}

\f

/*FUNCTION checktable: tell analyst to look for value in a table */
checktable (stat, parname, parval)
char	*stat;
char	*parname;
int 	parval;
	{
	printf ("\tCheck a table for %s", stat);
	if (parname && *parname)
		printf (" with %s = %d", parname, parval);
	putchar ('\n');
	}

\f

/*FUNCTION: readdata: read in data into global arrays */
readdata ()
	{
	char	line[MAXCHARS];
	char	*sarray[MAXCOND];
	int 	ncols;
	int 	cond;
	Boole	dropcase;
	
	while (gets (line))
		{
		ncols = parselin (line, sarray, MAXCOND);
		if (ncols == 0)
			continue;
		dropcase = FALSE;
		for (cond = 0; cond < ncols; cond++)
			if (isna (sarray[cond]))
				{
				NAcount[cond]++;
				dropcase = TRUE;
				}
		if (dropcase == TRUE)
			{
			TNAcount++;
			continue;
			}
		if (Nconds == 0) /* initialize */
			{
			Nconds = ncols;
			if (Nconds > MAXCOND)
				ERRMANY (conditions,MAXCOND)
			for (cond = 0; cond < Nconds; cond++)
				{
				Condat[cond] = myalloc (float, Maxdata);
				if (Condat[cond] == NULL)
					ERRSPACE (data)
				}
			}
		if (Count == Maxdata)
			ERRMANY (cases,Maxdata)
		if (ncols != Nconds)
			ERRRAGGED
		for (cond = 0; cond < Nconds; cond++)
			if (number (sarray[cond]))
				Condat[cond][Count] = atof (sarray[cond]);
			else
				ERRNUM (sarray[cond],input value)
		Count++;
		}
	if (Count <= 1)
		ERRDATA
	for (cond = 0; cond < Nconds; cond++)
		if (Condname[cond] == NULL || *Condname[cond] == '\0')
			Condname[cond] = newname (cond+1);
	}

\f

/*FUNCTION summarize: print summary statistics */
summarize ()
	{
	int 	cond;
	int 	i;
	int 	allcount = 0;
	float	*alldata = myalloc (float, Nconds * Count);
	int 	tna = 0;
	
	if (TNAcount)
		printf ("Cases with missing data: %d ignored\n", TNAcount);
	for (cond = 0; cond < Nconds; cond++)
		tna += NAcount[cond];
	for (cond = 0; cond < Nconds; cond++)
		{
		if (alldata)
			for (i = 0; i < Count; i++)
				alldata[allcount++] = Condat[cond][i];
		ordstat (Condat[cond], Count, cond, Condname[cond], NAcount[cond]);
		}
	if (alldata != NULL)
		{
		if (Nconds > 1)
			ordstat (alldata, Nconds*Count, Nconds, "Total", tna);
		free ((char *) alldata);
		}
	else
		WARNING (not enough space to compute grand median)
	}

\f

/*FUNCTION dosigntest: compute sign test for two conditions */
/*
	ties are dropped and N is reduced
	if N > NSIGNTEST, then use normal approximation: apobin
	else use exact binomial probability
*/
dosigntest (cond1, cond2)
int 	cond1, cond2;
	{
	float	*data1 = Condat[cond1];
	float	*data2 = Condat[cond2];
	int 	N = 0;                   /* number of cases in test */
	int 	r = 0;                   /* number of cases first > second */
	int 	i;
	double	p;
	double	apobin (), pobin ();
	
	statheader ("Binomial Sign Test");
	
	for (i = 0; i < Count; i++)
		{
		if (data1[i] == data2[i])
			continue;
		if (data1[i] > data2[i])
			r++;
		N++;
		}
	printf ("\tNumber of cases %s is above %s: %3d\n",
		Condname[cond1], Condname[cond2], r);
	printf ("\tNumber of cases %s is below %s: %3d\n",
		Condname[cond1], Condname[cond2], N-r);
	if (r < N * 0.5)
		r = N - r;
	if (N > NSIGNTEST)
		{
		p = apobin (N, 1, 2, r);
		tprint ("One-tail probability approximation", p);
		noteyates ();
		}
	else
		{
		p = pobin (N, 1, 2, r);
		tprint ("One-tail probability (exact)", p);
		}
	}

\f

/*FUNCTION dowilcoxon: compute Wilcoxon test for two conditions */
/*
	T = min (sumposranks, sumnegranks)
	if N > NWILCOXON, use normal approximation (seems good even when N = 8)
		mean = N (N+1) / 4
		se = sqrt (N (N+1) (2N+1) / 24 )
		z = (T - U) / se      (one tailed)
*/
dowilcoxon (cond1, cond2)
int 	cond1, cond2;
	{
	float	*data1 = Condat[cond1];
	float	*data2 = Condat[cond2];
	double	diff;                      /* data1 - data2 */
	float	*rank;                     /* absolute differences of conditions */
	Boole	*neg;                      /* is difference negative? */
	int 	i;                         /* loop varioable for cases */
	double	sumnegranks = 0.0;         /* sum of all negatively signed ranks */
	double	sumposranks = 0.0;         /* sum of all positively signed ranks */
	double	T;                         /* Wilcoxin statistic */
	int 	N = 0;                     /* number of non-zero differences */
	double	mean;
	double	se;
	double	z, poz ();
	
	rank = myalloc (float, Count);
	neg = myalloc (Boole, Count);
	if (rank == NULL || neg == NULL)
		ERRSPACE(Wilcoxon Test)
	
	statheader ("Wilcoxon Matched-Pairs Signed-Ranks Test");
	printf ("    Comparison of %s and %s\n", Condname[cond1], Condname[cond2]);
	for (i = 0; i < Count; i++)
		{
		diff = data1[i] - data2[i];
		if (!fzero (diff))
			{
			neg[N] = (diff < 0.0);
			rank[N] = fabs (diff);
			N++;
			}
		}
	if (N == 0)
		{
		printf ("\tHmmm, all these pairs seem to be the same.\n");
		return;
		}
	if (ranksort (rank, NULL, NULL, N))
		ERRMSG0 (could not rank data for Wilcoxon test)
	for (i = 0; i < N; i++)
		if (neg[i])
			sumnegranks += rank[i];
		else
			sumposranks += rank[i];
	T = min (sumnegranks, sumposranks);
	tprint ("T (smaller ranksum of like signs)", T);
	tprint ("N (number of signed differences)", (double) N);
	mean = N * (N+1) * 0.25;
	se = sqrt (N * (N+1) * (2.0 * N + 1.0) / 24.0);
	z = fabs (T - mean);
	if (Yates)
		{
		z -= 0.5;
		if (z < 0.0)
			z = 0.0;
		}
	z /= se;
	tprint ("z", z);
	tprint ("One-tail probability approximation", 1.0 - poz (z));
	if (Yates)
		noteyates ();
	if (N <= NWILCOXON)
		checktable ("T", "N", N);
	free ((char *) neg);
	free ((char *) rank);
	}

\f

/*FUNCTION dofriedman: multi-condition test */
dofriedman ()
	{
	double	R = 0.0;            /* sum of squared sum of ranks in conds */
	double	chi_r;              /* function of R and formula */
	double	pochisq ();
	int 	i, cond;
	float	rank[MAXCOND];      /* ranks of each case computed here */
	double	sumrank[MAXCOND];   /* sum of ranks in a condition */
	/* the following two vectors save allocation time for ranksort */
	float	sorted[MAXCOND];    /* sorted condition values */
	int 	order[MAXCOND];     /* sorted order index numbers */
	Boole	needtable = FALSE;  /* should we recommend looking at a table? */
	
	for (cond = 0; cond < Nconds; cond++)
		sumrank[cond] = 0.0;
	for (i = 0; i < Count; i++)
		{
		for (cond = 0; cond < Nconds; cond++)
			rank[cond] = Condat[cond][i];
		if (ranksort (rank, sorted, order, Nconds))
			ERRMSG0 (could not rank data for Friedman test)
		for (cond = 0; cond < Nconds; cond++)
			sumrank[cond] += rank[cond];
		}
	if (Rankmeans)
		{
		statheader ("Average Ranks");
		for (cond = 0; cond < Nconds; cond++)
			printf ("\t\t%-12s %10.1f %10.2f\n",
				Condname[cond], sumrank[cond], sumrank[cond]/Count);
		}
	for (cond = 0; cond < Nconds; cond++)
		R += sumrank[cond] * sumrank[cond];
	chi_r = 12.0 / (Count*Nconds*(Nconds+1)) * R - (3 * Count * (Nconds+1));
	statheader ("Friedman Chi-Square Test for Ranks");
	tprint ("Chi-square of ranks", chi_r);
	chiprint (chi_r, Nconds-1, pochisq (chi_r, Nconds - 1));
	if (Nconds == 2) /* really, this is a two-tailed wilcoxon */
		needtable = TRUE;
	if (Nconds == 3 && Count <= 9)
		needtable = TRUE;
	if (Nconds == 4 && Count <= 4)
		needtable = TRUE;
	if (needtable)
		checktable ("Friedman", "N", Count);
	}

\f

/*FUNCTION dospearman: compute & print rank-order correlation matrix */
/*
	WARNING: this procedure clobbers the original global data
*/	
dospearman ()
	{
	/* the following two vectors save allocation time for ranksort */
	float	*sorted;    /* sorted condition values */
	int 	*order;     /* sorted order index numbers */
	double	cor ();
	double	r;
	double	pof ();                   /* prob of F ratio */
	int 	cond;
	int 	cond2;
	
	order = myalloc (int, Count);
	sorted = myalloc (float, Count);
	if (order == NULL || sorted == NULL)
		ERRSPACE(Spearman Rho)
	for (cond = 0; cond < Nconds; cond++)
		if (ranksort (Condat[cond], sorted, order, Count))
			ERRMSG0 (could not rank data for Spearman Rho)
	
	statheader ("Spearman Rank Correlation (rho) [corrected for ties]");
	pcritrho (Count);
	if (Count < NSPEARMAN)
		checktable ("Spearman rho", "N", Count);
	if (Nconds == 2)
		{
		r = cor (Condat[0], Condat[1], Count);
		tprint ("rho", r);
		return;
		}
	
#define	LFORMAT    "%-8.8s "
#define	SFORMAT    "%8.8s "
#define	PFORMAT    "%8.3f "
	printf (LFORMAT, "");
	for (cond = 0; cond < Nconds; cond++)
		printf (SFORMAT, Condname[cond]);
	putchar ('\n');
	for (cond = 0; cond < Nconds; cond++)
		{
		printf (SFORMAT, Condname[cond]);
		for (cond2 = 0; cond2 < Nconds; cond2++)
			{
			if (cond == cond2)
				printf (SFORMAT, "");
			else
				{
				r = cor (Condat[cond], Condat[cond2], Count);
				printf (PFORMAT, r);
				}
			}
		putchar ('\n');
		}
	printf (LFORMAT, "");
	for (cond = 0; cond < Nconds; cond++)
		printf (SFORMAT, Condname[cond]);
	putchar ('\n');
	
	free ((char *) sorted);
	free ((char *) order);
	}

\f

/*FUNCTION critcor: compute critical value for correlation */
/*
	t = r * sqrt (df / (1-r*r));
	F = r*r * df / (1-r*r)
	r = sqrt (F / (df + F))
*/
double
critcor (n, p)
int 	n;   /* number of points */
double	p;   /* significance level */
	{
	int 	df = n - 2;
	double	critf ();         /* critical F ratio */
	double	sqrt ();
	double	r;
	double	f;
	
	if (p <= 0.0 || p >= 1.0 || df < 1)
		return (0.0);
	f = critf (p, 1, df);
	r = sqrt (f / (df + f));
	return (r);
	}

pcritrho (n)
int 	n;
	{
	double	critcor (); /* compute critical correlation value */
	double	p05 = critcor (n, .05);
	double	p01 = critcor (n, .01);
	tprint ("Critical r (.05) t approximation", p05);
	tprint ("Critical r (.01) t approximation", p01);
	}