|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T c
Length: 14322 (0x37f2)
Types: TextFile
Names: »contab.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/contab.c«
/* Copyright 1985 Gary Perlman */
#include "stat.h"
PGM(contab,Contingency Tables and Chi-Square,5.4,02/03/87)
/*
THINGS TO DO:
n**k chisq test (see winer)
Option to supply the expected values (complex!)
changes df's
Option to request specific tables and breakdowns
-t "A B C"
SOURCES:
BMD P4F (1981++)
Winer p. 857
Goodman paper in JASA
Siegel p. 96
Bradley p. 195
*/
\f
#ifndef I_DATA
#define I_DATA /* using integer data in mdmat */
#endif I_DATA
#define MAXLEV 40 /* maximum number of levels of factors */
#define MAXCHARS BUFSIZ /* maximum number of chars in lines */
#include "mdmat.h"
#define percent(freq1,freq2) ((100.0 * (freq1)) / (freq2))
extern double pochisq ();
#define SFORMAT "%8.8s" /* format for string labels */
#define LFORMAT "%-8.8s" /* format for left justified labels */
#define CFORMAT "%8d" /* format for cell counts */
#define EFORMAT "%8.2f" /* format for expected frequencies */
#define PFORMAT "%8.1f" /* format for percentages */
static char *Sformat = SFORMAT;
static char *Lformat = LFORMAT;
static char *Cformat = CFORMAT;
static char *Eformat = EFORMAT;
static char *Pformat = PFORMAT;
DATUM ConTable[MAXLEV][MAXLEV]; /* contingency table */
DATUM ConRow[MAXLEV]; /* row totals */
DATUM ConCol[MAXLEV]; /* column totals */
DATUM Total; /* sum of all frequencies */
double Expected[MAXLEV][MAXLEV]; /* expected frequencies */
Boole Exp0; /* any expected frequencies == 0? */
/* OPTIONS */
Boole PrExpected; /* should expected cell frequencies be printed? */
Boole PrDiffs; /* should cell differences be printed? */
Boole PrRows; /* should row percentages be printed? */
Boole PrCols; /* should column percentages be printed? */
Boole PrTotal; /* should total percentages be printed? */
Boole Blank = FALSE; /* should blank lines be used in tables? */
int Interfact = MAXFACT; /* maximum interaction to print */
Boole Debug = FALSE; /* should debugging information be printed? */
Boole DoYates = TRUE; /* do we apply Yates' correction? */
Boole Sigtest = TRUE; /* do we do significance test? */
#define SMALLCELL 5.0 /* can't have too many of these */
#define NFISHER 200 /* do exact test for N <= NFISHER */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
\f
/*FUNCTION chisq1: comput and print chisq value for 1 way table */
chisq1 (source)
Posint source;
{
/*FUTURE: add binomial exact test when df == 1 */
Posint fact;
Posint factor;
Posint level[MAXFACT];
int df;
Boole yates = FALSE;
Posint count;
double expect;
double diff;
double chival = 0.0;
for (factor = 0; factor < Nfactors; factor++)
if (member (factor, source)) /* identify the source factor numbers */
{
fact = factor;
df = (Nlevels[fact] - 1);
break;
}
if (df < 1)
{
WARNING (no degrees of freedom for test)
return;
}
if (DoYates && df == 1) /* correction not applied if df > 1 */
yates = TRUE;
for (factor = 0; factor < Nfactors; factor++)
level[factor] = 0;
expect = (double) Total / (double) Nlevels[fact];
printf (Lformat, Factname[fact]);
printf (Sformat, "count");
if (PrExpected)
printf (Sformat, "expect");
if (PrDiffs)
printf (Sformat, "diffs");
if (PrRows || PrCols || PrTotal)
printf (Sformat, "percent");
putchar ('\n');
do
{
count = 0;
do
count += Datax[mdaddr(level)];
while (mdnext (level, source, FALSE));
printf (Lformat, Levelname[fact][level[fact]]);
printf (Cformat, count);
if (PrExpected)
printf (Eformat, expect);
diff = count - expect;
if (yates)
{
diff = fabs (diff) - 0.5;
if (diff < 0.0) /* over-correction */
diff = 0.0;
}
if (PrDiffs)
printf (Eformat, diff);
if (PrRows || PrCols || PrTotal)
printf (Pformat, percent (count, Total));
putchar ('\n');
chival += diff*diff / expect;
}
while (mdnext (level, source, TRUE));
printf (Lformat, "Total");
printf (Cformat, Total);
putchar ('\n');
if (Sigtest)
{
if (yates)
printf ("NOTE: Yates' correction for continuity applied\n");
if (expect < SMALLCELL)
printf ("WARNING: expected frequency is less than %g\n", SMALLCELL);
chiprint (chival, df, pochisq (chival, df));
/* if df == 1, then do one and two-tail binomial test */
}
}
\f
/*FUNCTION chisq2: compute and print chisq value for 2 way table */
chisq2 (source)
Posint source;
{
Posint fact1, fact2;
Posint lev1 = 0, lev2 = 0; /* number of levels of factors */
Posint row, col;
Posint factor;
int df; /* degrees of freedom for test */
double diff; /* obtained - expected */
double p;
double value; /* will be chi square value */
Boole yates = FALSE; /* is yates' correction being applied? */
Boole fisher = FALSE; /* use fisher exact test? */
double fisher2x2 ();
int smallcount; /* count of cells with expected freqs < SMALLCELL */
if (Blank)
putchar ('\n');
for (factor = 0; factor < Nfactors; factor++)
if (member (factor, source)) /* identify the source factor numbers */
if (!lev1)
lev1 = Nlevels[fact1 = factor];
else
lev2 = Nlevels[fact2 = factor];
df = (lev1 - 1) * (lev2 - 1);
if (df < 1)
{
WARNING (no degrees of freedom for test)
return;
}
if (DoYates && df == 1) /* correction not applied if df > 1 */
yates = TRUE;
smallcount = 0;
for (row = 0; row < lev1; row++)
for (col = 0; col < lev2; col++)
if (Expected[row][col] < SMALLCELL)
smallcount++;
if (lev1 == 2 && lev2 == 2 && Total <= NFISHER)
fisher = TRUE;
value = 0.0;
for (row = 0; row < lev1; row++)
for (col = 0; col < lev2; col++)
{
diff = ConTable[row][col] - Expected[row][col];
if (yates)
{
diff = fabs (diff) - 0.5;
if (diff < 0.0) /* over-correction */
diff = 0.0;
}
value += diff*diff / Expected[row][col];
}
p = pochisq (value, df);
printf ("Analysis for %s x %s:\n", Factname[fact1], Factname[fact2]);
if (yates)
printf ("\tNOTE: Yates' correction for continuity applied\n");
if (smallcount)
printf ("\tWARNING: %d of %d cells had expected frequencies < %g\n",
smallcount, lev1*lev2, SMALLCELL);
chiprint (value, df, p);
if (fisher)
fishtest (ConTable[0][0], ConTable[0][1],
ConTable[1][0], ConTable[1][1]);
if (lev1 == 2 && lev2 == 2)
tprint ("phi Coefficient == Cramer's V", sqrt (value / Total));
else
tprint ("Cramer's V", sqrt (value / (Total * (min (lev1, lev2) - 1))));
tprint ("Contingency Coefficient", sqrt (value / (value + Total)));
}
\f
/*FUNCTION initial: returns local version of optind, index to first operand */
int
initial (argc, argv) char **argv;
{
extern char *optarg; /* option value accessed through this by getopt */
extern int optind; /* will be index to first operand */
int opterr = 0; /* count of number of errors */
int flag; /* option flag characters read in here */
char *optstring = /* getopt string to be filled in */
"bDi:syc:LOV";
while ((flag = getopt (argc, argv, optstring)) != EOF)
switch (flag)
{
default:
opterr++;
break;
case 'O': InfoOptions = TRUE; break;
case 'V': InfoVersion = TRUE; break;
case 'L': InfoLimits = TRUE; break;
/* put option cases here */
case 'b':
Blank = TRUE;
break;
case 'c':
for (; *optarg; optarg++)
switch (*optarg)
{
case 'e': PrExpected = TRUE; break;
case 'd': PrDiffs = TRUE; break;
case 'p': PrRows = PrCols = PrTotal = TRUE; break;
case 'c': PrCols = TRUE; break;
case 'r': PrRows = TRUE; break;
case 't': PrTotal = TRUE; break;
default:
fprintf (stderr, "%s: unknown cell entry '%c'\n",
argv[0], *optarg);
break;
}
break;
case 'D':
Debug = TRUE;
break;
case 'i':
if (setint (Argv0, flag, optarg, &Interfact, 1, MAXFACT))
opterr++;
break;
case 's':
Sigtest = FALSE;
break;
case 'y':
DoYates = FALSE;
break;
}
if (opterr)
USAGE ([-bsy] [-c cell contents (edprct)] [-i nfactors] [factor names])
usinfo ();
return (optind);
}
\f
/*FUNCTION main */
main (argc, argv) char **argv;
{
int firstname;
Posint ncells;
Posint source;
Posint nsources;
Posint nfactors;
int i;
ARGV0;
Maxlev = MAXLEV;
firstname = initial (argc, argv);
checkstdin ();
ncells = mdread (argc, argv, firstname);
nsources = 1 << Nfactors;
for (i = 0; i < ncells; i++)
Total += Datax[i];
if (Total == 0)
ERRMSG0 (No expected cell frequencies were read in)
printdesign (Total);
for (source = 0; source < nsources; source++)
if ((nfactors = setsize (source, Nfactors)) == 1)
{
putchar ('\n');
chisq1 (source);
}
for (source = 0; source < nsources; source++)
if ((nfactors=setsize (source, Nfactors)) == 2 && nfactors <= Interfact)
{
putchar ('\n');
printsource (source);
sumtab (source);
if (Sigtest)
{
if (Exp0 == TRUE)
WARNING (no chi-square: expected cell frequency == 0)
else
chisq2 (source);
}
}
for (source = 0; source < nsources; source++)
if ((nfactors=setsize (source, Nfactors)) > 2 && nfactors <= Interfact)
{
putchar ('\n');
printsource (source);
summary (source);
}
exit (0);
}
\f
/*FUNCTION summary: print summary of higher order design */
summary (source)
Posint source;
{
Posint level[MAXFACT];
Posint count;
Posint factor;
for (factor = 0; factor < Nfactors; factor++)
{
level[factor] = 0;
printf (Sformat, Factname[factor]);
}
putchar ('\n');
do {
count = 0;
do
count += Datax[mdaddr(level)];
while (mdnext (level, source, FALSE));
for (factor = 0; factor < Nfactors; factor++)
if (member (factor, source))
printf (Sformat, Levelname[factor][level[factor]]);
else
printf (Sformat, "");
printf (Cformat, count);
putchar ('\n');
}
while (mdnext (level, source, TRUE));
}
\f
/*FUNCTION sumtab: summarize contingency table */
sumtab (source)
Posint source;
{
Posint factor, level[MAXFACT];
DATUM count;
Posint row, col;
Posint fact1, fact2;
Posint lev1 = 0, lev2;
/* find factors in source and their levels */
for (factor = 0; factor < Nfactors; factor++)
{
level[factor] = 0;
if (member (factor, source)) /* identify the source factor numbers */
{
if (!lev1)
lev1 = Nlevels[fact1 = factor];
else
lev2 = Nlevels[fact2 = factor];
}
}
/* initialize contingency table */
for (row = 0; row < lev1; row++)
ConRow[row] = 0;
for (col = 0; col < lev2; col++)
ConCol[col] = 0;
for (row = 0; row < lev1; row++)
for (col = 0; col < lev2; col++)
ConTable[row][col] = 0;
do
do
ConTable[level[fact1]][level[fact2]] += Datax[mdaddr (level)];
while (mdnext (level, source, FALSE));
while (mdnext (level, source, TRUE));
for (row = 0; row < lev1; row++)
for (col = 0; col < lev2; col++)
{
count = ConTable[row][col];
ConCol[col] += count;
ConRow[row] += count;
}
doexpect (fact1, fact2);
table (fact1, fact2);
}
\f
/*FUNCTION table: print the contingency table */
table (fact1, fact2)
Posint fact1;
Posint fact2;
{
Posint row, col;
Posint lev1 = Nlevels[fact1];
Posint lev2 = Nlevels[fact2];
printf (Sformat, "");
for (col = 0; col < lev2; col++)
printf (Sformat, Levelname[fact2][col]);
printf (Sformat, "Totals");
putchar ('\n');
for (row = 0; row < lev1; row++)
{
if (Blank)
putchar ('\n');
printf (Lformat, Levelname[fact1][row]);
for (col = 0; col < lev2; col++)
printf (Cformat, ConTable[row][col]);
printf (Cformat, ConRow[row]);
putchar ('\n');
if (PrExpected)
{
printf (Lformat, " expect");
for (col = 0; col < lev2; col++)
printf (Eformat, Expected[row][col]);
putchar ('\n');
}
if (PrDiffs)
{
printf (Lformat, " diffs");
for (col = 0; col < lev2; col++)
printf (Eformat, ConTable[row][col] - Expected[row][col]);
putchar ('\n');
}
if (PrRows)
{
printf (Lformat, " row %");
for (col = 0; col < lev2; col++)
printf (Pformat, percent (ConTable[row][col], ConRow[row]));
printf (Pformat, percent (ConRow[row], Total));
putchar ('\n');
}
if (PrCols)
{
printf (Lformat, " col %");
for (col = 0; col < lev2; col++)
printf (Pformat, percent (ConTable[row][col], ConCol[col]));
putchar ('\n');
}
if (PrTotal)
{
printf (Lformat, " % tot");
for (col = 0; col < lev2; col++)
printf (Pformat, percent (ConTable[row][col], Total));
putchar ('\n');
}
}
if (Blank)
putchar ('\n');
printf (Lformat, "Totals");
for (col = 0; col < lev2; col++)
printf (Cformat, ConCol[col]);
printf (Cformat, Total);
putchar ('\n');
if (PrCols)
{
printf (Lformat, " col %");
for (col = 0; col < lev2; col++)
printf (Pformat, percent (ConCol[col], Total));
printf (Pformat, 100.0);
putchar ('\n');
}
}
\f
/*FUNCTION doexpect: fill expected cell frequency table with value */
doexpect (fact1, fact2)
Posint fact1;
Posint fact2;
{
Posint lev1 = Nlevels[fact1];
Posint lev2 = Nlevels[fact2];
int row, col;
double ftotal = Total; /* assumes this is non-zero */
Exp0 = FALSE;
for (row = 0; row < lev1; row++)
for (col = 0; col < lev2; col++)
{
Expected[row][col] = (ConCol[col] / ftotal) * ConRow[row];
if (fzero (Expected[row][col]))
Exp0 = TRUE;
}
}
\f
/*FUNCTION usinfo: print info about contab */
usinfo ()
{
if (InfoVersion)
pver (Version);
if (InfoLimits)
{
plim (Argv0);
const (MAXFACT, "maximum number of factors");
const (Maxlev, "maximum number of levels");
const (MAXCHARS, "maximum number of characters in lines");
}
if (InfoOptions)
{
ppgm (Argv0, Purpose);
lopt ('b', "insert blank lines in tables", Blank);
sopt ('c', "contents", "request cell contents (edprct)", "");
iopt ('i', "factors", "maximum number of factors in interactions", Interfact);
lopt ('s', "print significance tests", Sigtest);
lopt ('y', "use Yates correction when df == 1", DoYates);
oper ("[names]", "factor names", "A B ... DATA");
}
if (InfoVersion || InfoLimits || InfoOptions)
exit (0);
}