|
|
DataMuseum.dk Presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T d
Length: 12376 (0x3058)
Types: TextFile
Names: »desc.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/desc.c«
/* Copyright 1979 Gary Perlman */
#include "stat.h"
PGM(desc,Descriptive Statistics and Histograms,5.2,01/20/87)
/*
This program analyses single distributions of data.
It was written by Gary Perlman at UCSD August 1979.
*/
/*
 * Compile-time limits.  Defining SMALL_MEM selects the smaller tables.
 */
#ifdef SMALL_MEM
#define MAXBINS 250 /* maximum number of bins for tables */
#define MAXPOINTS 2500 /* maximum number of input points if storing */
#else
#define MAXBINS 1000 /* maximum number of bins for tables */
#define MAXPOINTS 10000 /* maximum number of input points if storing */
#endif
#define MAXCHARS 50 /* maximum number of chars in words */
/*
 * Option flags, set by initial () from the command line.
 */
Boole Stats; /* print statistics */
Boole Table; /* print a table of some sort */
Boole Histogram; /* print histogram */
Boole Frequencies; /* print frequency table */
Boole Proportions; /* print proportions table */
Boole Cumulative; /* make table cumulative */
Boole Storedata; /* store the data (cleared by input () if Datax fills up) */
Boole Onepass; /* can run with one pass: bins are counted while reading */
Boole Setmaximum; /* maximum value has been set */
Boole Setminimum; /* minimum value has been set */
Boole Setintwidth; /* interval width has been set */
Boole Variable; /* print stats in variable format */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
/*
 * Data storage and running totals, filled in by input ().
 * Objects without an initializer have static storage and so start at zero.
 */
float Datax[MAXPOINTS]; /* data stored in here (as float; the sums below use double) */
int freq[MAXBINS]; /* frequency counts stored in here */
int N; /* number of points read in (counts on even after storing stops) */
double Sum; /* sum of all points read in */
double s2; /* sum of squares of all points read in */
double s3; /* sum of cubes of all points read in */
double s4; /* sum of x^4 of all points read in */
double Gmean; /* sum of log (x); turned into the geometric mean when printed */
double Hmean; /* sum of 1/x; turned into the harmonic mean when printed */
Boole Allgtzero = TRUE; /* all points greater than zero? */
double F_null; /* null value for t test (argument of -t or -F) */
double Minx; /* min value of x */
double Maxx; /* max value of x */
int NAcount = 0; /* number of NA missing values */
int Undermin; /* number of points less than minimum */
int Overmax; /* number of points more than maximum */
/*
 * Table geometry and acceptance range (-i, -m, -M, or derived in printtable ()).
 */
double Intwidth; /* width of interval of frequency count bins */
double Minimum; /* minimum allowable value of x */
double Maximum; /* maximum allowable value of x */
/*
 * main: process the options, read every value from the standard input,
 * then print the reports that were requested (statistics first, then the
 * table).  Always finishes through exit (0).
 */
main (argc, argv)
int 	argc;
char	*argv[];
{
	ARGV0;                  /* stat.h macro, not visible here; presumably records argv[0] */

	initial (argc, argv);   /* set the option flags */
	checkstdin ();
	input ();               /* accumulate the totals (and the data if stored) */

	if (Stats)
		{
		printstats ();
		}
	if (Table)
		{
		printtable ();
		}

	exit (0);
}
/*
 * initial: translate the command line into the global option flags.
 *
 *   argc, argv   the arguments exactly as received by main ()
 *
 * With no arguments at all the defaults are summary statistics with the
 * data stored (so order statistics are available) and nothing else is done.
 * Otherwise the options are decoded with getopt; any error is counted and
 * reported through USAGE after the whole line has been scanned.  USAGE and
 * ERROPT are stat.h macros that are not visible here; they are used exactly
 * as statements without a trailing semicolon.
 * After decoding, the flags that depend on combinations of options are set:
 * a table can be built in one pass only when both the interval width and
 * the minimum are known in advance, otherwise the data must be stored.
 */
initial (argc, argv)
int 	argc;
char	**argv;
{
	extern	char	*optarg;    /* argument of the current option (getopt) */
	extern	int 	optind;     /* index of the first operand (getopt) */
	int 	opterr = 0;         /* local count of option errors */
	int 	C;                  /* option letter returned by getopt */

	if (argc == 1)
		{
		Stats = TRUE;
		Storedata = TRUE;
		return;
		}

	while ((C = getopt (argc, argv, "cfF:t:hi:m:M:posvOVL")) != EOF)
		{
		switch (C)
			{
			/* information requests, acted on by usinfo () */
			case 'L':
				InfoLimits = TRUE;
				break;
			case 'O':
				InfoOptions = TRUE;
				break;
			case 'V':
				InfoVersion = TRUE;
				break;

			/* table options */
			case 'c':
				Cumulative = TRUE;
				break;
			case 'f':
				Table = TRUE;
				Frequencies = TRUE;
				break;
			case 'h':
				Table = TRUE;
				Frequencies = TRUE;
				Histogram = TRUE;
				break;
			case 'p':
				Table = TRUE;
				Proportions = TRUE;
				break;
			case 'i':
				if (setreal (Argv0, C, optarg, &Intwidth))
					opterr++;
				if (Intwidth < FZERO)
					{
					fprintf (stderr, "%s: interval width (%g) must be > 0\n",
						Argv0, Intwidth);
					opterr++;
					}
				Table = TRUE;
				Setintwidth = TRUE;
				break;

			/* acceptance range */
			case 'm':
				if (setreal (Argv0, C, optarg, &Minimum))
					opterr++;
				Setminimum = TRUE;
				break;
			case 'M':
				if (setreal (Argv0, C, optarg, &Maximum))
					opterr++;
				Setmaximum = TRUE;
				break;

			/* statistics options; -F and -t share the null value */
			case 'F':
			case 't':
				if (setreal (Argv0, C, optarg, &F_null))
					opterr++;
				Stats = TRUE;
				break;
			case 'o':
				Storedata = TRUE;
				/* FALLTHROUGH: -o also turns the statistics on */
			case 's':
				Stats = TRUE;
				break;
			case 'v':
				Stats = TRUE;
				Variable = TRUE;
				break;

			default:
				opterr++;
				break;
			}
		}

	if (opterr)
		USAGE ([-cfhopsv] [-i interval] [-m min] [-M max] [-F|-t Ho])
	usinfo ();
	ERROPT (optind)

	if (Table)
		{
		/* bins can be counted while reading only if their layout is known */
		if (Setintwidth && Setminimum)
			Onepass = TRUE;
		else
			Storedata = TRUE;
		/* a table with neither -f nor -p is shown as a histogram */
		if (!(Frequencies || Proportions))
			Histogram = TRUE;
		}
	else if (Setminimum || Setmaximum)
		{
		Stats = TRUE;
		}
}
input ()
{
double x; /* each datum read in here */
double x2; /* square of x */
char stringx[MAXCHARS]; /* string version of x read in here */
while (getword (stringx, stdin))
{
if (isna (stringx))
{
NAcount++;
continue;
}
if (!number (stringx))
ERRNUM (stringx,input value)
x = atof (stringx);
if (Setminimum && x < Minimum)
{
Undermin++;
continue;
}
if (Setmaximum && x > Maximum)
{
Overmax++;
continue;
}
if (N == 0)
{
Maxx = x;
Minx = x;
}
if (Storedata)
if (N == MAXPOINTS)
{
WARNING (too much data for storing)
Storedata = FALSE;
}
else
Datax[N] = x;
if (Onepass)
freq[bindex(x)]++;
x2 = x*x;
Sum += x;
s2 += x2;
s3 += x2*x;
s4 += x2*x2;
if (Allgtzero && x > FZERO)
{
Gmean += log (x);
Hmean += 1.0 / x;
}
else
Allgtzero = FALSE;
if (x > Maxx)
Maxx = x;
if (x < Minx)
Minx = x;
N++;
}
if (N <= 1)
ERRDATA
}
#ifndef MSDOS /* don't need this for Lattice fqsort */
/*
 * fltcmp: comparison function for sorting an array of float.
 *   f1, f2   pointers to the two elements being compared
 * Returns -1 when *f1 is smaller, 0 when the two are equal and 1 in
 * every other case.
 */
int
fltcmp (f1, f2)
float	*f1, *f2;
{
	float	left = *f1;
	float	right = *f2;

	if (left < right)
		return (-1);
	return ((left == right) ? 0 : 1);
}
#endif
/*
 * vprint: print one statistic as a "name = value" line.
 *   label    bare word used verbatim as the name (not a string)
 *   format   printf conversion without the '%', e.g. d or g
 *   var      expression whose value is printed
 * The name and the conversion are turned into string pieces with the
 * stringizing operator and joined by literal concatenation.  Writing the
 * parameters inside one string literal only works with pre-ANSI
 * preprocessors: an ANSI/ISO preprocessor leaves such text alone, so every
 * line would read "label = ..." and the argument would be converted with
 * the wrong specifier.
 */
#define vprint(label,format,var) printf (#label " = %" #format "\n", var)
/*
 * printstats: print the summary statistics for the data read by input ().
 *
 * Everything is derived from the running totals N, Sum, s2, s3 and s4 and
 * from the extremes Minx and Maxx.  The order statistics (median and
 * quartiles) are produced only when Storedata is set, by sorting Datax in
 * place and calling centile ().
 * The output has six groups: counts, central tendency, variability,
 * five-number summary, skew and kurtosis, and the test of the mean against
 * F_null.  With Variable set each value is written through vprint;
 * otherwise each group is a row of 12-column headings, a row of values and
 * a dashed rule.
 */
printstats ()
{
double pof (); /* probability of F ratio */
double centile (); /* percentile function */
double M = Sum/N; /* mean */
double M2 = M*M; /* square of mean */
double var = (s2 - M*Sum)/(N-1); /* variance */
double sd = sqrt (var); /* standard deviation */
double sk; /* skew */
double kt; /* kurtosis */
double q1, q3; /* first and third quartiles */
double median; /* 50th percentile */
char *line =
"------------------------------------------------------------";
double tval, fval, prob;
/* a variance below FZERO is reported through ERRMSG2 (stat.h macro, not
   visible here; presumably it terminates the program - confirm) */
if (var < FZERO)
ERRMSG2 (All these %d numbers equal %.4g, N, M)
/* numerators are the sums of (x-M)^3 and (x-M)^4 expanded in terms of the
   power sums; in the cubic, 3*M2*Sum - M2*Sum stands for 3*M2*Sum - N*M^3 */
sk = (s3 - 3.0*M*s2 + 3.0*M2*Sum - M2*Sum)/(N*var*sd);
kt = (s4-4.*M*s3+6.*M2*s2-4.*M2*M*Sum+N*M2*M2)/(N*var*var);
/* median, q1 and q3 are assigned only here; every later use is guarded by
   the same Storedata test */
if (Storedata)
{
#ifndef MSDOS
qsort ((char *) Datax, N, sizeof (float), fltcmp);
#else
fqsort (Datax, N);
#endif
median = centile (50, Datax, N);
q1 = centile (25, Datax, N),
q3 = centile (75, Datax, N);
}
/* PRINT FREQUENCY COUNTS */
if (!Variable)
puts (line);
if (Variable)
{
vprint (undermin,d,Undermin);
vprint (count,d,N);
vprint (overmax,d,Overmax);
vprint (missing,d,NAcount);
vprint (sum,g,Sum);
vprint (sumsq,g,s2);
}
else
{
printf ("%12s%12s%12s%12s%12s\n",
"Under Range", "In Range", "Over Range", "Missing", "Sum");
printf ("%12d%12d%12d%12d%12.3f\n", Undermin, N, Overmax, NAcount, Sum);
puts (line);
}
/* PRINT CENTRAL TENDENCY */
/* the geometric and harmonic means appear only if every value was > FZERO */
if (Variable)
{
vprint (mean,g,M);
if (Storedata)
vprint (median,g,median);
vprint (midpoint,g,(Maxx+Minx)/2.0);
if (Allgtzero == TRUE)
{
vprint (geomean,g,exp (Gmean/N));
vprint (harmean,g,N/Hmean);
}
}
else
{
printf ("%12s%12s%12s%12s%12s\n",
"Mean", "Median", "Midpoint", "Geometric", "Harmonic");
printf ("%12.3f", M);
if (Storedata)
printf ("%12.3f", median);
else
printf ("%12s", ""); /* keep the columns aligned when there is no median */
printf ("%12.3f", (Maxx+Minx)/2.0);
if (Allgtzero == TRUE)
printf("%12.3f%12.3f\n", exp (Gmean/N), N/Hmean);
else
putchar ('\n');
puts (line);
}
/* PRINT VARIABILITY */
/* the quartile deviation is half the distance between the quartiles */
if (Variable)
{
vprint (sd,g,sd);
if (Storedata)
vprint (quartdev,g,(q3-q1)/2.0);
vprint (range,g,Maxx-Minx);
vprint (semean,g,sqrt (var/N));
}
else
{
printf ("%12s%12s%12s%12s\n", "SD", "Quart Dev", "Range", "SE mean");
printf("%12.3f", sd);
if (Storedata)
printf ("%12.3f", (q3-q1)/2.0);
else
printf ("%12s", "");
printf ("%12.3f", Maxx-Minx);
printf ("%12.3f\n", sqrt(var/N));
puts (line);
}
/* PRINT FIVENUMS */
/* without stored data only the minimum and maximum are available */
if (Variable)
{
vprint (min,g,Minx);
if (Storedata)
{
vprint (q1,g,q1);
vprint (q2,g,median);
vprint (q3,g,q3);
}
vprint (max,g,Maxx);
}
else
{
printf ("%12s", "Minimum");
if (Storedata)
printf ("%12s%12s%12s", "Quartile 1", "Quartile 2", "Quartile 3");
printf ("%12s\n", "Maximum");
printf ("%12.3f", Minx);
if (Storedata)
printf ("%12.3f%12.3f%12.3f", q1, median, q3);
printf ("%12.3f\n", Maxx);
puts (line);
}
/* PRINT SKEW AND KURTOSIS */
/* the table form also prints sqrt (6/N) and sqrt (24/N) as their SDs */
if (Variable)
{
vprint (skew,g,sk);
vprint (kurt,g,kt);
}
else
{
printf ("%12s%12s%12s%12s\n", "Skew", "SD Skew", "Kurtosis", "SD Kurt");
printf ("%12.3f%12.3f%12.3f%12.3f\n",
sk, sqrt (6.0/N), kt, sqrt (24.0/N));
puts (line);
}
/* PRINT TEST OF THE MEAN AGAINST F_null */
/* t is the distance of the mean from F_null in standard errors, F is its
   square, and the one probability from pof (F, 1, N-1) is printed for both */
tval = (M - F_null)/(sqrt (var/N));
fval = tval*tval;
prob = pof (fval, 1, N-1);
if (Variable)
{
vprint (nullmean,g,F_null);
vprint (t,g,tval);
vprint (probt,g,prob);
vprint (F,g,fval);
vprint (probF,g,prob);
}
else
{
printf ("%12s%12s%12s%12s%12s\n",
"Null Mean", "t", "prob (t)", "F", "prob (F)");
printf ("%12.3f%12.3f%12.3f%12.3f%12.3f\n",
F_null, tval, prob, fval, prob);
puts (line);
}
}
printtable ()
{
register int point; /* looping variable */
register int i; /* looping variable */
int maxindex; /* maximum index for Maxx */
double midpoint; /* midpoint of each interval */
int cumf = 0; /* cumulative frequency */
double fcumf = 0.0; /* floating cumulative frequency */
if (!Setminimum)
Minimum = floor (Minx);
if (!Setmaximum)
Maximum = Maxx;
if (!Setintwidth)
{
Intwidth = (Maxx-Minimum)/sqrt(2.0*N);
if (fabs (Intwidth) > 1.0)
Intwidth = floor (Intwidth);
}
if (!Onepass)
for (point=0; point<N; point++)
freq[ bindex ( Datax[point] ) ]++;
midpoint = Minimum - Intwidth/2.0;
maxindex = bindex (Maximum);
printf ("%12s", "Midpt");
if (Frequencies)
{
printf ("%8s", "Freq");
if (Cumulative)
printf ("%8s", "Cum");
}
if (Proportions)
{
printf ("%8s", "Prop");
if (Cumulative)
printf ("%8s", "Cum");
}
putchar ('\n');
for (i = 0; i <= maxindex; i++)
{
printf ("%12.3f", midpoint += Intwidth);
if (Frequencies)
{
printf ("%8d", freq[i]);
if (Cumulative)
printf ("%8d", cumf += freq[i]);
}
if (Proportions)
{
printf ("%8.3f", freq[i]*1.0/N);
if (Cumulative)
{
fcumf += freq[i];
printf ("%8.3f", fcumf/N);
}
}
if (Histogram)
{
putchar (' ');
for (point = 1; point <= freq[i]; point++)
putchar ('*');
}
putchar ('\n');
}
}
/*
 * bindex: return the index in freq[] of the bin that holds xval.
 *
 *   xval   the value to classify (old-style float parameter, so callers
 *          pass a double and it is narrowed here)
 *
 * Bin 0 starts at Minimum and every bin is Intwidth wide.  A value that
 * falls exactly on a boundary belongs to the bin below it, and Minimum
 * itself belongs to bin 0.
 * The position is range-checked while it is still a floating value.
 * Converting a quotient that is huge, infinite or not a number to int is
 * undefined and could slip past the later comparison as a negative index;
 * such values are mapped to MAXBINS, and positions of -1 or below to -1, so
 * that both are reported.  A position between -1 and 0 (a value that is a
 * rounding error away from Minimum) goes to bin 0; a quotient of exactly 0
 * used to produce index -1.
 * An index outside 0..MAXBINS-1 is reported through ERRMSG1 (stat.h macro,
 * not visible here; presumably it terminates the program - confirm).
 */
int
bindex (xval) float xval;
{
	int 	answer;     /* bin index, or an out-of-range marker */
	float	findex;     /* position of xval in units of Intwidth */

	if (xval == Minimum)
		return (0);
	findex = (xval - Minimum)/Intwidth;
	if (!(findex < MAXBINS + 1.0))      /* too large, infinite or NaN */
		answer = MAXBINS;
	else if (findex <= -1.0)            /* at least one whole bin below Minimum */
		answer = -1;
	else if (findex <= 0.0)             /* rounding noise around Minimum */
		answer = 0;
	else if (floor (findex) == findex)  /* on a boundary: use the lower bin */
		answer = findex - 1.0;
	else
		answer = findex;
	if (answer < 0 || answer >= MAXBINS)
		ERRMSG1 (bin[%d] is out of range, answer)
	return (answer);
}
/*
 * usinfo: answer the information options and stop.
 *
 * -V prints the version, -L the compiled-in limits and -O one line per
 * option together with its current setting.  If at least one of the three
 * was given the program exits with status 0 afterwards; otherwise the
 * function does nothing.  The printing routines (pver, plim, const, ppgm,
 * lopt, ropt) are defined outside this file.
 */
usinfo ()
{
	if (InfoVersion)
		{
		pver (Version);
		}

	if (InfoLimits)
		{
		plim (Argv0);
		const (MAXPOINTS, "maximum number of data points");
		const (MAXBINS, "maximum number of frequency bins");
		const (MAXCHARS, "maximum number of characters in input numbers");
		}

	if (InfoOptions)
		{
		ppgm (Argv0, Purpose);
		lopt ('c', "cumulative frequencies or proportions", Cumulative);
		lopt ('f', "request table of frequencies", Frequencies);
		ropt ('F', "Ho", "F-test against mean Ho", F_null);
		lopt ('h', "request a histogram", Histogram);
		ropt ('i', "width","interval width for tables & histograms", Intwidth);
		ropt ('m', "min", "minimum allowable value", Minimum);
		ropt ('M', "max", "maximum allowable value", Maximum);
		lopt ('o', "request order statistics", Storedata);
		lopt ('p', "request table of proportions", Proportions);
		lopt ('s', "request summary statistics", Stats);
		ropt ('t', "Ho", "t-test against mean Ho", F_null);
		lopt ('v', "print statistics in name=value format", Variable);
		}

	/* any information request replaces the normal run */
	if (InfoOptions || InfoLimits || InfoVersion)
		{
		exit (0);
		}
}