|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T d
Length: 11833 (0x2e39) Types: TextFile Names: »desc.c«
└─⟦87ddcff64⟧ Bits:30001253 CPHDIST85 Tape, 1985 Autumn Conference Copenhagen └─⟦this⟧ »cph85dist/stat/src/desc.c«
#include "unixstat.h" PGM(desc,descriptive statistics,5.0,3/3/85) /* Copyright (c) 1982 Gary Perlman (see Copyright file) */ /* This program analyses single distributions of data. It was written by Gary Perlman at UCSD August 1979. STRUCTURE OF DESC main input bindex printstats percentile printtable bindex ALPHABETICAL ANNOTATION bindex (xval) returns the bin number xval should go into input () reads from standard input and sets N, sums of powers, minimum, maximum might store frequencies in freq array percentile (perc, vector, n) returns the perc percentile of n long vector, vector printstats () prints all summary statistics printtable () prints histogram, frequency or proportion table */ #ifdef SMALL_MEM #define MAXBINS 250 /* maximum number of bins for tables */ #define MAXPOINTS 2500 /* maximum number of input points if storing */ #else #define MAXBINS 1000 /* maximum number of bins for tables */ #define MAXPOINTS 10000 /* maximum number of input points if storing */ #endif typedef int BOOL; /* no boolean type in C */ #define TRUE 1 #define FALSE 0 BOOL stats; /* print statistics */ BOOL ftest; /* print ftest */ BOOL table; /* print a table of some sort */ BOOL histogram; /* print histogram */ BOOL frequencies; /* print frequency table */ BOOL proportions; /* print proportions table */ BOOL cumulative; /* make table cumulative */ BOOL storedata; /* store the data */ BOOL onepass; /* can run with one pass */ BOOL setmaximum; /* maximum value has been set */ BOOL setminimum; /* minimum value has been set */ BOOL setintwidth; /* interval width has been set */ BOOL variable; /* print stats in variable format */ float datax[MAXPOINTS]; /* data stored in here */ int freq[MAXBINS]; /* frequency counts stored in here */ int N; /* number of points read in */ double sum; /* sum of all points read in */ double s2; /* sum of squares of all points read in */ double s3; /* sum of cubes of all points read in */ double s4; /* sum of x^4 of all points read in */ double gmean; /* geometric mean */ double hmean; /* harmonic mean */ int ngtzero; /* number of points greater than zero */ double f_null; /* null value for t test */ double minx; /* min value of x */ double maxx; /* max value of x */ int undermin; /* number of points less than minimum */ int overmax; /* number of points more than maximum */ double intwidth; /* width of interval of freqency count bins */ double minimum; /* minimum allowable value of x */ double maximum; /* maximum allowable value of x */ main (argc, argv) int argc; char *argv[]; { ARGV0; initial (argc, argv); checkstdin (Argv0); input (); if (stats) printstats (); if (table) printtable (); exit (0); } initial (argc, argv) int argc; char **argv; { extern int optind; extern char *optarg; int C; int opterr = 0; if (argc == 1) { stats = storedata = TRUE; return; } #define OPTSTRING "cfF:t:hi:m:M:poSsv" while ((C = getopt (argc, argv, OPTSTRING)) != EOF) switch (C) { case 'c': cumulative = TRUE; break; case 'f': frequencies = table = TRUE; break; case 't': case 'F': if (!number (optarg)) ERRNUM (optarg,null mean) f_null = atof (optarg); ftest = stats = TRUE; break; case 'h': histogram = frequencies = table = TRUE; break; case 'i': if (!number (optarg)) ERRNUM (optarg,interval width) intwidth = atof (optarg); setintwidth = table = TRUE; if (intwidth <= 0.0) ERRVAL (g,intwidth,interval width) break; case 'm': if (!number (optarg)) ERRNUM (optarg,minimum value) minimum = atof (optarg); setminimum = TRUE; break; case 'M': if (!number (optarg)) ERRNUM (optarg,maximum value) maximum = atof (optarg); setmaximum = TRUE; break; case 'p': proportions = table = TRUE; break; case 'S': case 'o': storedata = TRUE; case 's': stats = TRUE; break; case 'v': variable = stats = TRUE; break; default: opterr++; } if (opterr) USAGE ([-cfphsov] [-i interval] [-m min] [-M max] [-F|t Ho]) ERROPT (optind) if (table) { if (setintwidth && setminimum) onepass = TRUE; else storedata = TRUE; if (!frequencies && !proportions) histogram = TRUE; } else if (setminimum || setmaximum) stats = TRUE; } input () { double x; /* each datum read in here */ double x2; /* square of x */ char stringx[50]; /* string version of x read in here */ while (getword (stringx, stdin)) { if (!number (stringx)) ERRNUM (stringx,input value) x = atof (stringx); if (setminimum && x < minimum) {undermin++; continue;} if (setmaximum && x > maximum) {overmax++; continue;} if (N == 0) { minx = maxx = x; } if (storedata) if (N == MAXPOINTS) { WARNING (too much data for storing) storedata = FALSE; } else datax[N] = x; if (onepass) freq[bindex(x)]++; x2 = x*x; sum += x; s2 += x2; s3 += x2*x; s4 += x2*x2; if (x > 0.0) { gmean += log (x); hmean += 1.0 / x; ngtzero++; } if (x > maxx) maxx = x; if (x < minx) minx = x; N++; } if (N <= 1) ERRDATA } #define vprint(label,format,var) printf ("label = %format\n", var) printstats () { double pof (); /* probability of F ratio */ double percentile (); /* percentile function */ #ifndef MSDOS /* don't need to compare floats for qsort */ int fltcmp (); /* compares two float numbers */ #endif double M = sum/N; /* mean */ double M2 = M*M; /* square of mean */ double var = (s2 - M*sum)/(N-1); /* variance */ double sd = sqrt (var); /* standard deviation */ double sk; /* skew */ double kt; /* kurtosis */ double q1, q3; /* first and third quartiles */ double median; /* 50th percentile */ char *line = "------------------------------------------------------------"; if (var == 0.0) ERRMSG2 (All these %d numbers equal %.4g, N, M) sk = (s3 - 3.0*M*s2 + 3.0*M2*sum - M2*sum)/(N*var*sd); kt = (s4-4.*M*s3+6.*M2*s2-4.*M2*M*sum+N*M2*M2)/(N*var*var); if (storedata) { #ifndef MSDOS /* qsort not available on MSDOS */ qsort ((char *) datax, N, sizeof (float), fltcmp); #else shellsort (datax, N); #endif median = percentile (50, datax, N); q1 = percentile (25, datax, N), q3 = percentile (75, datax, N); } /* PRINT FREQUENCY COUNTS */ if (!variable) puts (line); if (variable) { vprint (undermin,d,undermin); vprint (count,d,N); vprint (overmax,d,overmax); vprint (sum,g,sum); vprint (sumsq,g,s2); } else { printf ("%12s%12s%12s%12s\n", "Under Range", "In Range", "Over Range", "Sum"); printf ("%12d%12d%12d%12.3f\n", undermin, N, overmax, sum); puts (line); } /* PRINT CENTRAL TENDENCY */ if (variable) { vprint (mean,g,M); if (storedata) vprint (median,g,median); vprint (midpoint,g,(maxx+minx)/2.0); if (ngtzero == N) { vprint (geomean,g,exp (gmean/ngtzero)); vprint (harmean,g,ngtzero/hmean); } } else { printf ("%12s%12s%12s%12s%12s\n", "Mean", "Median", "Midpoint", "Geometric", "Harmonic"); printf ("%12.3f", M); if (storedata) printf ("%12.3f", median); else printf ("%12s", ""); printf ("%12.3f", (maxx+minx)/2.0); if (ngtzero == N) printf("%12.3f%12.3f\n", exp (gmean/ngtzero), ngtzero/hmean); else putchar ('\n'); puts (line); } /* PRINT VARIABILITY */ if (variable) { vprint (sd,g,sd); if (storedata) vprint (quartdev,g,(q3-q1)/2.0); vprint (range,g,maxx-minx); vprint (semean,g,sqrt (var/N)); } else { printf ("%12s%12s%12s%12s\n", "SD", "Quart Dev", "Range", "SE mean"); printf("%12.3f", sd); if (storedata) printf ("%12.3f", (q3-q1)/2.0); else printf ("%12s", ""); printf ("%12.3f", maxx-minx); printf ("%12.3f\n", sqrt(var/N)); puts (line); } /* PRINT FIVENUMS */ if (variable) { vprint (min,g,minx); if (storedata) { vprint (q1,g,q1); vprint (q2,g,median); vprint (q3,g,q3); } vprint (max,g,maxx); } else { printf ("%12s", "Minimum"); if (storedata) printf ("%12s%12s%12s", "Quartile 1", "Quartile 2", "Quartile 3"); printf ("%12s\n", "Maximum"); printf ("%12.3f", minx); if (storedata) printf ("%12.3f%12.3f%12.3f", q1, median, q3); printf ("%12.3f\n", maxx); puts (line); } /* PRINT OTHER STUFF */ if (variable) { vprint (skew,g,sk); vprint (kurt,g,kt); } else { printf ("%12s%12s\n", "Skew", "Kurtosis"); printf ("%12.3f%12.3f\n", sk, kt); puts (line); } if (ftest) { double tval, fval, prob; tval = (M - f_null)/(sqrt (var/N)); fval = tval*tval; prob = pof (fval, 1, N-1); if (variable) { vprint (nullmean,g,f_null); vprint (t,g,tval); vprint (probt,g,prob); vprint (F,g,fval); vprint (probF,g,prob); } else { printf ("%12s%12s%12s%12s%12s\n", "Null Mean", "t", "prob (t)", "F", "prob (F)"); printf ("%12.3f%12.3f%12.3f%12.3f%12.3f\n", f_null, tval, prob, fval, prob); puts (line); } } } #ifndef MSDOS /* don't need this for unavailable qsort */ int fltcmp (f1, f2) float *f1, *f2; { if (*f1 < *f2) return (-1); if (*f1 == *f2) return (0); return (1); } #else #define FLOAT_SORT #include "shellsort.c" #endif printtable () { register int point; /* looping variable */ register int i; /* looping variable */ int maxindex; /* maximum index for maxx */ double midpoint; /* midpoint of each interval */ int cumf = 0; /* cumulative frequency */ double fcumf = 0.0; /* floating cumulative frequency */ if (!setminimum) minimum = floor (minx); if (!setmaximum) maximum = maxx; if (!setintwidth) { intwidth = (maxx-minimum)/sqrt(2.0*N); if (fabs (intwidth) > 1.0) intwidth = floor (intwidth); } if (!onepass) for (point=0; point<N; point++) freq[ bindex ( datax[point] ) ]++; midpoint = minimum - intwidth/2.0; maxindex = bindex (maximum); printf ("%8s", "Midpt"); if (frequencies) { printf ("%8s", "Freq"); if (cumulative) printf ("%8s", "Cum"); } if (proportions) { printf ("%8s", "Prop"); if (cumulative) printf ("%8s", "Cum"); } putchar ('\n'); for (i = 0; i <= maxindex; i++) { printf ("%8.3f", midpoint += intwidth); if (frequencies) { printf ("%8d", freq[i]); if (cumulative) printf ("%8d", cumf += freq[i]); } if (proportions) { printf ("%8.3f", freq[i]*1.0/N); if (cumulative) { fcumf += freq[i]; printf ("%8.3f", fcumf/N); } } if (histogram) { putchar (' '); for (point = 1; point <= freq[i]; point++) putchar ('*'); } putchar ('\n'); } } int bindex (xval) float xval; { int answer; float findex; if (xval == minimum) return (0); findex = (xval - minimum)/intwidth; if (floor (findex) == findex) answer = findex - 1.0; else answer = findex; if (answer >= MAXBINS) ERRMSG1 (bin[%d] is out of range, answer) return (answer); } /*returns the perc percentile of sorted v[n]*/ double percentile (perc, v, n) float v[]; int perc, n; { float findex; int pindex; findex = perc / 100.0 * n - 0.5; if ( findex < 0.0) findex = 0.0; else if (findex > (n-1.0)) findex = n - 1.0; pindex = findex; return (v[pindex+1] * (findex - pindex) + v[pindex] * (1.0 - findex + pindex)); }