DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - download
Index: ┃ T d

⟦fe5f58b77⟧ TextFile

    Length: 11833 (0x2e39)
    Types: TextFile
    Names: »desc.c«

Derivation

└─⟦87ddcff64⟧ Bits:30001253 CPHDIST85 Tape, 1985 Autumn Conference Copenhagen
    └─ ⟦this⟧ »cph85dist/stat/src/desc.c« 

TextFile

#include "unixstat.h"
PGM(desc,descriptive statistics,5.0,3/3/85)
/* Copyright (c) 1982 Gary Perlman (see Copyright file) */

/*
	This program analyses single distributions of data.
	It was written by Gary Perlman at UCSD August 1979.

		STRUCTURE OF DESC
	main
		input
			bindex
		printstats
			percentile
		printtable
			bindex

		ALPHABETICAL ANNOTATION
	bindex (xval)
		returns the bin number xval should go into
	input ()
		reads from standard input and sets
		N, sums of powers, minimum, maximum
		might store frequencies in freq array
	percentile (perc, vector, n)
		returns the perc percentile of n long vector, vector
	printstats ()
		prints all summary statistics
	printtable ()
		prints histogram, frequency or proportion table
*/

#ifdef SMALL_MEM
#define	MAXBINS      250       /* maximum number of bins for tables */
#define	MAXPOINTS   2500       /* maximum number of input points if storing */
#else
#define	MAXBINS     1000       /* maximum number of bins for tables */
#define	MAXPOINTS  10000       /* maximum number of input points if storing */
#endif

typedef	int 	BOOL; /* no boolean type in C */
#define	TRUE	1
#define	FALSE	0
BOOL	stats;                /* print statistics */
BOOL	ftest;                /* print ftest */
BOOL	table;                /* print a table of some sort */
BOOL	histogram;            /* print histogram */
BOOL	frequencies;          /* print frequency table */
BOOL	proportions;          /* print proportions table */
BOOL	cumulative;           /* make table cumulative */
BOOL	storedata;            /* store the data */
BOOL	onepass;              /* can run with one pass */
BOOL	setmaximum;           /* maximum value has been set */
BOOL	setminimum;           /* minimum value has been set */
BOOL	setintwidth;          /* interval width has been set */
BOOL	variable;             /* print stats in variable format */

float	datax[MAXPOINTS];     /* data stored in here */
int 	freq[MAXBINS];        /* frequency counts stored in here */
int 	N;                    /* number of points read in */
double	sum;                  /* sum of all points read in */
double	s2;                   /* sum of squares of all points read in */
double	s3;                   /* sum of cubes of all points read in */
double	s4;                   /* sum of x^4 of all points read in */
double	gmean;                /* geometric mean */
double	hmean;                /* harmonic mean */
int 	ngtzero;              /* number of points greater than zero */
double	f_null;               /* null value for t test */
double	minx;                 /* min value of x */
double	maxx;                 /* max value of x */
int 	undermin;             /* number of points less than minimum */
int 	overmax;              /* number of points more than maximum */
double	intwidth;             /* width of interval of freqency count bins */
double	minimum;              /* minimum allowable value of x */
double	maximum;              /* maximum allowable value of x */

main (argc, argv) int argc; char *argv[];
	{
	ARGV0;
	initial (argc, argv);
	checkstdin (Argv0);
	input ();
	if (stats) printstats ();
	if (table) printtable ();
	exit (0);
	}

initial (argc, argv) int argc; char **argv;
	{
	extern	int 	optind;
	extern	char 	*optarg;
	int 	C;
	int 	opterr = 0;
	if (argc == 1) { stats = storedata = TRUE; return; }
#define	OPTSTRING	"cfF:t:hi:m:M:poSsv"
	while ((C = getopt (argc, argv, OPTSTRING)) != EOF)
		switch (C)
		{
		case 'c': cumulative = TRUE; break;
		case 'f': frequencies = table = TRUE; break;
		case 't':
		case 'F':
			if (!number (optarg)) ERRNUM (optarg,null mean)
			f_null = atof (optarg);
			ftest = stats = TRUE;
			break;
		case 'h': histogram = frequencies = table = TRUE; break;
		case 'i':
			if (!number (optarg)) ERRNUM (optarg,interval width)
			intwidth = atof (optarg);
			setintwidth = table = TRUE;
			if (intwidth <= 0.0) ERRVAL (g,intwidth,interval width)
			break;
		case 'm':
			if (!number (optarg)) ERRNUM (optarg,minimum value)
			minimum = atof (optarg);
			setminimum = TRUE;
			break;
		case 'M':
			if (!number (optarg)) ERRNUM (optarg,maximum value)
			maximum = atof (optarg);
			setmaximum = TRUE;
			break;
		case 'p': proportions = table =  TRUE; break;
		case 'S':
		case 'o': storedata = TRUE;
		case 's': stats = TRUE; break;
		case 'v': variable = stats = TRUE; break;
		default: opterr++;
		}
	if (opterr)
		USAGE ([-cfphsov] [-i interval] [-m min] [-M max]  [-F|t Ho])
	ERROPT (optind)
	if (table)
		{
		if (setintwidth && setminimum) onepass = TRUE;
		else storedata = TRUE;
		if (!frequencies && !proportions) histogram = TRUE;
		}
	else if (setminimum || setmaximum) stats = TRUE;
	}

input ()
	{
	double	x;                     /* each datum read in here */
	double	x2;                    /* square of x */
	char	stringx[50];           /* string version of x read in here */
	while (getword (stringx, stdin))
		{
		if (!number (stringx))
			ERRNUM (stringx,input value)
		x = atof (stringx);
		if (setminimum && x < minimum) {undermin++; continue;}
		if (setmaximum && x > maximum) {overmax++; continue;}
		if (N == 0)
			{
			minx = maxx = x;
			}
		if (storedata)
			if (N == MAXPOINTS)
				{
				WARNING (too much data for storing)
				storedata = FALSE;
				}
			else datax[N] = x;
		if (onepass) freq[bindex(x)]++;
		x2 = x*x;
		sum += x;
		s2  += x2;
		s3  += x2*x;
		s4  += x2*x2;
		if (x > 0.0)
			{
			gmean += log (x);
			hmean += 1.0 / x;
			ngtzero++;
			}
		if (x > maxx) maxx = x;
		if (x < minx) minx = x;
		N++;
		}
	if (N <= 1)
		ERRDATA
	}

#define	vprint(label,format,var) printf ("label = %format\n", var)
printstats ()
	{
	double	pof ();                     /* probability of F ratio */
	double	percentile ();              /* percentile function */
#ifndef MSDOS /* don't need to compare floats for qsort */
	int 	fltcmp ();                  /* compares two float numbers */
#endif
	double	M	= sum/N;                /* mean */
	double	M2	= M*M;                  /* square of mean */
	double	var	= (s2 - M*sum)/(N-1);   /* variance */
	double	sd	= sqrt (var);           /* standard deviation */
	double	sk;                         /* skew */
	double	kt;                         /* kurtosis */
	double	q1, q3;                     /* first and third quartiles */
	double	median;                     /* 50th percentile */
	char	*line	=
	"------------------------------------------------------------";
	if (var == 0.0)
		ERRMSG2 (All these %d numbers equal %.4g, N, M)
	sk = (s3 - 3.0*M*s2 + 3.0*M2*sum - M2*sum)/(N*var*sd);
	kt = (s4-4.*M*s3+6.*M2*s2-4.*M2*M*sum+N*M2*M2)/(N*var*var);
	if (storedata)
		{
#ifndef MSDOS /* qsort not available on MSDOS */
		qsort ((char *) datax, N, sizeof (float), fltcmp);
#else
		shellsort (datax, N);
#endif
		median = percentile (50, datax, N);
		q1 = percentile (25, datax, N),
		q3 = percentile (75, datax, N);
		}
	/*	PRINT FREQUENCY COUNTS */
	if (!variable) puts (line);
	if (variable)
		{
		vprint (undermin,d,undermin);
		vprint (count,d,N);
		vprint (overmax,d,overmax);
		vprint (sum,g,sum);
		vprint (sumsq,g,s2);
		}
	else
		{
		printf ("%12s%12s%12s%12s\n",
			"Under Range", "In Range", "Over Range", "Sum");
		printf ("%12d%12d%12d%12.3f\n", undermin, N, overmax, sum);
		puts (line);
		}
	/*	PRINT CENTRAL TENDENCY */
	if (variable)
		{
		vprint (mean,g,M);
		if (storedata) vprint (median,g,median);
		vprint (midpoint,g,(maxx+minx)/2.0);
		if (ngtzero == N)
			{
			vprint (geomean,g,exp (gmean/ngtzero));
			vprint (harmean,g,ngtzero/hmean);
			}
		}
	else
		{
		printf ("%12s%12s%12s%12s%12s\n",
			"Mean", "Median", "Midpoint", "Geometric", "Harmonic");
		printf ("%12.3f", M);
		if (storedata)
			printf ("%12.3f", median);
		else	printf ("%12s", "");
		printf ("%12.3f", (maxx+minx)/2.0);
		if (ngtzero == N)
			printf("%12.3f%12.3f\n",
				exp (gmean/ngtzero), ngtzero/hmean);
		else putchar ('\n');
		puts (line);
		}
	/*	PRINT VARIABILITY */
	if (variable)
		{
		vprint (sd,g,sd);
		if (storedata)
			vprint (quartdev,g,(q3-q1)/2.0);
		vprint (range,g,maxx-minx);
		vprint (semean,g,sqrt (var/N));
		}
	else
		{
		printf ("%12s%12s%12s%12s\n",
			"SD", "Quart Dev", "Range", "SE mean");
		printf("%12.3f", sd);
		if (storedata)
			printf ("%12.3f", (q3-q1)/2.0);
		else	printf ("%12s", "");
		printf ("%12.3f", maxx-minx);
		printf ("%12.3f\n", sqrt(var/N));
		puts (line);
		}
	/*	PRINT FIVENUMS */
	if (variable)
		{
		vprint (min,g,minx);
		if (storedata)
			{
			vprint (q1,g,q1);
			vprint (q2,g,median);
			vprint (q3,g,q3);
			}
		vprint (max,g,maxx);
		}
	else
		{
		printf ("%12s", "Minimum");
		if (storedata)
			printf ("%12s%12s%12s",
				"Quartile 1", "Quartile 2", "Quartile 3");
		printf ("%12s\n", "Maximum");
		printf ("%12.3f", minx);
		if (storedata)
			printf ("%12.3f%12.3f%12.3f", q1, median, q3);
		printf ("%12.3f\n", maxx);
		puts (line);
		}
	/*	PRINT OTHER STUFF */
	if (variable)
		{
		vprint (skew,g,sk);
		vprint (kurt,g,kt);
		}
	else
		{
		printf ("%12s%12s\n", "Skew", "Kurtosis");
		printf ("%12.3f%12.3f\n", sk, kt);
		puts (line);
		}
	if (ftest)
		{
		double	tval, fval, prob;
		tval = (M - f_null)/(sqrt (var/N));
		fval = tval*tval;
		prob = pof (fval, 1, N-1);
		if (variable)
			{
			vprint (nullmean,g,f_null);
			vprint (t,g,tval);
			vprint (probt,g,prob);
			vprint (F,g,fval);
			vprint (probF,g,prob);
			}
		else
			{
			printf ("%12s%12s%12s%12s%12s\n",
				"Null Mean", "t", "prob (t)", "F", "prob (F)");
			printf ("%12.3f%12.3f%12.3f%12.3f%12.3f\n", 
				f_null, tval, prob, fval, prob);
			puts (line);
			}
		}
	}

#ifndef MSDOS /* don't need this for unavailable qsort */
int fltcmp (f1, f2) float *f1, *f2;
	{ if (*f1 < *f2) return (-1); if (*f1 == *f2) return (0); return (1); }
#else
#define FLOAT_SORT
#include "shellsort.c"
#endif

printtable ()
	{
	register int point;            /* looping variable */
	register int i;                /* looping variable */
	int 	maxindex;              /* maximum index for maxx */
	double	midpoint;              /* midpoint of each interval */
	int 	cumf	= 0;           /* cumulative frequency */
	double	fcumf	= 0.0;         /* floating cumulative frequency */
	if (!setminimum) minimum = floor (minx);
	if (!setmaximum) maximum = maxx;
	if (!setintwidth)
		{
		intwidth = (maxx-minimum)/sqrt(2.0*N);
		if (fabs (intwidth) > 1.0)
			intwidth = floor (intwidth);
		}
	if (!onepass) for (point=0; point<N; point++)
		freq[ bindex ( datax[point] ) ]++;
	midpoint = minimum - intwidth/2.0;
	maxindex = bindex (maximum);
	printf ("%8s", "Midpt");
	if (frequencies)
		{
		printf ("%8s", "Freq");
		if (cumulative)
			printf ("%8s", "Cum");
		}
	if (proportions)
		{
		printf ("%8s", "Prop");
		if (cumulative)
			printf ("%8s", "Cum");
		}
	putchar ('\n');
	for (i = 0; i <= maxindex; i++)
		{
		printf ("%8.3f", midpoint += intwidth);
		if (frequencies)
			{
			printf ("%8d", freq[i]);
			if (cumulative)
				printf ("%8d", cumf += freq[i]);
			}
		if (proportions)
			{
			printf ("%8.3f", freq[i]*1.0/N);
			if (cumulative)
				{
				fcumf += freq[i];
				printf ("%8.3f", fcumf/N);
				}
			}
		if (histogram)
			{
			putchar (' ');
			for (point = 1; point <= freq[i]; point++)
				putchar ('*');
			}
		putchar ('\n');
		}
	}

int
bindex (xval) float xval;
	{
	int 	answer;
	float	findex;
	if (xval == minimum) return (0);
	findex = (xval - minimum)/intwidth;
	if (floor (findex) == findex) answer = findex - 1.0;
	else answer = findex;
	if (answer >= MAXBINS)
		ERRMSG1 (bin[%d] is out of range, answer)
	return (answer);
	}

/*returns the perc percentile of sorted v[n]*/
double
percentile (perc, v, n) float v[]; int perc, n;
	{
	float findex;
	int pindex;
	findex = perc / 100.0 * n  - 0.5;
	if ( findex < 0.0) findex = 0.0;
	else if (findex > (n-1.0)) findex = n - 1.0;
	pindex = findex;
	return (v[pindex+1] * (findex - pindex) +
		v[pindex] * (1.0 - findex + pindex));
	}