|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T p
Length: 11634 (0x2d72)
Types: TextFile
Names: »pair.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/pair.c«
/* Copyright 1980 Gary Perlman */
#include "stat.h"
PGM(pair,Paired Data Analysis and Plots,5.2,01/20/87)
/*NEW
put estimated X-Y values in axes so that
a line can be drawn through by hand
*/
#define prettyprint(title, x, y, d) \
printf ("%-16s %16.4f %16.4f %16.4f\n", title, x, y, d)
#define MAXPAIRS 1000 /* at most this many pairs */
#define MAXCHARS BUFSIZ /* maximum number of chars in lines */
float Xdata[MAXPAIRS]; /* data stored for plot in here */
float Ydata[MAXPAIRS]; /* data stored for plot in here */
char Plotchar; /* plotting character */
Boole Frameplot = TRUE; /* should plot be framed? */
int Height = 19; /* default height of plot */
int Width = 50; /* default width of plot */
double Top; /* top of plot or Max_y */
Boole SetTop = FALSE;
double Bottom; /* bottom of plot or Min_y */
Boole SetBottom = FALSE;
double Left; /* left of plot or Min_x */
Boole SetLeft = FALSE;
double Right; /* right of plot or Max_x */
Boole SetRight = FALSE;
int Perline = 2; /* number of points per line */
int Count = 0; /* number of points counted in */
int NAcount = 0; /* number of NA missing pairs */
double Sum_x, Sum_y, Sum_d; /* sums of scores */
double SS_x, SS_y, SS_d; /* sums of squares */
double Mean_x, Mean_y, Mean_d;/* means stored here */
double SD_x, SD_y, SD_d; /* standard deviations */
double Sum_xy; /* cross product */
double Min_x, Min_y, Min_d; /* minimums */
double Max_x, Max_y, Max_d; /* maximums */
double Intercept, Slope; /* intercept and slope */
double Correlation; /* correlation coefficient */
char *Name_x = "Column 1", *Name_y = "Column 2";
int Storedata = 1; /* assume data will be stored */
int Wantstats = 0; /* are statistics wanted? */
int Wantplot = 0; /* is a plot wanted? */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
\f
main (argc, argv) char **argv;
{
ARGV0;
initial (argc, argv);
readdata ();
compstats ();
if (Wantstats)
printstats ();
if (Wantplot)
scatterplot (Xdata, Ydata, Count);
exit (0);
}
\f
initial (argc, argv) char **argv;
{
extern int optind;
extern char *optarg;
int C;
int opterr = 0;
while ((C = getopt (argc, argv, "b:c:fh:l:pr:st:w:x:y:LOV")) != EOF)
switch (C)
{
case 'O': InfoOptions = TRUE; break;
case 'V': InfoVersion = TRUE; break;
case 'L': InfoLimits = TRUE; break;
case 'b':
SetBottom = TRUE;
Wantplot = TRUE;
if (setreal (Argv0, 'b', optarg, &Bottom))
opterr++;
break;
case 'c':
Wantplot = TRUE;
Plotchar = *optarg;
break;
case 'f':
Frameplot = FALSE;
Wantplot = TRUE;
break;
case 'h':
if (setint (Argv0, 'h', optarg, &Height, MIN_PLOT, MAX_HEIGHT))
opterr++;
Wantplot = 1;
break;
case 'l':
SetLeft = TRUE;
Wantplot = TRUE;
if (setreal (Argv0, 'l', optarg, &Left))
opterr++;
break;
case 'p':
Wantplot = TRUE;
break;
case 'r':
SetRight = TRUE;
if (setreal (Argv0, 'r', optarg, &Right))
opterr++;
break;
case 's':
Wantstats = 1;
break;
case 't':
SetTop = TRUE;
Wantplot = TRUE;
if (setreal (Argv0, 't', optarg, &Top))
opterr++;
break;
case 'w':
if (setint (Argv0, 'w', optarg, &Width, MIN_PLOT, MAX_WIDTH))
opterr++;
Wantplot = 1;
break;
case 'x':
Name_x = optarg;
break;
case 'y':
Name_y = optarg;
break;
default: opterr++;
}
if (opterr)
USAGE ([-fps] [-h height] [-w width] [-c char] [-x xname] [-y yname]\n\t[-t top] [-b bottom] [-l left] [-r right])
ERROPT (optind)
if (!Wantplot)
{
Storedata = 0;
Wantstats = 1;
}
usinfo ();
checkstdin ();
}
\f
readdata ()
{
double atof ();
char line[MAXCHARS];
char *array[2];
int fieldcount;
double x, y, d;
int lineno = 0;
while (fgets (line, sizeof (line), stdin))
{
lineno++;
fieldcount = parselin (line, array, 2);
if (fieldcount == 0)
continue;
if (isna (array[0]) || (fieldcount > 1 && isna (array[1])))
{
NAcount++;
continue;
}
if (Count == 0)
{
switch (Perline = fieldcount)
{
case 1: Perline = 1;
Min_x = 1.0;
Min_y = atof (array[0]);
break;
case 2: Perline = 2;
Min_x = atof (array[0]);
Min_y = atof (array[1]);
break;
default:
ERRMANY (columns, 2)
}
Max_x = Min_x;
Max_y = Min_y;
Min_d = Max_d = Min_x - Min_y;
}
else if (fieldcount != Perline)
ERRMSG1 (Must have 1 or 2 numbers per line (see line %d), lineno)
if (!number (array[0]) || ((Perline == 2) && !number (array[1])))
ERRMSG1 (Non-numerical input at line %d, lineno)
if (Perline == 2)
{
x = atof (array[0]);
y = atof (array[1]);
}
else
{
x = (double) (Count+1);
y = atof (array[0]);
}
d = x - y;
Sum_d += d;
SS_d += d*d;
Sum_x += x;
SS_x += x*x;
Sum_xy += x*y;
Sum_y += y;
SS_y += y*y;
if (x > Max_x)
Max_x = x;
else if (x < Min_x)
Min_x = x;
if (y > Max_y)
Max_y = y;
else if (y < Min_y)
Min_y = y;
if (d > Max_d)
Max_d = d;
else if (d < Min_d)
Min_d = d;
if (Storedata)
{
if (Count < MAXPAIRS)
{
Xdata[Count] = x;
Ydata[Count] = y;
}
else
{
WARNING (too much data for a plot)
Storedata = 0;
Wantstats = 1;
}
}
Count++;
}
}
\f
compstats ()
{
double standev (); /* compute standard deviation */
if (Count <= 1)
ERRDATA
Mean_x = Sum_x / Count;
Mean_y = Sum_y / Count;
Mean_d = Sum_d / Count;
SD_x = standev (Sum_x, SS_x, Count);
SD_y = standev (Sum_y, SS_y, Count);
SD_d = standev (Sum_d, SS_d, Count);
if (SD_x > FZERO)
{
Slope = (Sum_xy - Sum_x*Sum_y/Count) / (SS_x - Sum_x*Mean_x);
Intercept = Mean_y - Slope * Mean_x;
if (!fzero (SD_y))
Correlation = Slope * SD_x / SD_y;
else
Correlation = 1.0;
}
else
Intercept = Slope = Correlation = 0.0;
}
\f
printstats ()
{
double t_x, t_y, t_d; /* t test values */
double p_x, p_y, p_d, pof (); /* probability levels */
char tmpstr[20]; /* for internal formatting */
double t_r, p_r; /* correlation, t val & prob */
double fcount = (double) Count; /* double Count */
printf ("Analysis for %d points:\n", Count);
if (NAcount)
printf ("Missing points ignored: %d\n", NAcount);
printf ("%16s %16.16s %16.16s %16s\n", "", Name_x, Name_y, "Difference");
prettyprint ("Minimums", Min_x, Min_y, Min_d);
prettyprint ("Maximums", Max_x, Max_y, Max_d);
prettyprint ("Sums", Sum_x, Sum_y, Sum_d);
prettyprint ("SumSquares", SS_x, SS_y, SS_d);
prettyprint ("Means", Mean_x, Mean_y, Mean_d);
prettyprint ("SDs", SD_x, SD_y, SD_d);
if (SD_x > FZERO)
{
t_x = Mean_x*sqrt(fcount)/SD_x;
p_x = pof (t_x*t_x, 1, Count-1);
}
else
{
t_x = MAXT;
p_x = 0.0;
}
if (SD_y > FZERO)
{
t_y = Mean_y*sqrt(fcount)/SD_y;
p_y = pof (t_y*t_y, 1, Count-1);
}
else
{
t_y = MAXT;
p_y = 0.0;
}
if (SD_d > FZERO)
{
t_d = Mean_d*sqrt(fcount)/SD_d;
p_d = pof (t_d*t_d, 1, Count-1);
}
else
{
t_d = MAXT;
p_d = 0.0;
}
sprintf (tmpstr, "t(%d)", Count-1);
prettyprint (tmpstr, t_x, t_y, t_d);
prettyprint ("p", p_x, p_y, p_d);
if (Count > 2 && Correlation != 1.0 && Correlation != -1.0)
{
t_r = Correlation / sqrt ((1.0 - Correlation*Correlation) / (fcount - 2.0));
p_r = pof (t_r*t_r, 1, Count-2);
}
else
{
t_r = MAXT;
p_r = 0.0;
}
putchar ('\n');
sprintf (tmpstr, "t(%d)", Count-2);
printf ("%16s %16s %16s %16s\n", "Correlation", "r-squared", tmpstr, "p");
printf ("%16.4f %16.4f %16.4f %16.4f\n",
Correlation, Correlation*Correlation, t_r, p_r);
printf ("%16s %16s\n", "Intercept", "Slope");
printf ("%16.4f %16.4f\n", Intercept, Slope);
}
double
standev (sum, ss, count) double sum, ss;
{
if (count <= 1)
return (0.0);
return (sqrt ((ss-sum*sum/count)/(count-1)));
}
\f
#define EPSILON (.000000001)
#define scale(x,min,max,width) ((int) (width*((x)-(min))/((max)-(min)+EPSILON)))
#define MAXCOUNT 127 /* max plot frequency */
scatterplot (x, y, n)
float *x, *y;
{
int i;
int col, row;
char plot[MAX_HEIGHT][MAX_WIDTH];
int height2 = Height/2;
int ix, iy;
double xval, yval;
float minx = SetLeft ? Left : Min_x;
float maxx = SetRight ? Right : Max_x;
float miny = SetBottom ? Bottom : Min_y;
float maxy = SetTop ? Top : Max_y;
for (row = 0; row < Height; row++)
for (col = 0; col < Width; col++)
plot[row][col] = 0;
for (i = 0; i < n; i++)
{
xval = x[i];
yval = y[i];
if (yval < miny || yval > maxy || xval < minx || xval > maxx)
{
fprintf (stderr, "%s: point %d (%g,%g) out of range\n",
Argv0, i+1, xval, yval);
#ifdef DEBUG
printf ("maxy %20.10f\n", maxy);
printf ("yval %20.10f\n", yval);
printf ("miny %20.10f\n", miny);
printf ("minx %20.10f\n", minx);
printf ("xval %20.10f\n", xval);
printf ("maxx %20.10f\n", maxx);
#endif DEBUG
}
else
{
iy = scale (yval, miny, maxy, Height);
ix = scale (xval, minx, maxx, Width);
if (plot[iy][ix] < MAXCOUNT)
plot[iy][ix]++;
}
}
if (Frameplot)
{
putchar ('|');
for (col = 0; col < Width; col++)
putchar ('-');
putchar ('|');
printf ("%g", maxy);
putchar ('\n');
}
for (row = Height-1; row >= 0; row--)
{
if (Frameplot)
putchar ('|');
for (col = 0; col < Width; col++)
if (plot[row][col])
{
if (Plotchar)
putchar (Plotchar);
else if (plot[row][col] >= 10)
putchar ('*');
else
putchar (plot[row][col]+'0');
}
else putchar (' ');
if (Frameplot)
putchar ('|');
if (row == height2)
printf ("%s", Name_y);
putchar ('\n');
}
if (Frameplot)
{
putchar ('|');
for (col = 0; col < Width; col++)
putchar ('-');
putchar ('|');
printf ("%g", miny);
putchar ('\n');
numline (minx, maxx, Width+2); /* width + frame sides */
}
#define CORRWIDTH 8 /* length of " r = %x.yf for Correlation */
for (i = (Width - strlen (Name_x) - CORRWIDTH) / 2; i > 0; i--)
putchar (' ');
printf ("%s r=%6.3f\n", Name_x, Correlation);
}
\f
usinfo ()
{
if (InfoVersion)
pver (Version);
if (InfoLimits)
{
plim (Argv0);
const (MAXPAIRS, "maximum number of pairs for plots");
const (MAX_WIDTH, "maximum width of plot");
const (MAX_HEIGHT, "maximum height of plot");
const (MIN_PLOT, "minimum plot height or width");
const (MAXCHARS, "maximum number of characters in lines");
}
if (InfoOptions)
{
ppgm (Argv0, Purpose);
ropt ('b', "bottom", "minimum y axis plot value", Bottom);
copt ('c', "char", "plotting character", Plotchar);
lopt ('f', "frame plot", Frameplot);
iopt ('h', "height", "height of plot", Height);
ropt ('l', "left", "minimum x axis plot value", Left);
lopt ('p', "print plot", Wantplot);
ropt ('r', "right", "maximum x axis plot value", Right);
lopt ('s', "print statistics", Wantstats);
ropt ('t', "top", "maximum y axis plot value", Top);
iopt ('w', "width", "width of plot", Width);
sopt ('x', "name", "x axis name", Name_x);
sopt ('y', "name", "y axis name", Name_y);
}
if (InfoVersion || InfoLimits || InfoOptions)
exit (0);
}