|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T r
Length: 8886 (0x22b6)
Types: TextFile
Names: »regress.c«
└─⟦87ddcff64⟧ Bits:30001253 CPHDIST85 Tape, 1985 Autumn Conference Copenhagen
└─⟦this⟧ »cph85dist/stat/src/regress.c«
#include "unixstat.h"
PGM(regress,Multiple Linear Regression,5.0,2/1/85)
/* Copyright (c) 1982 Gary Perlman (see Copyright file) */
#define MAXVAR 20 /* max number of variables */
#define MAXLEN 20 /* maximum length of a string */
#define REG 0 /* column number of regressor */
char Varname[MAXVAR][MAXLEN]; /* names of variables */
int Npoints = 0; /* number of points/lines read in */
int Nvar; /* number of variables read in */
double correl[MAXVAR][MAXVAR]; /* correlation matrix */
double covar[MAXVAR][MAXVAR]; /* covariance matrix */
double sum[MAXVAR]; /* sum of scores for each variable */
#define SFORMAT "%10.10s " /* format of strings */
#define LFORMAT "%-10.10s " /* left justified strings */
#define FFORMAT "%10.4f " /* floating point format */
#define PRINTVEC(var,format,label)\
{\
printf (LFORMAT, label);\
for (col = 0; col < Nvar; col++)\
printf (format, var);\
putchar ('\n');\
}
/* OPTIONS */
int Doreg; /* should regression be done? */
int SumSquares; /* print raw sums of squares */
int Covariance; /* print covariance matrix */
int Partial; /* partial correlation analysis */
initial (argc, argv) char **argv;
{
int row;
extern int optind;
extern char *optarg;
int C;
int opterr = 0;
ARGV0;
if (checkargv ("regress", Argv0)) Doreg = 1;
while ((C = getopt (argc, argv, "cps")) != EOF)
switch (C)
{
case 'c': Covariance = 1; break;
case 'p': Partial = 1; Doreg = 1; break;
case 's': SumSquares = 1; break;
default: opterr++; break;
}
if (opterr)
USAGE ([-scp] [variable names])
if (argc-optind > MAXVAR)
ERRMANY (variable names, MAXVAR)
for (row = optind; row < argc; row++)
strncpy (Varname[row-optind], argv[row], MAXLEN);
if (Doreg)
if (Varname[REG][0] == '\0')
strcpy (Varname[REG], "REG");
for (row = Doreg; row < MAXVAR; row++)
if (Varname[row][0] == '\0')
{
Varname[row][0] = 'A' - Doreg + row;
Varname[row][1] = '\0';
}
checkstdin (Argv0);
}
main (argc, argv) int argc; char **argv;
{
initial (argc, argv);
input ();
compute ();
if (Doreg)
regress ();
exit (0);
}
input ()
{
char line[BUFSIZ]; /* each data line read in here */
int ncols; /* number of culumns in an input line */
char *in[MAXVAR]; /* each input column of line in here */
double temp[MAXVAR]; /* each input column number in here */
double minx[MAXVAR]; /* minimum value of each variable */
double maxx[MAXVAR]; /* maximum value of each variable */
register int row, col;
while (gets (line))
{
if ((ncols = parseline (line, in, MAXVAR)) == 0) continue;
if (ncols > MAXVAR)
ERRMANY (input columns, MAXVAR)
if (Npoints == 0)
{
Nvar = ncols;
for (row = 0; row < Nvar; row++)
minx[row] = maxx[row] = atof (in[row]);
}
else if (ncols != Nvar)
ERRRAGGED
for (row = 0; row < Nvar; row++)
{
if (number (in[row]))
temp[row] = atof (in[row]);
else
ERRNUM (in[row],input data)
sum[row] += temp[row];
if (temp[row] > maxx[row]) maxx[row] = temp[row];
if (temp[row] < minx[row]) minx[row] = temp[row];
for (col = 0; col <= row; col++)
covar[row][col] += temp[row] * temp[col];
}
Npoints++;
}
if (Npoints <= 1)
ERRDATA
printf ("Analysis for %d points of %d variables:\n", Npoints, Nvar);
PRINTVEC (Varname[col], SFORMAT, "Variable")
PRINTVEC (minx[col], FFORMAT, "Min")
PRINTVEC (maxx[col], FFORMAT, "Max")
PRINTVEC (sum[col], FFORMAT, "Sum")
if (SumSquares)
printmat (covar, "Raw SS Matrix");
for (row = 0; row < Nvar; row++)
for (col = 0; col <= row; col++)
covar[row][col] -= sum[row]*sum[col]/Npoints;
PRINTVEC (sum[col]/Npoints, FFORMAT, "Mean")
PRINTVEC (sqrt (covar[col][col]/(Npoints-1)), FFORMAT, "SD");
if (Covariance)
printmat (covar, "Covariance Matrix");
}
compute ()
{
register int row, col;
double denom;
for (row = 0; row < Nvar; row++)
{
for (col = 0; col < row; col++)
if (!fzero (denom = covar[row][row] * covar[col][col]))
correl[row][col] = covar[row][col]/sqrt (denom);
else correl[row][col] = 0.0;
correl[row][row] = 1.0;
}
printmat (correl, "Correlation Matrix");
}
printmat (mat, label)
double mat[MAXVAR][MAXVAR];
char *label;
{
int row, col;
printf ("\n%s:\n", label);
for (row = 0; row < Nvar; row++)
{
printf (LFORMAT, Varname[row]);
for (col = 0; col <= row; col++)
printf (FFORMAT, mat[row][col]);
putchar ('\n');
}
PRINTVEC (Varname[col], SFORMAT, "Variable")
}
regress ()
{
register int row, col, var;
double pof (); /* probability of F */
double invert (); /* matrix inversion. returns det */
double invcor[MAXVAR][MAXVAR]; /* inverse correlation matrix */
double partial[MAXVAR][MAXVAR]; /* partial correlations */
double beta[MAXVAR]; /* standardized regression weights */
double b[MAXVAR]; /* regression coefficients slopes */
double a; /* intercept in regression equation */
double F; /* F ratio */
double Rsq; /* squared correlation coefficient */
int npred = Nvar - 1; /* number of predictors */
int dferror = Npoints - Nvar;/* degrees of freedom for error */
char Fline[20]; /* holds the F ratio line */
if (npred < 1)
ERRMSG0 (No predictor variables for regression)
if (dferror < 1)
ERRMSG0 (Not enough degrees of freedom for regression)
for (row = 0; row < Nvar; row++)
for (col = 0; col < row; col++)
correl[col][row] = correl[row][col]; /* make symmetric */
for (row = 0; row < Nvar; row++)
for (col = 0; col < Nvar; col++)
{
invcor[row][col] = correl[row][col]; /* copy */
if (row != 0 && col != 0)
partial[row-1][col-1] = correl[row][col];
}
if (fzero (invert (invcor, Nvar)))
ERRMSG0 (Singular correlation matrix)
if (fzero (invert (partial, Nvar-1)))
ERRMSG0 (Singular partial correlation matrix)
a = sum[REG];
for (var = 1; var < Nvar; var++)
if (!fzero (invcor[REG][REG] * covar[var][var]))
{
beta[var] = -invcor[REG][var] / invcor[REG][REG];
b[var] = beta[var] * sqrt (covar[REG][REG]/covar[var][var]);
a -= b[var] * sum[var];
}
else beta[var] = b[var] = 0.0;
a /= Npoints;
putchar ('\n');
printf ("Regression Equation for %s:\n", Varname[REG]);
printf ("%s = ", Varname[REG]);
for (var = 1; var < Nvar; var++)
printf ("%.4g %s + ", b[var], Varname[var]);
printf ("%g\n", a);
Rsq = (!fzero (invcor[REG][REG])) ? 1.0 - 1.0/invcor[REG][REG] : 0.0;
F = (Rsq == 1.0) ? 99999.999 : Rsq * dferror / (npred * (1.0 - Rsq));
putchar ('\n');
printf ("Significance test for prediction of %s\n", Varname[REG]);
printf (SFORMAT, "Mult-R");
printf (SFORMAT, "R-Squared");
VOID sprintf (Fline, "F(%d,%d)", npred, dferror);
printf (SFORMAT, Fline);
printf (SFORMAT, "prob (F)");
putchar ('\n');
printf (FFORMAT, sqrt (Rsq));
printf (FFORMAT, Rsq);
printf (FFORMAT, F);
printf (FFORMAT, pof (F, npred, dferror));
putchar ('\n');
if (Partial)
{
double SEestsq = covar[REG][REG]*(1.0-Rsq)/dferror;
putchar ('\n');
printf ("Significance test(s) for predictor(s) of %s\n", Varname[REG]);
/* printf ("SEest = %g ", sqrt (SEestsq)); */
printf (LFORMAT, "Predictor");
printf (SFORMAT, "beta");
printf (SFORMAT, "b");
printf (SFORMAT, "Rsq");
VOID sprintf (Fline, "t(%d)", dferror);
printf (SFORMAT, Fline);
VOID sprintf (Fline, "F(1,%d)", dferror);
printf (SFORMAT, Fline);
printf (SFORMAT, "p");
putchar ('\n');
for (var = 1; var < Nvar; var++)
{
double rsq = 0.0;
double SEsq;
double F = 0.0;
double p;
double SSres;
if (!fzero (partial[var-1][var-1]))
rsq = 1.0 - 1.0/partial[var-1][var-1];
else rsq = 0.0;
SSres = covar[var][var] * (1.0 - rsq);
if (SSres > FZERO && SEestsq > FZERO)
{
SEsq = SEestsq/SSres;
F = b[var]*b[var] / SEsq;
p = pof (F, 1, dferror);
}
else
{
F = 99999.9999;
p = 0.0;
}
printf (LFORMAT, Varname[var]);
printf (FFORMAT, beta[var]);
printf (FFORMAT, b[var]);
printf (FFORMAT, rsq);
printf (FFORMAT, sqrt (F));
printf (FFORMAT, F);
printf (FFORMAT, p);
putchar ('\n');
}
}
}
/* returns the determinant of its input matrix. modifies matrix. */
double
invert (matrix, size) double matrix[MAXVAR][MAXVAR]; int size;
{
int j,k,l;
double pivot;
double temp;
double det = 1.0;
for (j = 0; j < size; j++)
{
pivot = matrix[j][j];
det *= pivot;
matrix[j][j] = 1.0;
for (k = 0; k < size; k++)
if (!fzero (pivot)) matrix[j][k] /= pivot;
else return (0.0);
for (k = 0; k < size; k++)
if (k != j)
{
temp = matrix[k][j];
matrix[k][j] = 0.0;
for (l = 0; l < size; l++)
matrix[k][l] -= matrix[j][l] * temp;
}
}
return (det);
}