|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T r
Length: 12029 (0x2efd)
Types: TextFile
Names: »regress.c«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/stat-5.3/eu/stat/src/regress.c«
/* Copyright 1980 Gary Perlman */
#include "stat.h"
PGM(regress,Multiple Linear Regression,5.5,01/27/87)
#define MAXVAR 20 /* max number of variables */
#define MAXLEN 20 /* maximum length of a string */
#define MAXCHARS BUFSIZ /* maximum number of chars in lines */
#define REG 0 /* column number of regressor */
char Varname[MAXVAR][MAXLEN]; /* names of variables */
int Ncases = 0; /* number of points/lines read in */
int NAcount = 0; /* number of NA missing values */
int Nvar; /* number of variables read in */
double correl[MAXVAR][MAXVAR]; /* correlation matrix */
double covar[MAXVAR][MAXVAR]; /* covariance matrix */
double sum[MAXVAR]; /* sum of scores for each variable */
#define FORMWIDTH 20 /* max space for format string */
#define MAXWIDTH 20 /* max width of field */
#define MINWIDTH 5 /* min width of field */
int Fieldwidth = 10; /* width of field in format string */
#define SFORMAT "%10.10s " /* format of strings */
#define LFORMAT "%-10.10s " /* left justified strings */
#define FFORMAT "%10.4f " /* floating point format */
char Sformat[FORMWIDTH] = SFORMAT;
char Lformat[FORMWIDTH] = LFORMAT;
char Fformat[FORMWIDTH] = FFORMAT;
#define PRINTVEC(var,format,label)\
{\
printf (Lformat, label);\
for (col = 0; col < Nvar; col++)\
printf (format, var);\
putchar ('\n');\
}
\f
/* OPTIONS */
int Doreg = 1; /* should regression be done? */
int SumSquares; /* print raw sums of squares */
int Covariance; /* print covariance matrix */
int Partial; /* partial correlation analysis */
int Equation; /* save regression equation */
Boole InfoVersion; /* print version information */
Boole InfoLimits; /* print program limits */
Boole InfoOptions; /* print usage information */
Boole Debug = FALSE;
initial (argc, argv) char **argv;
{
int row;
extern int optind;
extern char *optarg;
int C;
int opterr = 0;
ARGV0;
while ((C = getopt (argc, argv, "ceprsDF:LOV")) != EOF)
switch (C)
{
case 'O': InfoOptions = TRUE; break;
case 'V': InfoVersion = TRUE; break;
case 'L': InfoLimits = TRUE; break;
case 'D': Debug = TRUE; break;
case 'F':
if (setint (Argv0, C, optarg, &Fieldwidth, MINWIDTH, MAXWIDTH))
opterr++;
else
{
sprintf (Lformat, "%%-%d.%ds ", Fieldwidth, Fieldwidth);
sprintf (Sformat, "%%%d.%ds ", Fieldwidth, Fieldwidth);
sprintf (Fformat, "%%%d.%df ", Fieldwidth,(Fieldwidth-2)/2);
}
break;
case 'c': Covariance = 1; break;
case 'e': Equation = 1; break;
case 'p': Partial = 1; Doreg = 1; break;
case 'r': Doreg = 0; break;
case 's': SumSquares = 1; break;
default: opterr++; break;
}
if (Equation)
Doreg = 1;
if (opterr)
USAGE ([-cprs] [variable names])
if (argc-optind > MAXVAR)
ERRMANY (variable names, MAXVAR)
usinfo ();
for (row = optind; row < argc; row++)
strncpy (Varname[row-optind], argv[row], MAXLEN);
if (Doreg)
if (Varname[REG][0] == '\0')
strcpy (Varname[REG], "REG");
for (row = (Doreg ? 1 : 0); row < MAXVAR; row++)
if (Varname[row][0] == '\0')
{
Varname[row][0] = 'A' - (Doreg ? 1 : 0) + row;
Varname[row][1] = '\0';
}
checkstdin ();
}
\f
/*FUNCTION main */
main (argc, argv) int argc; char **argv;
{
initial (argc, argv);
input ();
compute ();
if (Doreg)
regress ();
exit (0);
}
\f
/*FUNCTION input */
input ()
{
char line[MAXCHARS]; /* each data line read in here */
int ncols; /* number of columns in an input line */
char *in[MAXVAR]; /* each input column of line in here */
double temp[MAXVAR]; /* each input column number in here */
double minx[MAXVAR]; /* minimum value of each variable */
double maxx[MAXVAR]; /* maximum value of each variable */
register int row, col;
while (gets (line))
{
if ((ncols = parselin (line, in, MAXVAR)) == 0)
continue;
if (ncols > MAXVAR)
ERRMANY (input columns, MAXVAR)
for (col = 0; col < ncols; col++)
if (isna (in[col]))
break;
if (col < ncols)
{
NAcount++;
continue;
}
if (Ncases == 0)
{
Nvar = ncols;
for (row = 0; row < Nvar; row++)
minx[row] = maxx[row] = atof (in[row]);
}
else if (ncols != Nvar)
ERRRAGGED
for (row = 0; row < Nvar; row++)
{
if (number (in[row]))
temp[row] = atof (in[row]);
else
ERRNUM (in[row],input data)
sum[row] += temp[row];
if (temp[row] > maxx[row])
maxx[row] = temp[row];
if (temp[row] < minx[row])
minx[row] = temp[row];
for (col = 0; col <= row; col++)
covar[row][col] += temp[row] * temp[col];
}
Ncases++;
}
if (Ncases <= 1)
ERRDATA
printf ("Analysis for %d cases of %d variables:\n", Ncases, Nvar);
if (NAcount)
printf ("Missing cases ignored: %d\n", NAcount);
if (SumSquares)
printmat (covar, "Raw SS Matrix");
else
PRINTVEC (Varname[col], Sformat, "Variable")
PRINTVEC (minx[col], Fformat, "Min")
PRINTVEC (maxx[col], Fformat, "Max")
PRINTVEC (sum[col], Fformat, "Sum")
for (row = 0; row < Nvar; row++)
for (col = 0; col <= row; col++)
covar[row][col] -= sum[row]*sum[col]/Ncases;
PRINTVEC (sum[col]/Ncases, Fformat, "Mean")
PRINTVEC (sqrt (covar[col][col]/(Ncases-1)), Fformat, "SD");
if (Covariance)
printmat (covar, "Covariance Matrix");
}
\f
/*FUNCTION compute: calculate correlation matrix */
compute ()
{
register int row, col;
double denom;
for (row = 0; row < Nvar; row++)
{
for (col = 0; col < row; col++)
if (!fzero (denom = covar[row][row] * covar[col][col]))
correl[row][col] = covar[row][col]/sqrt (denom);
else correl[row][col] = 0.0;
correl[row][row] = 1.0;
}
printmat (correl, "Correlation Matrix");
}
\f
/*FUNCTION regress */
regress ()
{
register int row, col, var;
double pof (); /* probability of F */
double invert (); /* matrix inversion. returns det */
double invcor[MAXVAR][MAXVAR]; /* inverse correlation matrix */
double partial[MAXVAR][MAXVAR]; /* partial correlations */
double beta[MAXVAR]; /* standardized regression weights */
double b[MAXVAR]; /* regression coefficients slopes */
double a; /* intercept in regression equation */
double F; /* F ratio */
double Rsq; /* squared correlation coefficient */
double SEestsq; /* squared SE estimate */
int npred = Nvar - 1; /* number of predictors */
int dferror = Ncases - Nvar;/* degrees of freedom for error */
char Fline[20]; /* holds the F ratio line */
if (npred < 1)
ERRMSG0 (No predictor variables for regression)
if (dferror < 1)
ERRMSG0 (Not enough degrees of freedom for regression)
for (row = 0; row < Nvar; row++)
for (col = 0; col < row; col++)
correl[col][row] = correl[row][col]; /* make symmetric */
for (row = 0; row < Nvar; row++)
for (col = 0; col < Nvar; col++)
{
invcor[row][col] = correl[row][col]; /* copy */
if (row != 0 && col != 0)
partial[row-1][col-1] = correl[row][col];
}
if (fzero (invert (partial, Nvar-1)))
{
WARNING (These predictors seem to be non-orthogonal)
ERRMSG0 (Singular partial correlation matrix)
}
if (fzero (invert (invcor, Nvar)))
Rsq = 1.0;
else if fzero (invcor[REG][REG])
Rsq = 0.0;
else
Rsq = 1.0 - 1.0/invcor[REG][REG];
for (var = 1; var < Nvar; var++)
{
beta[var] = b[var] = 0.0;
for (col = 1; col < Nvar; col++)
beta[var] += partial[var-1][col-1] * correl[REG][col];
}
a = sum[REG];
for (var = 1; var < Nvar; var++)
if (!fzero (covar[var][var]))
{
b[var] = beta[var] * sqrt (covar[REG][REG]/covar[var][var]);
a -= b[var] * sum[var];
}
a /= Ncases;
putchar ('\n');
printf ("Regression Equation for %s:\n", Varname[REG]);
printf ("%s = ", Varname[REG]);
for (var = 1; var < Nvar; var++)
printf ("%.4g %s + ", b[var], Varname[var]);
printf ("%g\n", a);
if (Equation)
{
FILE *ioptr = fopen ("regress.eqn", "w");
if (ioptr)
{
fprintf (ioptr, "s1\n");
for (var = 1; var < Nvar; var++)
fprintf (ioptr, "(x%d * %.20g) + ", var+1, b[var]);
fprintf (ioptr, "%.20g\n", a);
VOID fclose (ioptr);
}
else
WARNING (could not open regression output file)
}
if (fzero (Rsq - 1.0))
F = MAXF;
else
F = Rsq * dferror / (npred * (1.0 - Rsq));
SEestsq = covar[REG][REG]*(1.0-Rsq)/dferror;
putchar ('\n');
printf ("Significance test for prediction of %s\n", Varname[REG]);
printf (Sformat, "Mult-R");
printf (Sformat, "R-Squared");
printf (Sformat, "SEest");
VOID sprintf (Fline, "F(%d,%d)", npred, dferror);
printf (Sformat, Fline);
printf (Sformat, "prob (F)");
putchar ('\n');
printf (Fformat, sqrt (Rsq));
printf (Fformat, Rsq);
printf (Fformat, sqrt (SEestsq));
printf (Fformat, F);
printf (Fformat, pof (F, npred, dferror));
putchar ('\n');
if (Partial)
{
putchar ('\n');
printf ("Significance test(s) for predictor(s) of %s\n", Varname[REG]);
printf (Lformat, "Predictor");
printf (Sformat, "beta");
printf (Sformat, "b");
printf (Sformat, "Rsq");
printf (Sformat, "se");
VOID sprintf (Fline, "t(%d)", dferror);
printf (Sformat, Fline);
/*
VOID sprintf (Fline, "F(1,%d)", dferror);
printf (Sformat, Fline);
*/
printf (Sformat, "p");
putchar ('\n');
for (var = 1; var < Nvar; var++)
{
double rsq = 0.0;
double SEsq;
double Fvar = 0.0;
double p;
double SSres;
if (!fzero (partial[var-1][var-1]))
rsq = 1.0 - 1.0/partial[var-1][var-1];
else
rsq = 0.0;
SSres = covar[var][var] * (1.0 - rsq);
if (SSres > FZERO && SEestsq > FZERO)
{
SEsq = SEestsq/SSres;
Fvar = b[var]*b[var] / SEsq;
p = pof (Fvar, 1, dferror);
}
else
{
SEsq = 0.0;
Fvar = MAXF;
p = 0.0;
}
printf (Lformat, Varname[var]);
printf (Fformat, beta[var]);
printf (Fformat, b[var]);
printf (Fformat, rsq);
printf (Fformat, sqrt (SEsq));
printf (Fformat, sqrt (Fvar));
/* printf (Fformat, Fvar); */
printf (Fformat, p);
putchar ('\n');
}
}
}
\f
/*FUNCTION invert: returns determinant of input matrix. modifies matrix. */
double
invert (matrix, size)
double matrix[MAXVAR][MAXVAR];
int size;
{
int j,k,l;
double pivot;
double temp;
double det = 1.0;
for (j = 0; j < size; j++)
{
pivot = matrix[j][j];
det *= pivot;
matrix[j][j] = 1.0;
for (k = 0; k < size; k++)
if (!fzero (pivot))
matrix[j][k] /= pivot;
else
return (0.0);
for (k = 0; k < size; k++)
if (k != j)
{
temp = matrix[k][j];
matrix[k][j] = 0.0;
for (l = 0; l < size; l++)
matrix[k][l] -= matrix[j][l] * temp;
}
}
return (det);
}
\f
/*FUNCTION usinfo */
usinfo ()
{
if (InfoVersion)
pver (Version);
if (InfoLimits)
{
plim (Argv0);
const (MAXVAR, "maximum number of variables");
const (MAXLEN, "maximum length of variable names");
const (MAXCHARS, "maximum number of characters in lines");
const (MAXWIDTH, "maximum width of output fields");
const (MINWIDTH, "minimum width of output fields");
}
if (InfoOptions)
{
ppgm (Argv0, Purpose);
lopt ('c', "print covariance matrix", Covariance);
lopt ('e', "save regression equation in regress.eqn", Equation);
iopt ('F', "width", "width of output fields", Fieldwidth);
lopt ('p', "partial correlation analysis", Partial);
lopt ('r', "print regression analysis", Doreg);
lopt ('s', "print sums of squares", SumSquares);
oper ("[names]", "variable names", "[REG] A B ...");
}
if (InfoVersion || InfoLimits || InfoOptions)
exit (0);
}
printmat (mat, label)
double mat[MAXVAR][MAXVAR];
char *label;
{
int row, col;
printf ("\n%s:\n", label);
for (row = 0; row < Nvar; row++)
{
printf (Lformat, Varname[row]);
for (col = 0; col <= row; col++)
printf (Fformat, mat[row][col]);
putchar ('\n');
}
PRINTVEC (Varname[col], Sformat, "Variable")
}