|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - downloadIndex: ┃ T r ┃
Length: 8886 (0x22b6) Types: TextFile Names: »regress.c«
└─⟦87ddcff64⟧ Bits:30001253 CPHDIST85 Tape, 1985 Autumn Conference Copenhagen └─ ⟦this⟧ »cph85dist/stat/src/regress.c«
#include "unixstat.h" PGM(regress,Multiple Linear Regression,5.0,2/1/85) /* Copyright (c) 1982 Gary Perlman (see Copyright file) */ #define MAXVAR 20 /* max number of variables */ #define MAXLEN 20 /* maximum length of a string */ #define REG 0 /* column number of regressor */ char Varname[MAXVAR][MAXLEN]; /* names of variables */ int Npoints = 0; /* number of points/lines read in */ int Nvar; /* number of variables read in */ double correl[MAXVAR][MAXVAR]; /* correlation matrix */ double covar[MAXVAR][MAXVAR]; /* covariance matrix */ double sum[MAXVAR]; /* sum of scores for each variable */ #define SFORMAT "%10.10s " /* format of strings */ #define LFORMAT "%-10.10s " /* left justified strings */ #define FFORMAT "%10.4f " /* floating point format */ #define PRINTVEC(var,format,label)\ {\ printf (LFORMAT, label);\ for (col = 0; col < Nvar; col++)\ printf (format, var);\ putchar ('\n');\ } /* OPTIONS */ int Doreg; /* should regression be done? */ int SumSquares; /* print raw sums of squares */ int Covariance; /* print covariance matrix */ int Partial; /* partial correlation analysis */ initial (argc, argv) char **argv; { int row; extern int optind; extern char *optarg; int C; int opterr = 0; ARGV0; if (checkargv ("regress", Argv0)) Doreg = 1; while ((C = getopt (argc, argv, "cps")) != EOF) switch (C) { case 'c': Covariance = 1; break; case 'p': Partial = 1; Doreg = 1; break; case 's': SumSquares = 1; break; default: opterr++; break; } if (opterr) USAGE ([-scp] [variable names]) if (argc-optind > MAXVAR) ERRMANY (variable names, MAXVAR) for (row = optind; row < argc; row++) strncpy (Varname[row-optind], argv[row], MAXLEN); if (Doreg) if (Varname[REG][0] == '\0') strcpy (Varname[REG], "REG"); for (row = Doreg; row < MAXVAR; row++) if (Varname[row][0] == '\0') { Varname[row][0] = 'A' - Doreg + row; Varname[row][1] = '\0'; } checkstdin (Argv0); } main (argc, argv) int argc; char **argv; { initial (argc, argv); input (); compute (); if (Doreg) regress (); exit (0); } input () { char line[BUFSIZ]; /* each data line read in here */ int ncols; /* number of culumns in an input line */ char *in[MAXVAR]; /* each input column of line in here */ double temp[MAXVAR]; /* each input column number in here */ double minx[MAXVAR]; /* minimum value of each variable */ double maxx[MAXVAR]; /* maximum value of each variable */ register int row, col; while (gets (line)) { if ((ncols = parseline (line, in, MAXVAR)) == 0) continue; if (ncols > MAXVAR) ERRMANY (input columns, MAXVAR) if (Npoints == 0) { Nvar = ncols; for (row = 0; row < Nvar; row++) minx[row] = maxx[row] = atof (in[row]); } else if (ncols != Nvar) ERRRAGGED for (row = 0; row < Nvar; row++) { if (number (in[row])) temp[row] = atof (in[row]); else ERRNUM (in[row],input data) sum[row] += temp[row]; if (temp[row] > maxx[row]) maxx[row] = temp[row]; if (temp[row] < minx[row]) minx[row] = temp[row]; for (col = 0; col <= row; col++) covar[row][col] += temp[row] * temp[col]; } Npoints++; } if (Npoints <= 1) ERRDATA printf ("Analysis for %d points of %d variables:\n", Npoints, Nvar); PRINTVEC (Varname[col], SFORMAT, "Variable") PRINTVEC (minx[col], FFORMAT, "Min") PRINTVEC (maxx[col], FFORMAT, "Max") PRINTVEC (sum[col], FFORMAT, "Sum") if (SumSquares) printmat (covar, "Raw SS Matrix"); for (row = 0; row < Nvar; row++) for (col = 0; col <= row; col++) covar[row][col] -= sum[row]*sum[col]/Npoints; PRINTVEC (sum[col]/Npoints, FFORMAT, "Mean") PRINTVEC (sqrt (covar[col][col]/(Npoints-1)), FFORMAT, "SD"); if (Covariance) printmat (covar, "Covariance Matrix"); } compute () { register int row, col; double denom; for (row = 0; row < Nvar; row++) { for (col = 0; col < row; col++) if (!fzero (denom = covar[row][row] * covar[col][col])) correl[row][col] = covar[row][col]/sqrt (denom); else correl[row][col] = 0.0; correl[row][row] = 1.0; } printmat (correl, "Correlation Matrix"); } printmat (mat, label) double mat[MAXVAR][MAXVAR]; char *label; { int row, col; printf ("\n%s:\n", label); for (row = 0; row < Nvar; row++) { printf (LFORMAT, Varname[row]); for (col = 0; col <= row; col++) printf (FFORMAT, mat[row][col]); putchar ('\n'); } PRINTVEC (Varname[col], SFORMAT, "Variable") } regress () { register int row, col, var; double pof (); /* probability of F */ double invert (); /* matrix inversion. returns det */ double invcor[MAXVAR][MAXVAR]; /* inverse correlation matrix */ double partial[MAXVAR][MAXVAR]; /* partial correlations */ double beta[MAXVAR]; /* standardized regression weights */ double b[MAXVAR]; /* regression coefficients slopes */ double a; /* intercept in regression equation */ double F; /* F ratio */ double Rsq; /* squared correlation coefficient */ int npred = Nvar - 1; /* number of predictors */ int dferror = Npoints - Nvar;/* degrees of freedom for error */ char Fline[20]; /* holds the F ratio line */ if (npred < 1) ERRMSG0 (No predictor variables for regression) if (dferror < 1) ERRMSG0 (Not enough degrees of freedom for regression) for (row = 0; row < Nvar; row++) for (col = 0; col < row; col++) correl[col][row] = correl[row][col]; /* make symmetric */ for (row = 0; row < Nvar; row++) for (col = 0; col < Nvar; col++) { invcor[row][col] = correl[row][col]; /* copy */ if (row != 0 && col != 0) partial[row-1][col-1] = correl[row][col]; } if (fzero (invert (invcor, Nvar))) ERRMSG0 (Singular correlation matrix) if (fzero (invert (partial, Nvar-1))) ERRMSG0 (Singular partial correlation matrix) a = sum[REG]; for (var = 1; var < Nvar; var++) if (!fzero (invcor[REG][REG] * covar[var][var])) { beta[var] = -invcor[REG][var] / invcor[REG][REG]; b[var] = beta[var] * sqrt (covar[REG][REG]/covar[var][var]); a -= b[var] * sum[var]; } else beta[var] = b[var] = 0.0; a /= Npoints; putchar ('\n'); printf ("Regression Equation for %s:\n", Varname[REG]); printf ("%s = ", Varname[REG]); for (var = 1; var < Nvar; var++) printf ("%.4g %s + ", b[var], Varname[var]); printf ("%g\n", a); Rsq = (!fzero (invcor[REG][REG])) ? 1.0 - 1.0/invcor[REG][REG] : 0.0; F = (Rsq == 1.0) ? 99999.999 : Rsq * dferror / (npred * (1.0 - Rsq)); putchar ('\n'); printf ("Significance test for prediction of %s\n", Varname[REG]); printf (SFORMAT, "Mult-R"); printf (SFORMAT, "R-Squared"); VOID sprintf (Fline, "F(%d,%d)", npred, dferror); printf (SFORMAT, Fline); printf (SFORMAT, "prob (F)"); putchar ('\n'); printf (FFORMAT, sqrt (Rsq)); printf (FFORMAT, Rsq); printf (FFORMAT, F); printf (FFORMAT, pof (F, npred, dferror)); putchar ('\n'); if (Partial) { double SEestsq = covar[REG][REG]*(1.0-Rsq)/dferror; putchar ('\n'); printf ("Significance test(s) for predictor(s) of %s\n", Varname[REG]); /* printf ("SEest = %g ", sqrt (SEestsq)); */ printf (LFORMAT, "Predictor"); printf (SFORMAT, "beta"); printf (SFORMAT, "b"); printf (SFORMAT, "Rsq"); VOID sprintf (Fline, "t(%d)", dferror); printf (SFORMAT, Fline); VOID sprintf (Fline, "F(1,%d)", dferror); printf (SFORMAT, Fline); printf (SFORMAT, "p"); putchar ('\n'); for (var = 1; var < Nvar; var++) { double rsq = 0.0; double SEsq; double F = 0.0; double p; double SSres; if (!fzero (partial[var-1][var-1])) rsq = 1.0 - 1.0/partial[var-1][var-1]; else rsq = 0.0; SSres = covar[var][var] * (1.0 - rsq); if (SSres > FZERO && SEestsq > FZERO) { SEsq = SEestsq/SSres; F = b[var]*b[var] / SEsq; p = pof (F, 1, dferror); } else { F = 99999.9999; p = 0.0; } printf (LFORMAT, Varname[var]); printf (FFORMAT, beta[var]); printf (FFORMAT, b[var]); printf (FFORMAT, rsq); printf (FFORMAT, sqrt (F)); printf (FFORMAT, F); printf (FFORMAT, p); putchar ('\n'); } } } /* returns the determinant of its input matrix. modifies matrix. */ double invert (matrix, size) double matrix[MAXVAR][MAXVAR]; int size; { int j,k,l; double pivot; double temp; double det = 1.0; for (j = 0; j < size; j++) { pivot = matrix[j][j]; det *= pivot; matrix[j][j] = 1.0; for (k = 0; k < size; k++) if (!fzero (pivot)) matrix[j][k] /= pivot; else return (0.0); for (k = 0; k < size; k++) if (k != j) { temp = matrix[k][j]; matrix[k][j] = 0.0; for (l = 0; l < size; l++) matrix[k][l] -= matrix[j][l] * temp; } } return (det); }