DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - download
Index: ┃ T r

⟦67e1a292d⟧ TextFile

    Length: 8886 (0x22b6)
    Types: TextFile
    Names: »regress.c«

Derivation

└─⟦87ddcff64⟧ Bits:30001253 CPHDIST85 Tape, 1985 Autumn Conference Copenhagen
    └─ ⟦this⟧ »cph85dist/stat/src/regress.c« 

TextFile

#include "unixstat.h"
PGM(regress,Multiple Linear Regression,5.0,2/1/85)
/* Copyright (c) 1982 Gary Perlman (see Copyright file) */

#define MAXVAR          20       /* max number of variables */
#define MAXLEN          20       /* maximum length of a string */
#define REG              0       /* column number of regressor */
char	Varname[MAXVAR][MAXLEN]; /* names of variables */
int 	Npoints = 0;             /* number of points/lines read in */
int 	Nvar;                    /* number of variables read in */
double	correl[MAXVAR][MAXVAR];  /* correlation matrix */
double	covar[MAXVAR][MAXVAR];   /* covariance matrix */
double	sum[MAXVAR];             /* sum of scores for each variable */

#define	SFORMAT	"%10.10s "       /* format of strings */
#define	LFORMAT	"%-10.10s "      /* left justified strings */
#define	FFORMAT	"%10.4f "        /* floating point format */

#define	PRINTVEC(var,format,label)\
	{\
	printf (LFORMAT, label);\
	for (col = 0; col < Nvar; col++)\
		printf (format, var);\
	putchar ('\n');\
	}

/* OPTIONS */
int 	Doreg;           /* should regression be done? */
int 	SumSquares;      /* print raw sums of squares */
int 	Covariance;      /* print covariance matrix */
int 	Partial;         /* partial correlation analysis */

initial (argc, argv) char **argv;
	{
	int 	row;
	extern	int 	optind;
	extern	char	*optarg;
	int 	C;
	int 	opterr = 0;
	ARGV0;
	if (checkargv ("regress", Argv0)) Doreg = 1;
	while ((C = getopt (argc, argv, "cps")) != EOF)
		switch (C)
			{
			case 'c': Covariance = 1; break;
			case 'p': Partial = 1; Doreg = 1; break;
			case 's': SumSquares = 1; break;
			default: opterr++; break;
			}
	if (opterr)
		USAGE ([-scp] [variable names])
	if (argc-optind > MAXVAR)
		ERRMANY (variable names, MAXVAR)
	for (row = optind; row < argc; row++)
		strncpy (Varname[row-optind], argv[row], MAXLEN);
	if (Doreg)
		if (Varname[REG][0] == '\0')
			strcpy (Varname[REG], "REG");
	for (row = Doreg; row < MAXVAR; row++)
		if (Varname[row][0] == '\0')
			{
			Varname[row][0] = 'A' - Doreg + row;
			Varname[row][1] = '\0';
			}
	checkstdin (Argv0);
	}

main (argc, argv) int argc; char **argv;
	{
	initial (argc, argv);
	input ();
	compute ();
	if (Doreg)
		regress ();
	exit (0);
	}

input ()
	{
	char	line[BUFSIZ];  /* each data line read in here */
	int 	ncols;         /* number of culumns in an input line */
	char	*in[MAXVAR];   /* each input column of line in here */
	double	temp[MAXVAR];  /* each input column number in here */
	double	minx[MAXVAR];   /* minimum value of each variable */
	double	maxx[MAXVAR];   /* maximum value of each variable */
	register int row, col;
	while (gets (line))
		{
		if ((ncols = parseline (line, in, MAXVAR)) == 0) continue;
		if (ncols > MAXVAR)
			ERRMANY (input columns, MAXVAR)
		if (Npoints == 0)
			{
			Nvar = ncols;
			for (row = 0; row < Nvar; row++)
				minx[row] = maxx[row] = atof (in[row]);
			}
		else if (ncols != Nvar)
			ERRRAGGED
		for (row = 0; row < Nvar; row++)
			{
			if (number (in[row]))
				temp[row] = atof (in[row]);
			else
				ERRNUM (in[row],input data)
			sum[row] += temp[row];
			if (temp[row] > maxx[row]) maxx[row] = temp[row];
			if (temp[row] < minx[row]) minx[row] = temp[row];
			for (col = 0; col <= row; col++)
				covar[row][col] += temp[row] * temp[col];
			}
		Npoints++;
		}
	if (Npoints <= 1)
		ERRDATA
	printf ("Analysis for %d points of %d variables:\n", Npoints, Nvar);
	PRINTVEC (Varname[col], SFORMAT, "Variable")
	PRINTVEC (minx[col], FFORMAT, "Min")
	PRINTVEC (maxx[col], FFORMAT, "Max")
	PRINTVEC (sum[col], FFORMAT, "Sum")
	if (SumSquares)
		printmat (covar, "Raw SS Matrix");
	for (row = 0; row < Nvar; row++)
		for (col = 0; col <= row; col++)
			covar[row][col] -= sum[row]*sum[col]/Npoints;
	PRINTVEC (sum[col]/Npoints, FFORMAT, "Mean")
	PRINTVEC (sqrt (covar[col][col]/(Npoints-1)), FFORMAT, "SD");
	if (Covariance)
		printmat (covar, "Covariance Matrix");
	}

compute ()
	{
	register int row, col;
	double	denom;
	for (row = 0; row < Nvar; row++)
		{
		for (col = 0; col < row; col++)
			if (!fzero (denom = covar[row][row] * covar[col][col]))
				correl[row][col] = covar[row][col]/sqrt (denom);
			else correl[row][col] = 0.0;
		correl[row][row] = 1.0;
		}
	printmat (correl, "Correlation Matrix");
	}

printmat (mat, label)
double	mat[MAXVAR][MAXVAR];
char	*label;
	{
	int 	row, col;
	printf ("\n%s:\n", label);
	for (row = 0; row < Nvar; row++)
		{
		printf (LFORMAT, Varname[row]);
		for (col = 0; col <= row; col++)
			printf (FFORMAT, mat[row][col]);
		putchar ('\n');
		}
	PRINTVEC (Varname[col], SFORMAT, "Variable")
	}

regress ()
	{
	register int row, col, var;
	double	pof ();                  /* probability of F */
	double	invert ();       	     /* matrix inversion. returns det */
	double	invcor[MAXVAR][MAXVAR];  /* inverse correlation matrix */
	double	partial[MAXVAR][MAXVAR]; /* partial correlations */
	double	beta[MAXVAR];            /* standardized regression weights */
	double	b[MAXVAR];               /* regression coefficients slopes */
	double	a;                       /* intercept in regression equation */
	double	F;                       /* F ratio */
	double	Rsq;                     /* squared correlation coefficient */
	int 	npred = Nvar - 1;        /* number of predictors */
	int 	dferror = Npoints - Nvar;/* degrees of freedom for error */
	char	Fline[20];               /* holds the F ratio line */
	if (npred < 1)
		ERRMSG0 (No predictor variables for regression)
	if (dferror < 1)
		ERRMSG0 (Not enough degrees of freedom for regression)
	for (row = 0; row < Nvar; row++)
		for (col = 0; col < row; col++)
			correl[col][row] = correl[row][col]; /* make symmetric */
	for (row = 0; row < Nvar; row++)
		for (col = 0; col < Nvar; col++)
			{
			invcor[row][col] = correl[row][col]; /* copy */
			if (row != 0 && col != 0)
				partial[row-1][col-1] = correl[row][col];
			}
	if (fzero (invert (invcor, Nvar)))
		ERRMSG0 (Singular correlation matrix)
	if (fzero (invert (partial, Nvar-1)))
		ERRMSG0 (Singular partial correlation matrix)
	a = sum[REG];
	for (var = 1; var < Nvar; var++)
		if (!fzero (invcor[REG][REG] * covar[var][var]))
			{
			beta[var] = -invcor[REG][var] / invcor[REG][REG];
			b[var] = beta[var] * sqrt (covar[REG][REG]/covar[var][var]);
			a -= b[var] * sum[var];
			}
		else beta[var] = b[var] = 0.0;
	a /= Npoints;
	putchar ('\n');
	printf ("Regression Equation for %s:\n", Varname[REG]);
	printf ("%s  =  ", Varname[REG]);
	for (var = 1; var < Nvar; var++)
		printf ("%.4g %s  +  ", b[var], Varname[var]);
	printf ("%g\n", a);
	Rsq = (!fzero (invcor[REG][REG])) ? 1.0 - 1.0/invcor[REG][REG] : 0.0;
	F = (Rsq == 1.0) ? 99999.999 : Rsq * dferror / (npred * (1.0 - Rsq));
	putchar ('\n');
	printf ("Significance test for prediction of %s\n", Varname[REG]);
	printf (SFORMAT, "Mult-R");
	printf (SFORMAT, "R-Squared");
	VOID sprintf (Fline, "F(%d,%d)", npred, dferror);
	printf (SFORMAT, Fline);
	printf (SFORMAT, "prob (F)");
	putchar ('\n');
	printf (FFORMAT, sqrt (Rsq));
	printf (FFORMAT, Rsq);
	printf (FFORMAT, F);
	printf (FFORMAT, pof (F, npred, dferror));
	putchar ('\n');
	if (Partial)
		{
		double	SEestsq = covar[REG][REG]*(1.0-Rsq)/dferror;
		putchar ('\n');
		printf ("Significance test(s) for predictor(s) of %s\n", Varname[REG]);
		/* printf ("SEest = %g ", sqrt (SEestsq)); */
		printf (LFORMAT, "Predictor");
		printf (SFORMAT, "beta");
		printf (SFORMAT, "b");
		printf (SFORMAT, "Rsq");
		VOID sprintf (Fline, "t(%d)", dferror);
		printf (SFORMAT, Fline);
		VOID sprintf (Fline, "F(1,%d)", dferror);
		printf (SFORMAT, Fline);
		printf (SFORMAT, "p");
		putchar ('\n');
		for (var = 1; var < Nvar; var++)
			{
			double	rsq = 0.0;
			double	SEsq;
			double	F = 0.0;
			double	p;
			double	SSres;
			if (!fzero (partial[var-1][var-1]))
				rsq = 1.0 - 1.0/partial[var-1][var-1];
			else rsq = 0.0;
			SSres = covar[var][var] * (1.0 - rsq);
			if (SSres > FZERO && SEestsq > FZERO)
				{
				SEsq = SEestsq/SSres;
				F = b[var]*b[var] / SEsq;
				p = pof (F, 1, dferror);
				}
			else
				{
				F = 99999.9999;
				p = 0.0;
				}
			printf (LFORMAT, Varname[var]);
			printf (FFORMAT, beta[var]);
			printf (FFORMAT, b[var]);
			printf (FFORMAT, rsq);
			printf (FFORMAT, sqrt (F));
			printf (FFORMAT, F);
			printf (FFORMAT, p);
			putchar ('\n');
			}
		}
	}

/* returns the determinant of its input matrix. modifies matrix. */
double
invert (matrix, size) double matrix[MAXVAR][MAXVAR]; int size;
	{
	int	j,k,l;
	double	pivot;
	double	temp;
	double	det = 1.0;
	for (j = 0; j < size; j++)
		{
		pivot = matrix[j][j];
		det *= pivot;
		matrix[j][j] = 1.0;
		for (k = 0; k < size; k++)
			if (!fzero (pivot)) matrix[j][k] /= pivot;
			else return (0.0);
		for (k = 0; k < size; k++)
			if (k != j)
				{
				temp = matrix[k][j];
				matrix[k][j] = 0.0;
				for (l = 0; l < size; l++)
					matrix[k][l] -= matrix[j][l] * temp;
				}
		}
	return (det);
	}