2020-10-14 14:28:59 +00:00
|
|
|
unit RegressionUnit;
|
|
|
|
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
{$modeswitch advancedrecords}
|
|
|
|
|
|
|
|
interface
|
|
|
|
|
|
|
|
uses
|
|
|
|
Classes, SysUtils,
|
|
|
|
MatrixUnit;
|
|
|
|
|
2020-10-17 22:42:41 +00:00
|
|
|
|
|
|
|
type
|
|
|
|
ERegression = class(Exception);
|
|
|
|
|
2020-10-14 14:28:59 +00:00
|
|
|
type
|
2020-10-17 22:42:41 +00:00
|
|
|
TRegressionError = (regOK, regTooFewValues, regStdDevZero);
|
|
|
|
|
2020-10-14 14:28:59 +00:00
|
|
|
TBivariateRegressionResults = record
|
2020-10-17 22:42:41 +00:00
|
|
|
public
|
2020-10-14 14:28:59 +00:00
|
|
|
Slope, Intercept: Double;
|
|
|
|
XMean, YMean: Double;
|
|
|
|
XVariance, YVariance: Double;
|
|
|
|
XStdDev, YStdDev: Double;
|
|
|
|
StdErrorPredicted: Double;
|
|
|
|
R: Double;
|
|
|
|
t: Double;
|
|
|
|
SXX, SXY, SYY: Double;
|
|
|
|
Count, DF: Integer;
|
|
|
|
function ConfidenceLimits(x: Double; Upper: Boolean): Double;
|
|
|
|
end;
|
|
|
|
|
|
|
|
procedure BivariateRegression(const xData, yData: TDblVector;
|
|
|
|
AConfLevel: Double; out AResults: TBivariateRegressionResults);
|
|
|
|
|
|
|
|
type
|
|
|
|
TMultipleRegressionResults = record
|
2020-10-17 22:42:41 +00:00
|
|
|
private
|
|
|
|
procedure WriteMatrixReport(AReport: TStrings; AMatrix: TDblMatrix;
|
|
|
|
ATitle: String; AVarNames: TStringArray);
|
|
|
|
public
|
|
|
|
Coeffs: TDblVector; // Coefficients of the factors
|
|
|
|
CoeffStdErrors: TDblVector; // Standard errors of the coefficients
|
|
|
|
|
|
|
|
Beta: TDblVector; // Standardized coefficients
|
|
|
|
VarCovar: TDblMatrix; // Variance-covariance matrix
|
|
|
|
Correlations: TDblMatrix; // Correlations matrix
|
|
|
|
|
|
|
|
SStotal, SSreg, SSresid: Double; // Sum of squares
|
|
|
|
NumCases, NumDepVars: Integer; // number of cases, number of dependent variables
|
|
|
|
R2: Double; // coefficient of determination
|
|
|
|
AdjR2: Double; // Adjusted coefficient of determination
|
|
|
|
StdErrorEstimate: Double; // standard error of estimate
|
|
|
|
t: TDblVector;
|
|
|
|
p: TDblVector; // Probability for > t
|
|
|
|
F: Double;
|
|
|
|
Probability: Double; // Probability for > F
|
|
|
|
|
|
|
|
Means: TDblVector;
|
|
|
|
Variances: TDblVector;
|
|
|
|
StdDevs: TDblVector;
|
|
|
|
(*
|
|
|
|
XMeans: TDblVector; // Means of the x columns
|
|
|
|
XVariances: TDblVector; // Variances of the x columns
|
|
|
|
XStdDevs: TDblVector; // Std Deviations of the x columns
|
|
|
|
*)
|
|
|
|
MeanY: Double; // Mean of y column
|
|
|
|
VarianceY: Double; // Variance of y column
|
|
|
|
StdDevY: Double; // Std Deviation of y column
|
|
|
|
|
|
|
|
procedure WriteANOVAReport(AReport: TStrings);
|
|
|
|
procedure WriteCoeffsReport(AReport: TStrings; AVarNames: TStringArray);
|
|
|
|
procedure WriteCorrelationReport(AReport: TStrings; AVarNames: TStringArray);
|
|
|
|
procedure WriteVarCovarReport(AReport: TStrings; AVarNames: TStringArray);
|
2020-10-14 14:28:59 +00:00
|
|
|
end;
|
|
|
|
|
2020-10-17 22:42:41 +00:00
|
|
|
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
|
|
|
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
|
2020-10-14 14:28:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
implementation
|
|
|
|
|
|
|
|
uses
|
2020-10-17 22:42:41 +00:00
|
|
|
StrUtils,
|
2020-10-14 14:28:59 +00:00
|
|
|
MathUnit;
|
|
|
|
|
|
|
|
{===============================================================================
|
|
|
|
Bivariate regression
|
|
|
|
|
|
|
|
Calculates a bivariate regression y = a x + b
|
|
|
|
|
|
|
|
x is a vector of independent observations, y of dependent observations.
|
|
|
|
Both vectors must have the same length.
|
|
|
|
|
|
|
|
It is assumed that xData and yData contain at least 3 elements and
|
|
|
|
have the same count of elements.
|
|
|
|
===============================================================================}
|
|
|
|
procedure BivariateRegression(const xData, yData: TDblVector;
|
|
|
|
AConfLevel: Double; out AResults: TBivariateRegressionResults);
|
|
|
|
var
|
|
|
|
i: Integer;
|
|
|
|
begin
|
|
|
|
with AResults do
|
|
|
|
begin
|
|
|
|
Count := Length(xData);
|
|
|
|
|
|
|
|
// Calculate means, variances, stddevs
|
|
|
|
VecMeanVarStdDevSS(xData, XMean, XVariance, XStdDev, SXX);
|
|
|
|
VecMeanVarStdDevSS(yData, YMean, YVariance, YStdDev, SYY);
|
|
|
|
|
|
|
|
SXY := 0;
|
|
|
|
for i := 0 to Count-1 do
|
|
|
|
SXY := SXY + xData[i] * yData[i];
|
|
|
|
|
|
|
|
R := (SXY - XMean * YMean * Count) / ((Count - 1) * XStdDev * YStdDev);
|
|
|
|
StdErrorPredicted := sqrt(1.0 - sqr(R)) * YStdDev * sqrt((Count - 1) / (Count - 2));
|
|
|
|
|
|
|
|
Slope := R * YStdDev / XStdDev;
|
|
|
|
Intercept := YMean - Slope * XMean;
|
|
|
|
|
|
|
|
DF := Count - 2;
|
|
|
|
t := InverseT(AConfLevel, DF);
|
|
|
|
end;
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
function TBivariateRegressionResults.ConfidenceLimits(x: Double; Upper: Boolean): Double;
|
|
|
|
var
|
|
|
|
yPred, seData: Double;
|
|
|
|
begin
|
|
|
|
yPred := Intercept + Slope * x;
|
|
|
|
seData := StdErrorPredicted * sqrt(1.0 + 1/Count + sqr(x - XMean)/SXX);
|
|
|
|
if Upper then
|
|
|
|
Result := yPred + t*seData
|
|
|
|
else
|
|
|
|
Result := yPred - t*seData;
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
{===============================================================================
|
|
|
|
Multiple regression
|
|
|
|
===============================================================================}
|
2020-10-17 22:42:41 +00:00
|
|
|
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
|
|
|
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
|
|
|
|
var
|
|
|
|
i, j, nx, mx, ny: Integer;
|
|
|
|
X, XT, XT_X, inv_XT_X: TDblMatrix;
|
|
|
|
XT_Y: TDblVector;
|
|
|
|
SY, SSY: Double;
|
|
|
|
DFresid: Integer;
|
2020-10-14 14:28:59 +00:00
|
|
|
begin
|
2020-10-17 22:42:41 +00:00
|
|
|
MatCheck(xData);
|
|
|
|
MatSize(xData, nx, mx);
|
|
|
|
ny := Length(yData);
|
|
|
|
if nx <> ny then
|
|
|
|
raise ERegression.Create('Dimension error.');
|
|
|
|
|
|
|
|
if nx < 2 then begin
|
|
|
|
Result := regTooFewValues;
|
|
|
|
exit;
|
|
|
|
end;
|
|
|
|
|
|
|
|
// Augmented x matrix: append ones vector for intercept
|
|
|
|
X := MatAppendColVector(xData, VecOnes(ny));
|
|
|
|
|
|
|
|
// Transposed augmented matrix
|
|
|
|
XT := MatTransposed(X);
|
|
|
|
|
|
|
|
// Product of augmented matrix with augmented Y
|
|
|
|
XT_Y := XT * yData;
|
|
|
|
|
|
|
|
// Product of transposed augmented with augmented matrix
|
|
|
|
XT_X := XT * X;
|
|
|
|
|
|
|
|
// Product of inverse of XT_X with XT_Y yields the coefficients. Intercept is last.
|
|
|
|
inv_XT_X := MatInverse(XT_X);
|
|
|
|
AResults.Coeffs := inv_XT_X * XT_Y;
|
|
|
|
|
|
|
|
// THe XT_X matrix contains the sum of squares (diagonal elements), sum of
|
|
|
|
// cross-products (off-diagonal elements and the sum the variable values
|
|
|
|
// (augmented column).
|
|
|
|
SetLength(AResults.Means, mx+1);
|
|
|
|
SetLength(AResults.Variances, mx+1);
|
|
|
|
SetLength(AResults.StdDevs, mx+1);
|
|
|
|
for i := 0 to mx do
|
|
|
|
begin
|
|
|
|
AResults.Means[i] := XT_X[i, mx] / nx;
|
|
|
|
AResults.Variances[i] := (XT_X[i, i] - sqr(AResults.Means[i]) * nx) / (nx - 1);
|
|
|
|
AResults.StdDevs[i] := sqrt(AResults.Variances[i]);
|
|
|
|
{
|
|
|
|
if AResults.StdDevs[i] = 0.0 then
|
|
|
|
begin
|
|
|
|
Result := regStdDevZero;
|
|
|
|
exit;
|
|
|
|
end;
|
|
|
|
}
|
|
|
|
end;
|
|
|
|
|
|
|
|
// Variance-covariance matrix
|
|
|
|
SetLength(AResults.VarCovar, mx, mx);
|
|
|
|
SetLength(AResults.Correlations, mx, mx);
|
|
|
|
for i := 0 to mx-1 do
|
|
|
|
for j := 0 to mx-1 do
|
|
|
|
begin
|
|
|
|
AResults.VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1);
|
|
|
|
AResults.Correlations[i, j] := AResults.VarCovar[i, j] / (AResults.StdDevs[i] * AResults.StdDevs[j]);
|
|
|
|
end;
|
|
|
|
|
|
|
|
with AResults do
|
|
|
|
begin
|
|
|
|
NumCases := nx;
|
|
|
|
NumDepVars := mx;
|
|
|
|
DFresid := NumCases - NumDepVars - 1;
|
|
|
|
|
|
|
|
// Calculate column means of xData
|
|
|
|
// MatColMeanVarStdDev(X, XMeans, XVariances, XStdDevs);
|
|
|
|
|
|
|
|
// Basic stats of y data column
|
|
|
|
VecSumSS(yData, SY, SSY);
|
|
|
|
MeanY := SY / NumCases;
|
|
|
|
SStotal := SSY - sqr(MeanY) * NumCases; // needed for ANOVA
|
|
|
|
VarianceY := SStotal / (NumCases - 1);
|
|
|
|
StdDevY := sqrt(VarianceY);
|
|
|
|
{
|
|
|
|
if StdDevY = 0 then
|
|
|
|
begin
|
|
|
|
Result := regStdDevZero;
|
|
|
|
exit;
|
|
|
|
end;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Standardized coefficients
|
|
|
|
Beta := VecMultiply(Coeffs, StdDevs) * (1.0/StdDevY);
|
|
|
|
|
|
|
|
// Get standard errors, squared multiple correlation, tests of significance
|
|
|
|
SetLength(CoeffStdErrors, mx+1);
|
|
|
|
SetLength(t, mx+1);
|
|
|
|
SetLength(p, mx+1);
|
|
|
|
|
|
|
|
SSresid := SSY - Coeffs * XT_Y;
|
|
|
|
SSreg := SStotal - SSresid;
|
|
|
|
StdErrorEstimate := sqrt(SSresid / DFresid);
|
|
|
|
R2 := SSreg / SStotal;
|
|
|
|
AdjR2 := 1.0 - (1.0 - R2) * (NumCases - 1) / DFresid;
|
|
|
|
F := (SSreg / NumDepVars) / (SSresid / DFresid);
|
|
|
|
Probability := ProbF(F, NumDepVars, DFresid);
|
|
|
|
|
|
|
|
for i := 0 to mx do
|
|
|
|
begin
|
|
|
|
CoeffStdErrors[i] := StdErrorEstimate * sqrt(inv_XT_X[i, i]);
|
|
|
|
t[i] := Coeffs[i] / CoeffStdErrors[i];
|
|
|
|
p[i] := ProbT(t[i], DFresid);
|
|
|
|
end;
|
|
|
|
end;
|
|
|
|
|
|
|
|
Result := regOK;
|
2020-10-14 14:28:59 +00:00
|
|
|
end;
|
|
|
|
|
|
|
|
|
2020-10-17 22:42:41 +00:00
|
|
|
{ Writes the ANOVA of the regression to a "report". }
|
|
|
|
procedure TMultipleRegressionResults.WriteANOVAReport(AReport: TStrings);
|
|
|
|
var
|
|
|
|
DFreg, DFresid, DFtotal: Integer;
|
|
|
|
begin
|
|
|
|
DFreg := NumDepVars;
|
|
|
|
DFtotal := NumCases - 1;
|
|
|
|
DFresid := DFtotal - DFreg;
|
|
|
|
AReport.Add('ANOVA');
|
|
|
|
AReport.Add('------------------------------------------------------------------------------');
|
|
|
|
AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability');
|
|
|
|
AReport.Add('---------- --- ---------------- ---------------- ------------ -----------');
|
|
|
|
AReport.Add('Regression %3d %16.3f %16.3f %12.4f %10.4f', [DFreg, SSreg, SSreg/DFreg, F, Probability]);
|
|
|
|
AReport.Add('Residual %3d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]);
|
|
|
|
AReport.Add('Total %3d %16.3f', [DFtotal, SStotal]);
|
|
|
|
AReport.Add('------------------------------------------------------------------------------');
|
|
|
|
AReport.Add('');
|
|
|
|
AReport.Add('R2: %10.4f', [R2]);
|
|
|
|
AReport.Add('Adjusted R2: %10.4f', [AdjR2]);
|
|
|
|
AReport.Add('F: %10.2f (with %d and %d degrees of freedom)', [F, DFreg, DFresid]);
|
|
|
|
AReport.Add('Probability > F: %10.4f', [Probability]);
|
|
|
|
AReport.Add('Standard Error of Estimate: %10.2f', [StdErrorEstimate]);
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
{ Writes the determined coefficients and their statistics to a "report".
|
|
|
|
The names of the independent variables are needed in the array AVarNames. }
|
|
|
|
procedure TMultipleRegressionResults.WriteCoeffsReport(AReport: TStrings;
|
|
|
|
AVarNames: TStringArray);
|
|
|
|
var
|
|
|
|
varName: String;
|
|
|
|
i, first, last: Integer;
|
|
|
|
begin
|
|
|
|
AReport.Add('COEFFICIENTS');
|
|
|
|
AReport.Add('------------------------------------------------------------------------------');
|
|
|
|
AReport.Add(' Variable Unstandardized Std.Error Standardized t p ');
|
|
|
|
AReport.Add('------------ -------------- ------------ ------------ --------- ---------');
|
|
|
|
|
|
|
|
first := 0;
|
|
|
|
last := High(Coeffs);
|
|
|
|
for i := first to last do
|
|
|
|
begin
|
|
|
|
if i = last then
|
|
|
|
varName := '(Intercept)'
|
|
|
|
else
|
|
|
|
if i <= High(AVarNames) then
|
|
|
|
varName := AVarNames[i]
|
|
|
|
else
|
|
|
|
varName := Format('VAR.%d', [i+1]);
|
|
|
|
|
|
|
|
AReport.Add('%12s %14.3f %12.3f %12.3f %9.4f %9.4f',
|
|
|
|
[varName, Coeffs[i], CoeffStdErrors[i], Beta[i], t[i], p[i]]);
|
|
|
|
end;
|
|
|
|
AReport.Add('------------------------------------------------------------------------------');
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
{ Writes the correlation matrix to a "report".
|
|
|
|
The names of the independent variables are needed in the array AVarNames. }
|
|
|
|
procedure TMultipleRegressionResults.WriteCorrelationReport(AReport: TStrings;
|
|
|
|
AVarNames: TStringArray);
|
|
|
|
begin
|
|
|
|
WriteMatrixReport(AReport, Correlations, 'CORRELATION MATRIX', AVarNames);
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
procedure TMultipleRegressionResults.WriteMatrixReport(AReport: TStrings;
|
|
|
|
AMatrix: TDblMatrix; ATitle: String; AVarNames: TStringArray);
|
|
|
|
|
|
|
|
function GetVarName(i: Integer): String;
|
|
|
|
begin
|
|
|
|
if i < High(AVarNames) then
|
|
|
|
Result := AVarNames[i]
|
|
|
|
else
|
|
|
|
Result := Format('VAR.%d', [i+1]);
|
|
|
|
end;
|
|
|
|
|
|
|
|
const
|
|
|
|
SPACE = ' ';
|
|
|
|
var
|
|
|
|
i, j, first, last: Integer;
|
|
|
|
s: String;
|
|
|
|
begin
|
|
|
|
first := 0;
|
|
|
|
last := High(AMatrix);
|
|
|
|
|
|
|
|
AReport.Add(ATitle);
|
|
|
|
|
|
|
|
s := DupeString(' ', 12);
|
|
|
|
for j := first to last do
|
|
|
|
s := s + SPACE + Format('%12s', [GetVarName(j)]);
|
|
|
|
AReport.Add(DupeString('-', Length(s)));
|
|
|
|
AReport.Add(s);
|
|
|
|
|
|
|
|
s := DupeString(' ', 12);
|
|
|
|
for j := first to last do
|
|
|
|
s := s + SPACE + DupeString('-', 12);
|
|
|
|
AReport.Add(s);
|
|
|
|
|
|
|
|
for i := first to last do
|
|
|
|
begin
|
|
|
|
s := Format('%12s', [GetVarName(i)]);
|
|
|
|
for j := first to last do
|
|
|
|
s := s + SPACE + Format('%12.3f', [AMatrix[i, j]]);
|
|
|
|
AReport.Add(s);
|
|
|
|
end;
|
|
|
|
AReport.Add(DupeString('-', Length(s)));
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
|
|
{ Writes the variance/covariance matrix to a "report".
|
|
|
|
The names of the independent variables are needed in the array AVarNames. }
|
|
|
|
procedure TMultipleRegressionResults.WriteVarCovarReport(AReport: TStrings;
|
|
|
|
AVarNames: TStringArray);
|
|
|
|
begin
|
|
|
|
WriteMatrixReport(AReport, VarCovar, 'VARIANCE-COVARIANCE MATRIX', AVarNames);
|
|
|
|
end;
|
|
|
|
|
2020-10-14 14:28:59 +00:00
|
|
|
|
|
|
|
end.
|
|
|
|
|