You've already forked lazarus-ccr
LazStats: Massive refactoring of LSMRegUnit re-using general regression routines.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7782 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -43,8 +43,10 @@ type
|
||||
CoeffStdErrors: TDblVector; // Standard errors of the coefficients
|
||||
|
||||
Beta: TDblVector; // Standardized coefficients
|
||||
XT_X: TDblMatrix; // Cross-products matrix
|
||||
VarCovar: TDblMatrix; // Variance-covariance matrix
|
||||
Correlations: TDblMatrix; // Correlations matrix
|
||||
InvIndepCorrs: TDblMatrix; // Inverse of the INDEPENDENT correlation matrix
|
||||
|
||||
SStotal, SSreg, SSresid: Double; // Sum of squares
|
||||
NumCases, NumDepVars: Integer; // number of cases, number of dependent variables
|
||||
@ -59,24 +61,29 @@ type
|
||||
Means: TDblVector;
|
||||
Variances: TDblVector;
|
||||
StdDevs: TDblVector;
|
||||
(*
|
||||
|
||||
XMeans: TDblVector; // Means of the x columns
|
||||
XVariances: TDblVector; // Variances of the x columns
|
||||
XStdDevs: TDblVector; // Std Deviations of the x columns
|
||||
*)
|
||||
MeanY: Double; // Mean of y column
|
||||
VarianceY: Double; // Variance of y column
|
||||
StdDevY: Double; // Std Deviation of y column
|
||||
|
||||
YMean: Double; // Mean of y column
|
||||
YVariance: Double; // Variance of y column
|
||||
YStdDev: Double; // Std Deviation of y column
|
||||
|
||||
procedure WriteANOVAReport(AReport: TStrings);
|
||||
procedure WriteCoeffsReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
procedure WriteCorrelationReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
procedure WriteCrossProductsReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
procedure WriteVarCovarReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
end;
|
||||
|
||||
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
||||
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
|
||||
|
||||
procedure PredictMR(const xData: TDblMatrix; const yData: TDblVector;
|
||||
const ARegressionResults: TMultipleRegressionResults;
|
||||
out zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95: TDblVector);
|
||||
|
||||
|
||||
implementation
|
||||
|
||||
@ -171,6 +178,7 @@ begin
|
||||
|
||||
// Product of transposed augmented with augmented matrix
|
||||
XT_X := XT * X;
|
||||
AResults.XT_X := Matcopy(XT_X);
|
||||
|
||||
// Product of inverse of XT_X with XT_Y yields the coefficients. Intercept is last.
|
||||
inv_XT_X := MatInverse(XT_X);
|
||||
@ -187,24 +195,20 @@ begin
|
||||
AResults.Means[i] := XT_X[i, mx] / nx;
|
||||
AResults.Variances[i] := (XT_X[i, i] - sqr(AResults.Means[i]) * nx) / (nx - 1);
|
||||
AResults.StdDevs[i] := sqrt(AResults.Variances[i]);
|
||||
{
|
||||
if AResults.StdDevs[i] = 0.0 then
|
||||
begin
|
||||
Result := regStdDevZero;
|
||||
exit;
|
||||
end;
|
||||
}
|
||||
end;
|
||||
|
||||
// Variance-covariance matrix
|
||||
SetLength(AResults.VarCovar, mx, mx);
|
||||
SetLength(AResults.Correlations, mx, mx);
|
||||
for i := 0 to mx-1 do
|
||||
for j := 0 to mx-1 do
|
||||
begin
|
||||
AResults.VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1);
|
||||
AResults.Correlations[i, j] := AResults.VarCovar[i, j] / (AResults.StdDevs[i] * AResults.StdDevs[j]);
|
||||
end;
|
||||
with AResults do
|
||||
begin
|
||||
SetLength(VarCovar, mx, mx);
|
||||
SetLength(Correlations, mx, mx);
|
||||
for i := 0 to mx-1 do
|
||||
for j := 0 to mx-1 do
|
||||
begin
|
||||
VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1);
|
||||
Correlations[i, j] := VarCovar[i, j] / (StdDevs[i] * StdDevs[j]);
|
||||
end;
|
||||
end;
|
||||
|
||||
with AResults do
|
||||
begin
|
||||
@ -213,24 +217,17 @@ begin
|
||||
DFresid := NumCases - NumDepVars - 1;
|
||||
|
||||
// Calculate column means of xData
|
||||
// MatColMeanVarStdDev(X, XMeans, XVariances, XStdDevs);
|
||||
MatColMeanVarStdDev(xData, xMeans, xVariances, xStdDevs);
|
||||
|
||||
// Basic stats of y data column
|
||||
VecSumSS(yData, SY, SSY);
|
||||
MeanY := SY / NumCases;
|
||||
SStotal := SSY - sqr(MeanY) * NumCases; // needed for ANOVA
|
||||
VarianceY := SStotal / (NumCases - 1);
|
||||
StdDevY := sqrt(VarianceY);
|
||||
{
|
||||
if StdDevY = 0 then
|
||||
begin
|
||||
Result := regStdDevZero;
|
||||
exit;
|
||||
end;
|
||||
}
|
||||
yMean := SY / NumCases;
|
||||
SStotal := SSY - sqr(yMean) * NumCases; // needed for ANOVA
|
||||
yVariance := SStotal / (NumCases - 1);
|
||||
yStdDev := sqrt(yVariance);
|
||||
|
||||
// Standardized coefficients
|
||||
Beta := VecMultiply(Coeffs, StdDevs) * (1.0/StdDevY);
|
||||
Beta := VecMultiply(Coeffs, StdDevs) * (1.0/yStdDev);
|
||||
|
||||
// Get standard errors, squared multiple correlation, tests of significance
|
||||
SetLength(CoeffStdErrors, mx+1);
|
||||
@ -251,6 +248,8 @@ begin
|
||||
t[i] := Coeffs[i] / CoeffStdErrors[i];
|
||||
p[i] := ProbT(t[i], DFresid);
|
||||
end;
|
||||
|
||||
InvIndepCorrs := MatInverse(SubMatrix(Correlations, 0, 0, mx-1, mx-1));
|
||||
end;
|
||||
|
||||
Result := regOK;
|
||||
@ -266,13 +265,13 @@ begin
|
||||
DFtotal := NumCases - 1;
|
||||
DFresid := DFtotal - DFreg;
|
||||
AReport.Add('ANOVA');
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability');
|
||||
AReport.Add('---------- --- ---------------- ---------------- ------------ -----------');
|
||||
AReport.Add('Regression %3d %16.3f %16.3f %12.4f %10.4f', [DFreg, SSreg, SSreg/DFreg, F, Probability]);
|
||||
AReport.Add('Residual %3d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]);
|
||||
AReport.Add('Total %3d %16.3f', [DFtotal, SStotal]);
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add('--------------------------------------------------------------------------');
|
||||
AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability');
|
||||
AReport.Add('---------- --- ---------------- ---------------- -------- -----------');
|
||||
AReport.Add('Regression %4d %16.3f %16.3f %8.3f %10.3f', [DFreg, SSreg, SSreg/DFreg, F, Probability]);
|
||||
AReport.Add('Residual %4d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]);
|
||||
AReport.Add('Total %4d %16.3f', [DFtotal, SStotal]);
|
||||
AReport.Add('--------------------------------------------------------------------------');
|
||||
AReport.Add('');
|
||||
AReport.Add('R2: %10.4f', [R2]);
|
||||
AReport.Add('Adjusted R2: %10.4f', [AdjR2]);
|
||||
@ -291,9 +290,9 @@ var
|
||||
i, first, last: Integer;
|
||||
begin
|
||||
AReport.Add('COEFFICIENTS');
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add(' Variable Unstandardized Std.Error Standardized t p ');
|
||||
AReport.Add('------------ -------------- ------------ ------------ --------- ---------');
|
||||
AReport.Add('--------------------------------------------------------------------------');
|
||||
AReport.Add(' Variable Unstandardized Std.Error Standardized t p ');
|
||||
AReport.Add('------------ -------------- ------------ ------------ ------- -------');
|
||||
|
||||
first := 0;
|
||||
last := High(Coeffs);
|
||||
@ -307,10 +306,10 @@ begin
|
||||
else
|
||||
varName := Format('VAR.%d', [i+1]);
|
||||
|
||||
AReport.Add('%12s %14.3f %12.3f %12.3f %9.4f %9.4f',
|
||||
AReport.Add('%12s %14.3f %12.3f %12.3f %7.3f %7.3f',
|
||||
[varName, Coeffs[i], CoeffStdErrors[i], Beta[i], t[i], p[i]]);
|
||||
end;
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add('--------------------------------------------------------------------------');
|
||||
end;
|
||||
|
||||
|
||||
@ -323,12 +322,26 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
{ Writes the cross-product matrix to a "report"
|
||||
The names of the independent variables are neede in the array AVarNames. }
|
||||
procedure TMultipleRegressionResults.WriteCrossProductsReport(AReport: TStrings;
|
||||
AVarNames: TStringArray);
|
||||
var
|
||||
saved: String;
|
||||
begin
|
||||
saved := AVarNames[High(AVarNames)];
|
||||
AVarNames[High(AVarNames)] := '(Intercept)';
|
||||
WriteMatrixReport(AReport, XT_X, 'CROSS-PRODUCT MATRIX', AVarNames);
|
||||
AVarNames[High(AVarNames)] := saved;
|
||||
end;
|
||||
|
||||
|
||||
procedure TMultipleRegressionResults.WriteMatrixReport(AReport: TStrings;
|
||||
AMatrix: TDblMatrix; ATitle: String; AVarNames: TStringArray);
|
||||
|
||||
function GetVarName(i: Integer): String;
|
||||
begin
|
||||
if i < High(AVarNames) then
|
||||
if i <= High(AVarNames) then
|
||||
Result := AVarNames[i]
|
||||
else
|
||||
Result := Format('VAR.%d', [i+1]);
|
||||
@ -376,5 +389,90 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
procedure PredictMR(const xData: TDblMatrix; const yData: TDblVector;
|
||||
const ARegressionResults: TMultipleRegressionResults;
|
||||
out zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95: TDblVector);
|
||||
var
|
||||
i, j, k, n, m: Integer;
|
||||
x, x1, x2, y: Double;
|
||||
term1, term2, t: Double;
|
||||
begin
|
||||
zPred := nil; // to silence the compiler
|
||||
zResid := nil;
|
||||
RawPred := nil;
|
||||
RawResid := nil;
|
||||
StdErrPred := nil;
|
||||
Hi95 := nil;
|
||||
Lo95 := nil;
|
||||
|
||||
MatSize(xData, n,m);
|
||||
|
||||
// z predicted and zResidual
|
||||
SetLength(zPred, n);
|
||||
SetLength(zResid, n);
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
zPred[i] := 0;
|
||||
for j := 0 to m-1 do
|
||||
begin
|
||||
x := (xData[i, j] - ARegressionResults.xMeans[j]) / ARegressionResults.xStdDevs[j];
|
||||
zPred[i] := zPred[i] + ARegressionResults.Beta[j] * x;
|
||||
end;
|
||||
y := (yData[i] - ARegressionResults.yMean) / ARegressionResults.yStdDev;
|
||||
zResid[i] := y - zPred[i];
|
||||
end;
|
||||
|
||||
// Raw predicted and raw residuals
|
||||
SetLength(RawPred, n);
|
||||
SetLength(RawResid, n);
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
RawPred[i] := ARegressionResults.Coeffs[m]; // intercept
|
||||
for j := 0 to m-1 do
|
||||
RawPred[i] := RawPred[i] + ARegressionResults.Coeffs[j] * xData[i, j];
|
||||
RawResid[i] := yData[i] - RawPred[i];
|
||||
end;
|
||||
|
||||
// Calculate confidence interval for raw predicted score
|
||||
SetLength(StdErrPred, n);
|
||||
SetLength(Hi95, n);
|
||||
SetLength(Lo95, n);
|
||||
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
// Get term1 of the std. err. prediction
|
||||
term1 := 0.0;
|
||||
for j := 0 to m-1 do
|
||||
begin
|
||||
with ARegressionResults do
|
||||
begin
|
||||
x := (xData[i, j] - xMeans[j]) / xStdDevs[j];
|
||||
term1 := term1 + x * x * InvIndepCorrs[j, j];;
|
||||
end;
|
||||
end;
|
||||
|
||||
// Get term2 of the std err. of prediction
|
||||
term2 := 0.0;
|
||||
for j := 0 to m-1 do
|
||||
begin
|
||||
for k := j + 1 to m-1 do
|
||||
begin
|
||||
with ARegressionResults do
|
||||
begin
|
||||
x1 := (xData[i, j] - xMeans[j]) / xStdDevs[j];
|
||||
x2 := (xData[i, k] - xMeans[k]) / xStdDevs[k];
|
||||
term2 := term2 + x1 * x2 * InvIndepCorrs[j, k];
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
term2 := 2.0 * Term2;
|
||||
stdErrPred[i] := sqrt(n + 1 + term1 + term2);
|
||||
stdErrPred[i] := (ARegressionResults.StdErrorEstimate / sqrt(n)) * stdErrPred[i];
|
||||
t := InverseT(0.975, n - m - 1);
|
||||
Lo95[i] := RawPred[i] - t*StdErrPred[i];
|
||||
Hi95[i] := RawPred[i] + t*StdErrPred[i];
|
||||
end;
|
||||
end;
|
||||
|
||||
end.
|
||||
|
||||
|
Reference in New Issue
Block a user