LazStats: Fix usage of invalid rows in WLSUnit. Cleanup.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7781 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-18 13:52:00 +00:00
parent 791746e7ef
commit dfffbcf6f8
4 changed files with 175 additions and 257 deletions

View File

@ -57,9 +57,11 @@ type
ResidualsRegReportFrame: TReportFrame; ResidualsRegReportFrame: TReportFrame;
WLSReportFrame: TReportFrame; WLSReportFrame: TReportFrame;
procedure AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String); procedure AddVariable(AVarName: String; AData: DblDyneVec;
ANumFormat: String; const ABadRows: IntDyneVec);
procedure AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec); procedure AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec;
const ABadRows: IntDyneVec);
procedure CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec; procedure CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec); out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
@ -76,19 +78,20 @@ type
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String); const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
procedure Predict(const xData: DblDyneMat; const yData: DblDyneVec; procedure Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults); const ABadRows: IntDyneVec; ARegressionResults: TMultipleRegressionResults);
function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer; function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec; out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean; out xValues: DblDyneMat; out yValues: DblDyneVec; out AWeights: DblDyneVec;
out ABadRows: IntDyneVec): Boolean;
function Process_OLSRegression(AIndepCols: IntDyneVec; ADepCol: Integer; function Process_OLSRegression(AIndepCols: IntDyneVec; ADepCol: Integer;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec): Boolean; const yValues: DblDyneVec; const ABadRows: IntDyneVec): Boolean;
function Process_SquaredResidualsRegression(AIndepCols: IntDyneVec; function Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
out AWeights: DblDyneVec): Boolean; out AWeights: DblDyneVec; const ABadRows: IntDyneVec): Boolean;
function Process_WeightedRegression(AIndepCols: IntDyneVec; function Process_WeightedRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
@ -173,10 +176,27 @@ end;
{ Adds a new variable named AVarName after the last grid column, { Adds a new variable named AVarName after the last grid column,
and writes the specified data to the grid (in the specified number format). } and writes the specified data to the grid (in the specified number format).
procedure TWLSFrm.AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String); Rows mentioned in ABadRows must be omitted because they are not contained in
AData. }
procedure TWLSFrm.AddVariable(AVarName: String; AData: DblDyneVec;
ANumFormat: String; const ABadRows: IntDyneVec);
{ Returns true when ARow is listed in ABadRows (the parameter of the
  enclosing procedure), false otherwise. }
function IsBadRow(ARow: Integer): Boolean;
var
  k: Integer;
begin
  // Assume the row is fine until it is found in the list.
  Result := false;
  for k := 0 to High(ABadRows) do
    if ABadRows[k] = ARow then
    begin
      Result := true;
      break;
    end;
end;
var var
i, colIndex: Integer; i, j, colIndex, row: Integer;
begin begin
colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname); colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname);
if colIndex = -1 then if colIndex = -1 then
@ -188,17 +208,25 @@ begin
OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName; OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables); OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
end; end;
row := 1;
for i := 0 to High(AData) do for i := 0 to High(AData) do
OS3MainFrm.DataGrid.Cells[colIndex, i+1] := Format(ANumFormat, [AData[i]]); begin
while IsBadRow(row) do inc(row);
if row >= OS3MainFrm.DataGrid.RowCount then
raise Exception.Create('Bad row error.');
OS3MainFrm.DataGrid.Cells[colIndex, row] := Format(ANumFormat, [AData[i]]);
inc(row);
end;
end; end;
{ Calculate predicted squared residuals and save reciprocals to grid as weights } { Calculate predicted squared residuals and save reciprocals to grid as weights }
procedure TWLSFrm.AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec); procedure TWLSFrm.AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec;
const ABadRows: IntDyneVec);
begin begin
// Create new variables and add to grid // Create new variables and add to grid
AddVariable('Pred SqrResid', ASqrPredictedResiduals, '%.3f'); AddVariable('Pred SqrResid', ASqrPredictedResiduals, '%.3f', ABadRows);
AddVariable('WEIGHTS', AWeights, '%.3f'); AddVariable('WEIGHTS', AWeights, '%.3f', ABadRows);
end; end;
@ -219,7 +247,7 @@ begin
sum := 0; sum := 0;
for i := 0 to n-1 do for i := 0 to n-1 do
begin begin
ASquaredPredictedResiduals[i] := ACoeffs[m]; // intercept value ASquaredPredictedResiduals[i] := ACoeffs[m]; // intercept coefficient
for j := 0 to m-1 do for j := 0 to m-1 do
ASquaredPredictedResiduals[i] += abs(xValues[i, j] * ACoeffs[j]); ASquaredPredictedResiduals[i] += abs(xValues[i, j] * ACoeffs[j]);
if ASquaredPredictedResiduals[i] <> 0 then if ASquaredPredictedResiduals[i] <> 0 then
@ -228,9 +256,6 @@ begin
AWeights[i] := 0; AWeights[i] := 0;
sum := sum + AWeights[i]; sum := sum + AWeights[i];
end; end;
// Normalize weights to 1.0
AWeights := AWeights * (1.0 / sum);
end; end;
@ -245,26 +270,12 @@ begin
); );
end; end;
procedure TWLSFrm.Compute; procedure TWLSFrm.Compute;
var var
i, j, noIndep, NCases, pos, col: integer;
X, weight: double;
Means: DblDyneVec = nil;
Variances: DblDyneVec = nil;
StdDevs: DblDyneVec = nil;
BWeights: DblDyneVec = nil;
BetaWeights: DblDyneVec = nil;
BStdErrs: DblDyneVec = nil;
BtTests: DblDyneVec = nil;
tProbs: DblDyneVec = nil;
lReport: TStrings;
StdErrEst: Double = 0.0;
R2: Double = 0.0;
errorcode: Boolean = false;
PrintDesc: boolean = true;
indepCols: IntDyneVec = nil; indepCols: IntDyneVec = nil;
rowLabels: StrDyneVec = nil; rowLabels: StrDyneVec = nil;
badRows: IntDyneVec = nil;
weights: DblDyneVec = nil; weights: DblDyneVec = nil;
xValues: DblDyneMat = nil; xValues: DblDyneMat = nil;
yValues: DblDyneVec = nil; yValues: DblDyneVec = nil;
@ -272,164 +283,25 @@ var
weightCol: Integer = -1; weightCol: Integer = -1;
useOrigin: Boolean; useOrigin: Boolean;
begin begin
SetLength(Means, NoVariables + 2); if not PrepareData(indepCols, depCol, weightCol, RowLabels, xValues, yValues, weights, badRows) then
SetLength(Variances, NoVariables + 2);
SetLength(StdDevs, NoVariables + 2);
SetLength(BWeights, NoVariables + 2); // do not remove!
SetLength(BetaWeights, NoVariables + 2);
SetLength(BStdErrs, NoVariables + 2);
SetLength(Bttests, NoVariables + 2);
SetLength(tprobs, NoVariables + 2);
lReport := TStringList.Create;
try
NCases := NoCases;
// Get column indexes and do some validation checks.
if not PrepareData(indepCols, depCol, weightCol, RowLabels, xValues, yValues) then
exit; exit;
// Do the OLS regression // Do the OLS regression
if not Process_OLSRegression(indepCols, depCol, RowLabels, xValues, yValues) then if not Process_OLSRegression(indepCols, depCol, RowLabels, xValues, yValues, badRows) then
exit; exit;
// Regress the squared residuals on the predictors // Regress the squared residuals on the predictors
ResidualsRegPage.TabVisible := WeightChk.Checked;
if WeightChk.Checked then if WeightChk.Checked then
begin begin
if not Process_SquaredResidualsRegression(indepCols, RowLabels, xValues, weights) then if not Process_SquaredResidualsRegression(indepCols, RowLabels, xValues, weights, badRows) then
exit; exit;
useOrigin := OriginChk.Checked; useOrigin := OriginChk.Checked;
end else end else
begin
// Read the weights from the user column
weights := CollectVecValues(OS3MainFrm.DataGrid, weightCol, indepCols);
useOrigin := Origin2Chk.Checked; useOrigin := Origin2Chk.Checked;
end;
// Do the weighted regression, finally // Do the weighted regression, finally
Process_WeightedRegression(indepCols, RowLabels, xValues, yValues, weights, useOrigin); Process_WeightedRegression(indepCols, RowLabels, xValues, yValues, weights, useOrigin);
exit;
if WeightChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
// DepCol := olddepcol;
IndepCols[Noindep] := DepCol;
for i := 1 to NoCases do
begin
weight := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[NoVariables,i]));
for j := 0 to Noindep do
begin
pos := IndepCols[j];
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
X := X * weight;
OS3MainFrm.DataGrid.Cells[pos, i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID CELLS
end;
end;
// get means of variables and subtract from the values
if OriginChk.Checked then
begin
for j := 0 to NoIndep do
begin
Means[j] := 0.0;
NCases := 0;
pos := IndepCols[j];
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
Means[j] := Means[j] + StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
NCases := NCases + 1;
end;
end;
Means[j] := Means[j] / NCases;
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
X := X - Means[j];
OS3MainFrm.DataGrid.Cells[pos,i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID DATA!
end;
end; // next i
end; // next j
end; // if origin checked
lReport.Clear;
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
MReg(Noindep, IndepCols, DepCol, RowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
NCases, errorcode, PrintDesc, lReport);
WLSReportFrame.DisplayReport(lReport);
lReport.Clear;
end; // if weightschk checked
// use the weights entered by the user
if UserWeightsChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
// depCol := olddepcol;
indepCols[Noindep] := depCol; // wp: CALCULATION SHOULD NORMALIZE USER WEIGHTS HERE !!!
for i := 1 to NoCases do
begin
weight := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[weightCol, i]));
for j := 0 to Noindep do
begin
pos := indepCols[j];
X := StrToFloat(OS3MainFrm.DataGrid.Cells[pos,i]);
X := X * weight;
OS3MainFrm.DataGrid.Cells[pos, i] := FloatToStr(X);
end;
end;
if Origin2Chk.Checked then // get means of variables and subtract from the values
begin
for j := 0 to Noindep do
begin
Means[j] := 0.0;
NCases := 0;
pos := IndepCols[j];
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
Means[j] := Means[j] + StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos, i]));
NCases := NCases + 1;
end;
end;
Means[j] := Means[j] / NCases;
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
X := StrToFloat(OS3MainFrm.DataGrid.Cells[pos,i]);
X := X - Means[j];
OS3MainFrm.DataGrid.Cells[pos,i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID DATA!
end;
end; // next i
end; // next j
end; // if origin checked
lReport.Clear;
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
MReg(Noindep, IndepCols, DepCol, RowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
NCases, errorcode, PrintDesc, lReport);
WLSReportFrame.DisplayReport(lReport);
lReport.Clear;
end;
finally
lReport.Free;
end;
end; end;
@ -584,6 +456,8 @@ begin
end; end;
{ We will plot the selected vector of the independent values vertically,
and the dependent values horizontally. }
procedure TWLSFrm.PlotSquaredResiduals(AIndepCols: IntDyneVec; procedure TWLSFrm.PlotSquaredResiduals(AIndepCols: IntDyneVec;
ADepCol: Integer; const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec); ADepCol: Integer; const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
var var
@ -595,9 +469,6 @@ var
xLabel, yLabel: String; xLabel, yLabel: String;
numIndepCols: Integer; numIndepCols: Integer;
begin begin
// We will plot the selected vector of the independent values vertically,
// and the dependent values horizontally.
xCol := ADepCol; xCol := ADepCol;
x := VecCopy(ADepValues); x := VecCopy(ADepValues);
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0]; xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
@ -623,47 +494,7 @@ begin
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel); WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end; end;
end; end;
(*
var
xCol, yCol: Integer;
xLabel, yLabel: String;
i: Integer;
colNoSelected: IntDyneVec = nil;
xPoints: DblDyneVec = nil;
yPoints: DblDyneVec = nil;
regressionRes: TBivariateRegressionResults;
memo: TMemo;
chartFrame: TChartFrame;
begin
SetLength(colNoSelected, 2);
xCol := ADepCol;
for i := 0 to ANumIndepCols-1 do
begin
// Get values
yCol := AIndepCols[i];
colNoSelected[0] := xCol;
colNoSelected[1] := yCol;
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
xPoints := CollectVecValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
yPoints := CollectVecValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
SortOnX(xPoints, yPoints);
// Regression
BivariateRegression(xPoints, yPoints, AConfLevel, regressionRes);
// Create tab with chart and report controls
CreateOrGetChartFrame(yCol, yLabel, memo, chartFrame);
// Plot
PlotXY(chartFrame, xPoints, yPoints, regressionRes, xLabel, yLabel);
// Print the descriptive statistics
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end;
end;
*)
procedure TWLSFrm.PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec; procedure TWLSFrm.PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String); const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
@ -730,12 +561,13 @@ begin
end; end;
end; end;
{ Routine obtains predicted raw and standardized scores and their { Routine obtains predicted raw and standardized scores and their
residuals. It is assumed that the dependent variable is last in the residuals. It is assumed that the dependent variable is last in the
list of variable column pointers stored in the ColNoSelected vector. list of variable column pointers stored in the ColNoSelected vector.
Get the z predicted score and its residual } Get the z predicted score and its residual }
procedure TWLSFrm.Predict(const xData: DblDyneMat; const yData: DblDyneVec; procedure TWLSFrm.Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults); const ABadRows: IntDyneVec; ARegressionResults: TMultipleRegressionResults);
var var
means, stddevs, variances: DblDyneVec; means, stddevs, variances: DblDyneVec;
i, j, n, m: Integer; i, j, n, m: Integer;
@ -783,11 +615,11 @@ begin
sqrResid[i] := sqr(rawResid[i]); sqrResid[i] := sqr(rawResid[i]);
end; end;
AddVariable('z Pred', zPred, '%.4f'); AddVariable('z Pred', zPred, '%.4f', ABadRows);
// AddGridColumn('z Resid', zResid, '%.4f'); // AddGridColumn('z Resid', zResid, '%.4f');
AddVariable('Raw Pred', rawPred, '%.3f'); AddVariable('Raw Pred', rawPred, '%.3f', ABadRows);
AddVariable('Raw Resid', rawResid, '%.3f'); AddVariable('Raw Resid', rawResid, '%.3f', ABadRows);
AddVariable('Sqr Resid', sqrResid, '%.3f'); AddVariable('Sqr Resid', sqrResid, '%.3f', ABadRows);
end; end;
@ -802,21 +634,28 @@ end;
- xValues: matrix with all independent values. The columns of the matrix - xValues: matrix with all independent values. The columns of the matrix
correspond to the variables, the rows correspond to the cases. correspond to the variables, the rows correspond to the cases.
- yValues: vector with the dependent variable values - yValues: vector with the dependent variable values
- ABadRows: indices of rows in which there is at least one invalid value
in the columns specified by AIndepCols. A value is "invalid" when it
is filtered, numeric but empty, or contains the missing value code.
} }
function TWLSFrm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer; function TWLSFrm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec; out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean; out xValues: DblDyneMat; out yValues: DblDyneVec; out AWeights: DblDyneVec;
out ABadRows: IntDyneVec): Boolean;
var var
i: Integer; i, n: Integer;
msg: String; msg: String;
C: TWinControl; C: TWinControl;
numIndepCols: Integer; numIndepCols: Integer;
cols: IntDyneVec = nil;
begin begin
Result := false; Result := false;
AIndepCols := nil; AIndepCols := nil;
ARowLabels := nil; ARowLabels := nil;
xValues := nil; xValues := nil;
yvalues := nil; yvalues := nil;
AWeights := nil;
ABadRows := nil;
if not Validate(msg, C) then if not Validate(msg, C) then
begin begin
@ -864,8 +703,42 @@ begin
exit; exit;
end; end;
xValues := CollectMatValues(OS3MainFrm.DataGrid, AIndepCols); // Prepare list of all column indices to be loaded: x, y, weights
yValues := CollectVecValues(OS3MainFrm.DataGrid, ADepCol); // ADepCol will follow the x columns immediately, WeightCol is last.
if AWeightCol > -1 then
begin
SetLength(cols, numIndepCols + 2);
cols[numIndepCols] := ADepCol;
cols[numIndepCols+1] := AWeightCol;
end else
begin
SetLength(cols, NumIndepCols + 1);
cols[numIndepCols] := ADepCol;
end;
for i := 0 to numIndepCols-1 do cols[i] := AIndepCols[i];
// Determine list of indices of rows containing invalid entries.
SetLength(ABadRows, OS3MainFrm.DataGrid.RowCount);
n := 0;
for i := 1 to OS3MainFrm.DataGrid.RowCount-1 do
if not GoodRecord(OS3MainFrm.DataGrid, i, cols) then
begin
ABadRows[n] := i;
inc(n);
end;
SetLength(ABadRows, n);
// Extract data values; take care to omit invalid values in both x and y
xValues := CollectMatValues(OS3MainFrm.DataGrid, cols);
// The y column has index numIndepCols, i.e. follows the x columns.
yValues := MatColVector(xValues, numIndepCols);
MatColDelete(xValues, numIndepCols);
if AWeightCol > -1 then
begin
// The weight column is the last column
AWeights := MatColVector(xValues, High(xValues[0]));
MatColDelete(xValues, High(xValues[0]));
end;
Result := true; Result := true;
end; end;
@ -874,7 +747,8 @@ end;
{ Runs the ordinary least squares regression on the grid data } { Runs the ordinary least squares regression on the grid data }
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec; function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec;
ADepCol: Integer; const ARowLabels: StrDyneVec; ADepCol: Integer; const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec): Boolean; const xValues: DblDyneMat; const yValues: DblDyneVec;
const ABadRows: IntDyneVec): Boolean;
var var
lReport: TStrings; lReport: TStrings;
regressionRes: TMultipleRegressionResults; regressionRes: TMultipleRegressionResults;
@ -899,7 +773,7 @@ begin
if Result then if Result then
begin begin
Predict(xValues, yValues, regressionRes); Predict(xValues, yValues, ABadRows, regressionRes);
OLSReportFrame.DisplayReport(lReport); OLSReportFrame.DisplayReport(lReport);
end; end;
@ -910,7 +784,8 @@ end;
function TWLSFrm.Process_SquaredResidualsRegression(AIndepCols: IntDyneVec; function TWLSFrm.Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; out AWeights: DblDyneVec): Boolean; const ARowLabels: StrDyneVec; const xValues: DblDyneMat; out AWeights: DblDyneVec;
const ABadRows: IntDyneVec): Boolean;
var var
lReport: TStrings; lReport: TStrings;
sqrResiduals: DblDyneVec; sqrResiduals: DblDyneVec;
@ -919,7 +794,6 @@ var
i, depCol, numIndepCols: Integer; i, depCol, numIndepCols: Integer;
begin begin
AWeights := nil; AWeights := nil;
ResidualsRegPage.TabVisible := WeightChk.Checked;
if not WeightChk.Checked then if not WeightChk.Checked then
exit; exit;
@ -957,7 +831,7 @@ begin
// Store weights to the grid // Store weights to the grid
if SaveChk.Checked then if SaveChk.Checked then
AddWeightsToGrid(predSqrResiduals, AWeights); AddWeightsToGrid(predSqrResiduals, AWeights, ABadRows);
end; end;
finally finally
@ -974,19 +848,27 @@ var
regressionRes: TMultipleRegressionResults; regressionRes: TMultipleRegressionResults;
lReport: TStrings; lReport: TStrings;
means: DblDyneVec; means: DblDyneVec;
yMean: Double;
begin begin
MatSize(xValues, n, m); MatSize(xValues, n, m);
for i :=0 to n-1 do for i :=0 to n-1 do
begin
for j := 0 to m-1 do for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] * AWeights[i]; xValues[i, j] := xValues[i, j] * AWeights[i];
yValues[i] := yValues[i] * AWeights[i];
end;
if SubtractMeans then if SubtractMeans then
begin begin
means := MatRowMeans(xValues); means := MatColMeans(xValues);
yMean := VecMean(yValues);
for i := 0 to n-1 do for i := 0 to n-1 do
begin
for j := 0 to m-1 do for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] - means[i]; xValues[i, j] := xValues[i, j] - means[j];
yValues[i] := yValues[i] - yMean;
end;
end; end;
lReport := TStringList.Create; lReport := TStringList.Create;
@ -1019,9 +901,8 @@ var
begin begin
err := MultipleRegression(xValues, yValues, CONF_LEVEL, ARegressionResults); err := MultipleRegression(xValues, yValues, CONF_LEVEL, ARegressionResults);
case err of case err of
regOK: ; regOK : ;
regTooFewValues: ErrorMsg('At least two values required for regression.'); regTooFewValues : ErrorMsg('At least two values required for regression.');
regStdDevZero: ErrorMsg('Standard deviation is zero.');
end; end;
Result := (err = regOK); Result := (err = regOK);

View File

@ -75,27 +75,29 @@ end;
{ Extracts the grid values from the columns with indices given by AColIndices { Extracts the grid values from the columns with indices given by AColIndices
and puts them into the columns of the result matrix. and puts them into the columns of the result matrix.
This means: The result matrix contains the variables as columns and the This means: The result matrix contains the variables as columns and the
cases as rows. } cases as rows.
"Bad" records (filtered, empty) are skipped. }
function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat; function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat;
var var
nr, r, c, i, j: Integer; r, c, i, j: Integer;
val: Double; val: Double;
begin begin
SetLength(Result, AGrid.RowCount, Length(AColIndices)); SetLength(Result, AGrid.RowCount, Length(AColIndices));
nr := 0; i := 0;
for r:= 1 to AGrid.RowCount-1 do for r:= 1 to AGrid.RowCount-1 do
begin begin
if not GoodRecord(AGrid, r, AColIndices) then Continue; if not GoodRecord(AGrid, r, AColIndices) then Continue;
i := r - 1;
for j := 0 to High(AColIndices) do for j := 0 to High(AColIndices) do
begin begin
c := AColIndices[j]; c := AColIndices[j];
if TryStrToFloat(trim(AGrid.Cells[c, r]), val) then if TryStrToFloat(trim(AGrid.Cells[c, r]), val) then
Result[i, j] := val; Result[i, j] := val
else
Result[i, j] := NaN;
end; end;
inc(nr); // count the number of rows in the matrix. inc(i);
end; end;
SetLength(Result, nr); SetLength(Result, i);
end; end;

View File

@ -32,6 +32,7 @@ procedure VecSize(A: TDblVector; out n: Integer);
procedure VecMaxMin(const AData: TDblVector; procedure VecMaxMin(const AData: TDblVector;
out AMax, AMin: Double); out AMax, AMin: Double);
function VecMean(const AData: TDblVector): Double;
procedure VecMeanStdDev(const AData: TDblVector; procedure VecMeanStdDev(const AData: TDblVector;
out AMean, AStdDev: Double); out AMean, AStdDev: Double);
procedure VecMeanVarStdDev(const AData: TDblVector; procedure VecMeanVarStdDev(const AData: TDblVector;
@ -57,6 +58,7 @@ operator * (A: TDblMatrix; v: TDblVector): TDblVector;
function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix; function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix;
procedure MatCheck(A: TDblMatrix); procedure MatCheck(A: TDblMatrix);
procedure MatCheckSquare(A: TDblMatrix; out n: Integer); procedure MatCheckSquare(A: TDblMatrix; out n: Integer);
procedure MatColDelete(A: TDblMatrix; ACol: Integer);
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector); procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
function MatColMeans(A: TDblMatrix): TDblVector; function MatColMeans(A: TDblMatrix): TDblVector;
function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector; function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector;
@ -227,6 +229,22 @@ begin
end; end;
{ Returns the arithmetic mean of the elements of AData.
  For an empty vector the result is NaN. }
function VecMean(const AData: TDblVector): Double;
var
  idx, count: Integer;
  total: Double;
begin
  count := Length(AData);

  // Nothing to average: report "not a number" instead of dividing by zero.
  if count <= 0 then
  begin
    Result := NaN;
    exit;
  end;

  total := 0;
  for idx := 0 to count - 1 do
    total := total + AData[idx];
  Result := total / count;
end;
procedure VecMeanStdDev(const AData: TDblVector; out AMean, AStdDev: Double); procedure VecMeanStdDev(const AData: TDblVector; out AMean, AStdDev: Double);
var var
variance: Double; variance: Double;
@ -432,6 +450,23 @@ begin
end; end;
{ Removes column ACol from the matrix A: in every row the elements behind
  ACol are moved one position to the left, then the row is shortened by one
  element.
  Raises EMatrix when ACol is negative or not below the second size value
  reported by MatSize (presumably the column count - confirm in MatSize). }
procedure MatColDelete(A: TDblMatrix; ACol: Integer);
var
  numRows, numCols, r, c: Integer;
begin
  MatSize(A, numRows, numCols);
  if not ((ACol >= 0) and (ACol < numCols)) then
    raise EMatrix.Create('MatColDelete: illegal column index.');

  for r := 0 to numRows - 1 do
  begin
    // Close the gap: only the elements at or behind ACol have to move.
    for c := ACol to numCols - 2 do
      A[r, c] := A[r, c + 1];
    // Drop the last element of this row, which is now a duplicate.
    SetLength(A[r], numCols - 1);
  end;
end;
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector); procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
var var
n, m, i, j: Integer; n, m, i, j: Integer;

View File

@ -14,7 +14,7 @@ type
ERegression = class(Exception); ERegression = class(Exception);
type type
TRegressionError = (regOK, regTooFewValues, regStdDevZero); TRegressionError = (regOK, regTooFewValues);
TBivariateRegressionResults = record TBivariateRegressionResults = record
public public