LazStats: Massive refactoring of WLSUnit avoiding overwriting of grid data. Fix crash when accessing non-existing cells in DictionaryFrm.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7780 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-17 22:42:41 +00:00
parent f25070ba4f
commit 791746e7ef
13 changed files with 964 additions and 322 deletions

View File

@ -47,7 +47,6 @@
<Linking>
<Debugging>
<DebugInfoType Value="dsDwarf2Set"/>
<UseHeaptrc Value="True"/>
<TrashVariables Value="True"/>
</Debugging>
</Linking>

View File

@ -146,9 +146,9 @@ begin
FColIndex := AColIndex;
if Length(FColsSelected) = 0 then
FValues := CollectValues(FDataGrid, AColIndex, FColsSelected)
FValues := CollectVecValues(FDataGrid, AColIndex, FColsSelected)
else
FValues := CollectValues(FDataGrid, AColIndex);
FValues := CollectVecValues(FDataGrid, AColIndex);
FNumCases := Length(FValues);
SortOnX(FValues);

View File

@ -267,8 +267,8 @@ begin
ColNoSelected[0] := xCol;
ColNoSelected[1] := yCol;
XData := CollectValues(ADataGrid, xCol, colNoSelected);
YData := CollectValues(ADataGrid, ycol, colNoSelected);
XData := CollectVecValues(ADataGrid, xCol, colNoSelected);
YData := CollectVecValues(ADataGrid, ycol, colNoSelected);
N := Length(XData);
if N < 3 then
begin

View File

@ -361,8 +361,8 @@ begin
ColNoSelected[0] := xCol;
ColNoSelected[1] := yCol;
XData := CollectValues(ADataGrid, xCol, colNoSelected);
YData := CollectValues(ADataGrid, ycol, colNoSelected);
XData := CollectVecValues(ADataGrid, xCol, colNoSelected);
YData := CollectVecValues(ADataGrid, ycol, colNoSelected);
N := Length(XData);
if N < 3 then
begin

View File

@ -128,7 +128,7 @@ begin
lReport.Add('');
// Stores values of the variable
values := CollectValues(OS3MainFrm.DataGrid, k);
values := CollectVecValues(OS3MainFrm.DataGrid, k);
nCases := Length(values);
SetLength(valueString, nCases);

View File

@ -119,10 +119,10 @@ begin
end;
// Extract x and y values
xValues := CollectValues(OS3MainFrm.DataGrid, xCol, selected);
xValues := CollectVecValues(OS3MainFrm.DataGrid, xCol, selected);
SetLength(yValues, nY);
for i := 0 to nY-1 do
yValues[i] := CollectValues(OS3MainFrm.DataGrid, selected[i], selected);
yValues[i] := CollectVecValues(OS3MainFrm.DataGrid, selected[i], selected);
// Make sure that all y columns have the same length
N := Length(yValues[0]);

View File

@ -42,26 +42,24 @@ inherited WLSFrm: TWLSFrm
AnchorSideLeft.Control = ParamsPanel
AnchorSideBottom.Control = ButtonBevel
Left = 0
Height = 147
Top = 265
Height = 131
Top = 281
Width = 266
Anchors = [akLeft, akBottom]
AutoSize = True
Caption = 'Options'
ClientHeight = 127
ClientHeight = 111
ClientWidth = 262
TabOrder = 10
object SaveChk: TCheckBox
AnchorSideLeft.Control = OptionsGroup
AnchorSideTop.Control = OptionsBevel
AnchorSideLeft.Control = WeightChk
AnchorSideTop.Control = OriginChk
AnchorSideTop.Side = asrBottom
Left = 12
Left = 40
Height = 19
Top = 100
Top = 42
Width = 180
BorderSpacing.Left = 12
BorderSpacing.Top = 8
BorderSpacing.Bottom = 8
BorderSpacing.Left = 24
Caption = 'Save estimated weights in grid'
TabOrder = 0
end
@ -83,8 +81,9 @@ inherited WLSFrm: TWLSFrm
AnchorSideTop.Side = asrBottom
Left = 40
Height = 19
Top = 65
Top = 84
Width = 119
BorderSpacing.Bottom = 8
Caption = 'Through the origin'
TabOrder = 2
end
@ -104,32 +103,17 @@ inherited WLSFrm: TWLSFrm
end
object UserWeightsChk: TRadioButton
AnchorSideLeft.Control = WeightChk
AnchorSideTop.Control = OriginChk
AnchorSideTop.Control = SaveChk
AnchorSideTop.Side = asrBottom
Left = 16
Height = 19
Top = 46
Top = 65
Width = 156
BorderSpacing.Top = 4
Caption = 'Use weights from column'
OnChange = UserWeightsChkChange
TabOrder = 4
end
object OptionsBevel: TBevel
AnchorSideLeft.Control = OptionsGroup
AnchorSideTop.Control = Origin2Chk
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = OptionsGroup
AnchorSideRight.Side = asrBottom
Left = 12
Height = 8
Top = 84
Width = 238
Anchors = [akTop, akLeft, akRight]
BorderSpacing.Left = 12
BorderSpacing.Right = 12
Shape = bsBottomLine
end
end
object Label1: TLabel[6]
AnchorSideLeft.Control = ParamsPanel
@ -169,7 +153,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideBottom.Control = WeightVarEdit
Left = 161
Height = 15
Top = 205
Top = 221
Width = 77
Anchors = [akLeft, akBottom]
BorderSpacing.Bottom = 2
@ -184,7 +168,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideRight.Control = DepInBtn
AnchorSideBottom.Control = OptionsGroup
Left = 0
Height = 240
Height = 256
Top = 17
Width = 119
Anchors = [akTop, akLeft, akRight, akBottom]
@ -266,7 +250,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideBottom.Control = WeightOutBtn
Left = 127
Height = 26
Top = 201
Top = 217
Width = 26
Anchors = [akLeft, akBottom]
BorderSpacing.Bottom = 4
@ -283,7 +267,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideBottom.Side = asrBottom
Left = 127
Height = 26
Top = 231
Top = 247
Width = 26
Anchors = [akLeft, akBottom]
Images = MainDataModule.ImageList
@ -320,7 +304,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = WeightInBtn
Left = 161
Height = 63
Height = 79
Top = 122
Width = 119
Anchors = [akTop, akLeft, akRight, akBottom]
@ -342,7 +326,7 @@ inherited WLSFrm: TWLSFrm
AnchorSideBottom.Side = asrBottom
Left = 161
Height = 23
Top = 222
Top = 238
Width = 119
Anchors = [akLeft, akRight, akBottom]
BorderSpacing.Left = 8
@ -362,13 +346,13 @@ inherited WLSFrm: TWLSFrm
Height = 453
Top = 8
Width = 715
ActivePage = OLSPage
ActivePage = ResidualsRegPage
Align = alClient
BorderSpacing.Left = 4
BorderSpacing.Top = 8
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
TabIndex = 0
TabIndex = 1
TabOrder = 2
object OLSPage: TTabSheet
Caption = 'OLS Regression'

View File

@ -15,7 +15,6 @@ type
{ TWLSFrm }
TWLSFrm = class(TBasicStatsParamsForm)
OptionsBevel: TBevel;
DepInBtn: TBitBtn;
DepOutBtn: TBitBtn;
IndInBtn: TBitBtn;
@ -58,34 +57,47 @@ type
ResidualsRegReportFrame: TReportFrame;
WLSReportFrame: TReportFrame;
procedure AddPredictedStuffToGrid(AIndepCols: IntDyneVec; ANumIndepCols: Integer;
BWeights: DblDyneVec);
procedure AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
procedure AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
procedure CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
procedure CreateOrGetChartFrame(AColIndex: Integer; AVarName: String;
out AMemo: TMemo; out AChartFrame: TChartFrame);
function GetPageCaption(AVarName: String): String;
procedure PlotSquaredResiduals(AIndepCols: IntDyneVec;
ANumIndepCols, ADepCol: Integer; AConfLevel: Double);
procedure PlotSquaredResiduals(AIndepCols: IntDyneVec; ADepCol: Integer;
const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
procedure PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
procedure PredictIt(ColNoSelected: IntDyneVec; NoVars: integer;
Means, StdDevs, BetaWeights: DblDyneVec;
StdErrEst: double; NoIndepVars: integer);
procedure Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults);
function PrepareData(out ADepCol, ANumIndepCols: Integer;
out AIndepCols: IntDyneVec; out AWeightCol: Integer;
out ARowLabels: StrDyneVec): Boolean;
function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
function Process_OLSRegression(AIndepCols: IntDyneVec; ANumIndepCols, ADepCol: Integer;
ARowLabels: StrDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
function Process_OLSRegression(AIndepCols: IntDyneVec; ADepCol: Integer;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec): Boolean;
function Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
ANumIndepCols, ADepCol: Integer; ARowLabels: StrDyneVec;
BWeights: DblDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
out AWeights: DblDyneVec): Boolean;
function Process_WeightedRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec; const AWeights: DblDyneVec;
SubtractMeans: Boolean): Boolean;
function RegressionAndReport(const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec;
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
procedure WriteDescriptiveReport(AMemo: TMemo;
const ARegressionResults: TBivariateRegressionResults;
@ -116,6 +128,9 @@ uses
TAChartUtils, TAChartAxisUtils, TALegend, TASources, TACustomSeries,
Utils, MatrixUnit, GridProcs;
const
CONF_LEVEL = DEFAULT_CONFIDENCE_LEVEL_PERCENT / 100.0;
{ TWLSFrm }
constructor TWLSFrm.Create(AOwner: TComponent);
@ -152,50 +167,70 @@ begin
WLSReportFrame.BorderSpacing.Bottom := 0;
WLSReportFrame.BorderSpacing.Right := 0;
InitToolbar(WLSReportFrame.ReportToolbar, tpRight);
PageControl.ActivePageIndex := 0;
end;
{ Get predicted squared residuals and save recipricols to grid as weights }
procedure TWLSFrm.AddPredictedStuffToGrid(AIndepCols: IntDyneVec;
ANumIndepCols: Integer; BWeights: DblDyneVec);
{ Adds a new variable names AColTitle after the last grid column,
and writes the specified data to the grid (in the specified number format). }
procedure TWLSFrm.AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
var
col: Integer;
i, j: Integer;
X, predicted: Double;
i, colIndex: Integer;
begin
col := NoVariables + 1;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.NewVar(col);
DictionaryFrm.DictGrid.Cells[1, col] := 'PredResid2';
OS3MainFrm.DataGrid.Cells[col, 0] := 'PredResid2';
col := NoVariables + 1;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.NewVar(col);
DictionaryFrm.DictGrid.Cells[1, col] := 'WEIGHT';
OS3MainFrm.DataGrid.Cells[col, 0] := 'WEIGHT';
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
for i := 1 to NoCases do
colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname);
if colIndex = -1 then
begin
if (DataProcs.ValidValue(i, col-2)) then // do we have a valid squared OLS residual?
begin
predicted := 0.0;
for j := 0 to ANumIndepCols - 1 do
begin
X := StrToFloat(OS3MainFrm.DataGrid.Cells[AIndepCols[j], i]);
predicted := predicted + BWeights[j] * X;
end;
predicted := predicted + BWeights[ANumIndepCols];
predicted := abs(predicted);
OS3MainFrm.DataGrid.Cells[col-1, i] := Format('%.3f', [predicted]);
if (predicted > 0.0) then
predicted := 1.0 / sqrt(predicted)
else
predicted := 0.0;
OS3MainFrm.DataGrid.Cells[col, i] := Format('%.3f', [predicted]);
end;
colIndex := NoVariables + 1;
DictionaryFrm.NewVar(colIndex);
DictionaryFrm.DictGrid.Cells[1, colIndex] := AVarName;
DictionaryFrm.DictGrid.Cells[7, colIndex] := 'R';
OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
end;
for i := 0 to High(AData) do
OS3MainFrm.DataGrid.Cells[colIndex, i+1] := Format(ANumFormat, [AData[i]]);
end;
{ Calculate predicted squared residuals and save recipricols to grid as weights }
procedure TWLSFrm.AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
begin
// Create new variables and add to grid
AddVariable('Pred SqrResid', ASqrPredictedResiduals, '%.3f');
AddVariable('WEIGHTS', AWeights, '%.3f');
end;
{ Calculate predicted values of the squared residuals, as well as the weights }
procedure TWLSFrm.CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
var
i, j, n, m: Integer;
sum: Double;
begin
ASquaredPredictedResiduals := nil;
AWeights := nil;
MatSize(xValues, n,m);
SetLength(ASquaredPredictedResiduals, n);
SetLength(AWeights, n);
sum := 0;
for i := 0 to n-1 do
begin
ASquaredPredictedResiduals[i] := ACoeffs[m]; // intercept value
for j := 0 to m-1 do
ASquaredPredictedResiduals[i] += abs(xValues[i, j] * ACoeffs[j]);
if ASquaredPredictedResiduals[i] <> 0 then
AWeights[i] := 1 / ASquaredPredictedResiduals[i]
else
AWeights[i] := 0;
sum := sum + AWeights[i];
end;
// Normalize weights to 1.0
AWeights := AWeights * (1.0 / sum);
end;
@ -212,9 +247,7 @@ end;
procedure TWLSFrm.Compute;
var
i, j, noIndep, depCol, weightCol, oldDepCol, NCases, pos, col: integer;
IndepCols: IntDyneVec = nil;
RowLabels: StrDyneVec = nil;
i, j, noIndep, NCases, pos, col: integer;
X, weight: double;
Means: DblDyneVec = nil;
Variances: DblDyneVec = nil;
@ -229,6 +262,15 @@ var
R2: Double = 0.0;
errorcode: Boolean = false;
PrintDesc: boolean = true;
indepCols: IntDyneVec = nil;
rowLabels: StrDyneVec = nil;
weights: DblDyneVec = nil;
xValues: DblDyneMat = nil;
yValues: DblDyneVec = nil;
depCol: Integer;
weightCol: Integer = -1;
useOrigin: Boolean;
begin
SetLength(Means, NoVariables + 2);
SetLength(Variances, NoVariables + 2);
@ -244,31 +286,38 @@ begin
NCases := NoCases;
// Get column indexes and do some validation checks.
// NOTE that the Length(indepCols) is different from NoIndep.
if not PrepareData(depCol, noIndep, indepCols, weightCol, RowLabels) then
if not PrepareData(indepCols, depCol, weightCol, RowLabels, xValues, yValues) then
exit;
// Save dependent column so we can re-use DepCol
oldDepCol := depCol;
// Do the OLS regression
if not Process_OLSRegression(indepCols, depCol, RowLabels, xValues, yValues) then
exit;
// *** Get OLS regression ***
Process_OLSRegression(indepCols, noIndep, depCol, RowLabels, nCases, printDesc);
// Regress the squared residuals on the predictors
if WeightChk.Checked then
begin
if not Process_SquaredResidualsRegression(indepCols, RowLabels, xValues, weights) then
exit;
useOrigin := OriginChk.Checked;
end else
begin
// Read the weights from the user column
weights := CollectVecValues(OS3MainFrm.DataGrid, weightCol, indepCols);
useOrigin := Origin2Chk.Checked;
end;
// Do the weighted regression, finally
Process_WeightedRegression(indepCols, RowLabels, xValues, yValues, weights, useOrigin);
exit;
// *** Regress the squared residuals on the predictors ***
depCol := NoVariables;
Process_SquaredResidualsRegression(indepCols, noIndep, depCol, RowLabels,
BWeights, nCases, printDesc);
if SaveChk.Checked then //WeightChk.Checked then
AddPredictedStuffToGrid(indepCols, noIndep, BWeights);
// *** Display squared residuals for each independent variable ***
// NOTE: depCol points to the squared residuals column here
PlotSquaredResiduals(IndepCols, NoIndep, depCol, 0.95);
if WeightChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
DepCol := olddepcol;
// DepCol := olddepcol;
IndepCols[Noindep] := DepCol;
for i := 1 to NoCases do
begin
@ -326,7 +375,7 @@ begin
if UserWeightsChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
depCol := olddepcol;
// depCol := olddepcol;
indepCols[Noindep] := depCol; // wp: CALCULATION SHOULD NORMALIZE USER WEIGHTS HERE !!!
for i := 1 to NoCases do
begin
@ -536,7 +585,45 @@ end;
procedure TWLSFrm.PlotSquaredResiduals(AIndepCols: IntDyneVec;
ANumIndepCols, ADepCol: Integer; AConfLevel: Double);
ADepCol: Integer; const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
var
x, y: DblDyneVec;
i, xCol, yCol: Integer;
regressionRes: TBivariateRegressionResults;
memo: TMemo;
chartFrame: TChartFrame;
xLabel, yLabel: String;
numIndepCols: Integer;
begin
// We will plot the selected vector of the independent values vertically,
// and the dependent values horizontally.
xCol := ADepCol;
x := VecCopy(ADepValues);
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
numIndepCols := Length(AIndepCols);
for i := 0 to numIndepCols-1 do
begin
yCol := AIndepCols[i];
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
y := MatColVector(AIndepValues, yCol-1);
SortOnX(x, y);
// Regression
BivariateRegression(x, y, CONF_LEVEL, regressionRes);
// Create tab with chart and report controls
CreateOrGetChartFrame(yCol-1, yLabel, memo, chartFrame); // -1 because yCol i is in grid units
// Plot
PlotXY(chartFrame, x, y, regressionRes, xLabel, yLabel);
// Print the descriptive statistics
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end;
end;
(*
var
xCol, yCol: Integer;
xLabel, yLabel: String;
@ -559,8 +646,8 @@ begin
colNoSelected[1] := yCol;
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
xPoints := CollectValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
yPoints := CollectValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
xPoints := CollectVecValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
yPoints := CollectVecValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
SortOnX(xPoints, yPoints);
// Regression
@ -576,7 +663,7 @@ begin
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end;
end;
*)
procedure TWLSFrm.PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
@ -643,52 +730,42 @@ begin
end;
end;
{ Routine obtains predicted raw and standardized scores and their
residuals. It is assumed that the dependent variable is last in the
list of variable column pointers stored in the ColNoSelected vector.
Get the z predicted score and its residual }
procedure TWLSFrm.PredictIt(ColNoSelected: IntDyneVec; NoVars: integer;
Means, StdDevs, BetaWeights: DblDyneVec; StdErrEst: double; NoIndepVars: integer);
procedure TWLSFrm.Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults);
var
col1, col2, col3, col4, col5, i, j, k, Index: integer;
predicted, zpredicted, z1, z2, resid, residsqr: double;
colName: String;
means, stddevs, variances: DblDyneVec;
i, j, n, m: Integer;
zPred: DblDyneVec = nil;
// zResid: DblDyneVec = nil;
rawPred: DblDyneVec = nil;
rawResid: DblDyneVec = nil;
sqrResid: DblDyneVec = nil;
begin
colName := 'Pred.z.';
col1 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
if col1 = -1 then
begin
col1 := NoVariables + 1;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.DictGrid.ColCount := 8;
DictionaryFrm.NewVar(col1);
OS3MainFrm.DataGrid.Cells[col1, 0] := colName;
DictionaryFrm.DictGrid.Cells[1, col1] := colName;
end;
MatSize(xData, n, m);
MatColMeanVarStdDev(xData, means, variances, stddevs);
colName := 'z Resid.';
col2 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
if col2 = -1 then
begin
col2 := NoVariables + 1;
DictionaryFrm.NewVar(col2);
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
OS3MainFrm.DataGrid.Cells[col2, 0] := colName;
DictionaryFrm.DictGrid.Cells[1, col2] := colName;
end;
SetLength(zPred, n);
// SetLength(zResid, n);
SetLength(rawPred, n);
SetLength(rawResid, n);
SetLength(sqrResid, n);
for i := 1 to NoCases do
for i := 0 to n-1 do
begin
zpredicted := 0.0;
for j := 0 to NoIndepVars - 1 do
begin
k := ColNoSelected[j];
z1 := (StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]) - Means[j]) / StdDevs[j];
zpredicted := zpredicted + (z1 * BetaWeights[j]);
end;
OS3MainFrm.DataGrid.Cells[col1, i] := Format('%.4f',[zpredicted]);
zPred[i] := 0;
for j := 0 to m-1 do
zPred[i] := zPred[i] + (xData[i, j] - means[j]) / stdDevs[j] * ARegressionResults.Beta[j];
{
zResid[i] := (yData[i] - ARegressionResults.MeanY) / ARegressionResults.StdDevY;
w: THIS IS NOT CORRECT. Remove above line because it is not needed.
This is the code used by the original routine
if StdDevs[NoVars-1] <> 0.0 then
begin
Index := ColNoSelected[NoVars-1];
@ -696,79 +773,50 @@ begin
z2 := (z2 - Means[NoVars-1]) / StdDevs[NoVars-1]; // z score
OS3MainFrm.DataGrid.Cells[col2, i] := Format('%.4f',[z2 - zpredicted]); // z residual
end;
}
rawPred[i] := ARegressionResults.Coeffs[m]; // intercept
for j := 0 to m-1 do
rawPred[i] := rawPred[i] + ARegressionResults.Coeffs[j] * xData[i, j];
rawResid[i] := rawPred[i] - yData[i];
sqrResid[i] := sqr(rawResid[i]);
end;
// Get raw predicted and residuals
colName := 'Pred.Raw';
col3 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
if col3 = -1 then
begin
col3 := NoVariables + 1;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.NewVar(col3);
DictionaryFrm.DictGrid.Cells[1, col3] := colName;
OS3MainFrm.DataGrid.Cells[col3, 0] := colName;
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
end;
// calculate raw predicted scores and store in DataGrid at col
for i := 1 to NoCases do
begin // predicted raw obtained from previously predicted z score
predicted := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, i]) * StdDevs[NoVars-1] + Means[NoVars-1];
OS3MainFrm.DataGrid.Cells[col3, i] := Format('%.3f',[predicted]);
end;
// Calculate residuals of predicted raw scores end;
colName := 'Raw Resid.';
col4 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
if col4 = -1 then
begin
col4 := NoVariables + 1;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.NewVar(col4);
DictionaryFrm.DictGrid.Cells[1, col4] := colName;
OS3MainFrm.DataGrid.Cells[col4, 0] := colName;
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
end;
for i := 1 to NoCases do
begin
Index := ColNoSelected[NoVars-1];
resid := StrToFloat(OS3MainFrm.DataGrid.Cells[col3, i]) - StrToFloat(OS3MainFrm.DataGrid.Cells[Index, i]);
OS3MainFrm.DataGrid.Cells[col4, i] := Format('%.3f', [resid]);
end;
// get square of raw residuals
colName := 'Resid Sqr';
col5 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
if col5 = -1 then
begin
col5 := NoVariables + 1;
DictionaryFrm.NewVar(col5);
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
DictionaryFrm.DictGrid.Cells[1, col5] := colName;
OS3MainFrm.DataGrid.Cells[col5, 0] := colName;
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
end;
for i := 1 to NoCases do
begin
residsqr := StrToFloat(OS3MainFrm.DataGrid.Cells[col4,i]);
residsqr := residsqr * residsqr;
OS3MainFrm.DataGrid.Cells[col5,i] := Format('%.3f',[residsqr]);
end;
AddVariable('z Pred', zPred, '%.4f');
// AddGridColumn('z Resid', zResid, '%.4f');
AddVariable('Raw Pred', rawPred, '%.3f');
AddVariable('Raw Resid', rawResid, '%.3f');
AddVariable('Sqr Resid', sqrResid, '%.3f');
end;
function TWLSFrm.PrepareData(out ADepCol, ANumIndepCols: Integer;
out AIndepCols: IntDyneVec; out AWeightCol: Integer;
out ARowLabels: StrDyneVec): Boolean;
{ Prepares the data for the analysis by extracting all needed data from the
grid:
- AIndepCols: integer array containing the grid column indexes of the
independent variables to be used
- ADepCol: grid column index of the dependent variable to be used
- AWeightCol: optional grid column index of the weight data to be used
- ARowLabels: string array containing the names of the independent variables
as well of the dependent variable (last)
- xValues: matrix with all independent values. The columns of the matrix
correspond to the variables, the row correspond to the cases.
- yValues: vector with the dependent variable values
}
function TWLSFrm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
var
i: Integer;
msg: String;
C: TWinControl;
numIndepCols: Integer;
begin
Result := false;
AIndepCols := nil;
ARowLabels := nil;
xValues := nil;
yvalues := nil;
if not Validate(msg, C) then
begin
@ -777,18 +825,14 @@ begin
exit;
end;
ANumIndepCols := IndVarList.Items.Count;
numIndepCols := IndVarList.Items.Count;
ADepCol := GetVariableIndex(OS3MainFrm.DataGrid, DepVarEdit.Text);
AWeightCol := GetVariableIndex(OS3MainFrm.DataGrid, WeightVarEdit.Text);
// The IndepCols store also other variables, in addition to the "real"
// independent variables. Until I know how this works, this array must be
// over-dimensions.
// ARowLabels alike.
SetLength(AIndepCols, NoVariables + 2);
SetLength(ARowLabels, NoVariables);
SetLength(AIndepCols, numIndepCols);
SetLength(ARowLabels, numIndepCols + 1); // +1 to add independent column label
for i := 0 to ANumIndepCols-1 do
for i := 0 to numIndepCols-1 do
begin
AIndepCols[i] := GetVariableIndex(OS3MainFrm.DataGrid, IndVarList.Items[i]);
if AIndepCols[i] = -1 then
@ -798,9 +842,7 @@ begin
end;
ARowLabels[i] := IndVarList.Items[i];
end;
// Append dependent column index to the independent columns vector.
AIndepCols[ANumIndepCols] := ADepCol;
ARowLabels[numIndepCols] := DepVarEdit.Text;
// Check variable types: all of them must be numeric (float or integer)
if not IsNumericCol(ADepCol) then
@ -809,7 +851,7 @@ begin
exit;
end;
for i := 0 to ANumIndepCols-1 do
for i := 0 to numIndepCols-1 do
if not IsNumericCol(AIndepCols[i]) then
begin
ErrorMsg('Incorrect data type of independent variable "%s"', [ARowLabels[i]]);
@ -822,56 +864,45 @@ begin
exit;
end;
xValues := CollectMatValues(OS3MainFrm.DataGrid, AIndepCols);
yValues := CollectVecValues(OS3MainFrm.DataGrid, ADepCol);
Result := true;
end;
{ Runs the ordinary least squares regression on the grid data }
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec; ANumIndepCols, ADepCol: Integer;
ARowLabels: StrDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec;
ADepCol: Integer; const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec): Boolean;
var
lReport: TStrings;
means: DblDyneVec = nil;
variances: DblDyneVec = nil;
stdDevs: DblDyneVec = nil;
BWeights: DblDyneVec = nil;
BetaWeights: DblDyneVec = nil;
BStdErrs: DblDyneVec = nil;
BtTests: DblDyneVec = nil;
tProbs: DblDyneVec = nil;
R2, stdErrEst: Double;
error: Boolean;
regressionRes: TMultipleRegressionResults;
i: Integer;
numIndepCols: Integer;
begin
Result := false;
numIndepCols := Length(AIndepCols);
lReport := TStringList.Create;
try
lReport.Add('ORDINARY LEAST SQUARES (OLS) REGRESSION RESULTS');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + OS3MainFrm.DataGrid.Cells[ADepCol, 0]);
lReport.Add('Independent variables:');
for i := 0 to numIndepCols-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
SetLength(means, NoVariables + 2);
SetLength(variances, NoVariables + 2);
SetLength(stdDevs, NoVariables + 2);
SetLength(BWeights, NoVariables + 2);
SetLength(BetaWeights, NoVariables + 2);
SetLength(BStdErrs, NoVariables + 2);
SetLength(Bttests, NoVariables + 2);
SetLength(tprobs, NoVariables + 2);
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
MReg(ANumIndepCols, AIndepCols, ADepCol, ARowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
ANumCases, error, PrintAll, lReport);
if Result then
begin
Predict(xValues, yValues, regressionRes);
OLSReportFrame.DisplayReport(lReport);
end;
// if error then // wp: Why does MReg exit with error???
// exit;
// Get predicted z score, residual z score, predicted raw score,
// residual raw score and squared raw residual score. Place in the DataGrid
PredictIt(AIndepCols, ANumIndepCols+1, means, stdDevs, BetaWeights, stdErrEst, ANumIndepCols);
OLSReportFrame.DisplayReport(lReport);
Result := true;
finally
lReport.Free;
end;
@ -879,44 +910,137 @@ end;
function TWLSFrm.Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
ANumIndepCols, ADepCol: Integer; ARowLabels: StrDyneVec; BWeights: DblDyneVec;
ANumCases: Integer; PrintAll: Boolean): Boolean;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; out AWeights: DblDyneVec): Boolean;
var
lReport: TStrings;
means: DblDyneVec = nil;
variances: DblDyneVec = nil;
stdDevs: DblDyneVec = nil;
BetaWeights: DblDyneVec = nil;
BStdErrs: DblDyneVec = nil;
BtTests: DblDyneVec = nil;
tProbs: DblDyneVec = nil;
R2, stdErrEst: Double;
error: Boolean;
sqrResiduals: DblDyneVec;
predSqrResiduals: DblDyneVec;
regressionRes: TMultipleRegressionResults;
i, depCol, numIndepCols: Integer;
begin
AWeights := nil;
ResidualsRegPage.TabVisible := WeightChk.Checked;
if not WeightChk.Checked then
exit;
numIndepCols := Length(AIndepCols);
// The last grid column (added by Process_ODSRegression) contains the
// squared residuals which will be fitted here.
depCol := NoVariables;
sqrResiduals := CollectVecValues(OS3MainFrm.DataGrid, depCol);
lReport := TStringList.Create;
try
lReport.Add('REGRESSION OF SQUARED RESIDUALS ON INDEPENDENT VARIABLES');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + ARowLabels[numIndepCols]);
lReport.Add('Independent variables:');
for i := 0 to numIndepCols-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
SetLength(means, NoVariables + 2);
SetLength(variances, NoVariables + 2);
SetLength(stdDevs, NoVariables + 2);
SetLength(BetaWeights, NoVariables + 2);
SetLength(BStdErrs, NoVariables + 2);
SetLength(Bttests, NoVariables + 2);
SetLength(tprobs, NoVariables + 2);
Result := RegressionAndReport(ARowLabels, xValues, sqrResiduals, regressionRes, lReport);
MReg(ANumIndepCols, AIndepCols, ADepCol, ARowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
ANumCases, error, PrintAll, lReport);
if Result then
begin
// Display the results
ResidualsRegReportFrame.DisplayReport(lReport);
// Calculate weights and store them in the grid
CalcWeights(xValues, regressionRes.Coeffs, predSqrResiduals, AWeights);
// Display squared residuals for each independent variable
PlotSquaredResiduals(AIndepCols, depCol, xValues, sqrResiduals);
// Store weights to the grid
if SaveChk.Checked then
AddWeightsToGrid(predSqrResiduals, AWeights);
end;
ResidualsRegReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
function TWLSFrm.Process_WeightedRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec; const AWeights: DblDyneVec; SubtractMeans: Boolean): Boolean;
var
i, j, n, m: Integer;
regressionRes: TMultipleRegressionResults;
lReport: TStrings;
means: DblDyneVec;
begin
MatSize(xValues, n, m);
for i :=0 to n-1 do
for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] * AWeights[i];
if SubtractMeans then
begin
means := MatRowMeans(xValues);
for i := 0 to n-1 do
for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] - means[i];
end;
lReport := TStringList.Create;
try
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + ARowLabels[m]);
lReport.Add('Independent variables:');
for i := 0 to m-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
if Result then
WLSReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
function TWLSFrm.RegressionAndReport(const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec;
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
var
err: TRegressionError;
begin
err := MultipleRegression(xValues, yValues, CONF_LEVEL, ARegressionResults);
case err of
regOK: ;
regTooFewValues: ErrorMsg('At least two values required for regression.');
regStdDevZero: ErrorMsg('Standard deviation is zero.');
end;
Result := (err = regOK);
ARegressionResults.WriteCoeffsReport(AReport, ARowLabels);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteANOVAReport(AReport);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteVarCovarReport(AReport, ARowLabels);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteCorrelationReport(AReport, ARowLabels);
end;
procedure TWLSFrm.Reset;
var
i: integer;
@ -1073,20 +1197,12 @@ var
begin
lReport := TStringList.Create;
try
{ not needed - requires too much space
lReport.Add('Data file: %s', [OS3MainFrm.FileNameEdit.Text]);
lReport.Add('');
lReport.Add('Variables:');
lReport.Add(' X: %s', [xLabel]);
lReport.Add(' Y: %s', [yLabel]);
lReport.Add('');
}
lReport.Add('Variable Mean Variance Std.Dev.');
lReport.Add('---------- -------- -------- --------');
lReport.Add(' Variable Mean Variance Std.Dev. ');
lReport.Add('------------ ------------ ------------ ------------');
with ARegressionResults do
begin
lReport.Add('%-10s %8.2f %8.2f %8.2f', [xLabel, XMean, XVariance, XStdDev]);
lReport.Add('%-10s %8.2f %8.2f %8.2f', [yLabel, YMean, YVariance, YStdDev]);
lReport.Add('%12s %12.2f %12.2f %12.2f', [xLabel, XMean, XVariance, XStdDev]);
lReport.Add('%12s %12.2f %12.2f %12.2f', [yLabel, YMean, YVariance, YStdDev]);
lReport.Add('');
lReport.Add('Regression:');
lReport.Add(' Correlation: %8.3f', [R]);

View File

@ -989,6 +989,9 @@ begin
if not (Sender = DataGrid) then
exit;
if aCol >= DictionaryFrm.DictGrid.RowCount then
exit;
ts := DataGrid.Canvas.TextStyle;
justif := DictionaryFrm.DictGrid.Cells[7, aCol];
if justif = '' then justif := 'L';

View File

@ -8,9 +8,11 @@ uses
Classes, SysUtils, Grids,
Globals, DictionaryUnit;
function CollectValues(AGrid: TStringGrid; AColIndex: Integer;
function CollectVecValues(AGrid: TStringGrid; AColIndex: Integer;
AColCheck: IntDyneVec = nil): DblDyneVec;
function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat;
procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer;
const AColCheck: IntDyneVec; out AMin, AMax: Double);
@ -43,7 +45,7 @@ uses
Non-numeric values in the considered cell will raise an exception.
NOTE: AColCheck must not be overdimensioned! }
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
function CollectVecValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
var
row, n: Integer;
val: Double;
@ -70,6 +72,33 @@ begin
end;
{ Extracts the grid values from the columns with indices given by AColIndices
and puts them into the columns of the result matrix.
This means: The result matrix contains the variables as columns and the
cases as rows. }
function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat;
var
nr, r, c, i, j: Integer;
val: Double;
begin
SetLength(Result, AGrid.RowCount, Length(AColIndices));
nr := 0;
for r:= 1 to AGrid.RowCount-1 do
begin
if not GoodRecord(AGrid, r, AColIndices) then Continue;
i := r - 1;
for j := 0 to High(AColIndices) do
begin
c := AColIndices[j];
if TryStrToFloat(trim(AGrid.Cells[c, r]), val) then
Result[i, j] := val;
end;
inc(nr); // count the number of rows in the matrix.
end;
SetLength(Result, nr);
end;
{ Determines the minimum and maximum of the values in the specified column of
the grid. Rows with "invalid" data are ignored. If AColCheck contains other
column indices these cells must be "valid", too. }
@ -101,7 +130,10 @@ end;
the grid. }
function GetVariableIndex(AGrid: TStringGrid; const AVarName: String): Integer;
begin
Result := AGrid.Rows[0].IndexOf(AVarName);
if AVarName <> '' then
Result := AGrid.Rows[0].IndexOf(AVarName)
else
Result := -1;
end;
@ -151,9 +183,13 @@ var
missingCode: String;
value: String;
begin
missingCode := Trim(DictionaryFrm.DictGrid.Cells[6, ACol]);
value := Trim(AGrid.Cells[ACol, ARow]);
Result := (value = missingCode);
if ACol < DictionaryFrm.DictGrid.RowCount then
begin
missingCode := Trim(DictionaryFrm.DictGrid.Cells[6, ACol]);
value := Trim(AGrid.Cells[ACol, ARow]);
Result := (value = missingCode);
end else
Result := false;
end;
@ -163,8 +199,12 @@ function IsNumericCol(AColIndex: Integer): Boolean;
var
typeCode: String;
begin
typeCode := Trim(DictionaryFrm.DictGrid.Cells[4, AColIndex]);
Result := (typeCode = 'F') or (typeCode = 'I');
if AColIndex < DictionaryFrm.DictGrid.RowCount then
begin
typeCode := Trim(DictionaryFrm.DictGrid.Cells[4, AColIndex]);
Result := (typeCode = 'F') or (typeCode = 'I');
end else
Result := false;
end;

View File

@ -655,7 +655,6 @@ var
SSY, SSres, resvar, SSreg: double;
title: string;
deplabel: string;
errcode: boolean = false;
begin
Assert(OS3MainFrm <> nil);
@ -668,7 +667,7 @@ begin
SetLength(ColLabels, NCases);
// initialize
errcode := false;
ErrorCode := false;
for i := 0 to NCases do
begin
for j := 0 to NoIndep do X[i, j] := 0;
@ -721,7 +720,7 @@ begin
deplabel := OS3MainFrm.DataGrid.Cells[DepCol,0];
RowLabels[NoIndep] := 'Intercept';
VarY := SSY - (MeanY * MeanY / NCases);
VarY := SSY - sqr(MeanY) / NCases;
VarY := VarY / (NCases - 1);
SDY := sqrt(VarY);
@ -870,13 +869,15 @@ begin
//AReport.Add('Standard Error of Estimate = %8.2f', [stderrest]);
end;
// Needed for calculation of VIF and TOL
Correlations(NoIndep, IndepCols, XTX, Means, Variances, StdDevs, ErrorCode, NCases);
SVDinverse(XTX, NoIndep);
AReport.Add(DIVIDER_SMALL);
AReport.Add('');
RowLabels[N-1] := 'Intercept';
AReport.Add(' Variable Beta B Std.Err. t prob VIF TOL');
AReport.Add('---------- ---------- ---------- ---------- ---------- ---------- --------- ----------');
Correlations(NoIndep, IndepCols, XTX, Means, Variances, StdDevs, errcode, NCases);
SVDinverse(XTX, NoIndep);
for i := 0 to NoIndep do
begin
VIF := XTX[i,i];

View File

@ -18,6 +18,18 @@ type
// Vectors
operator + (A, B: TDblVector): TDblVector;
operator - (A, B: TDblVector): TDblVector;
operator * (A, B: TDblVector): Double;
operator * (A: TDblVector; b: Double): TDblVector;
operator * (a: Double; B: TDblVector): TDblVector;
procedure VecCheck(A, B: TDblVector; out n: Integer);
function VecCopy(A: TDblVector): TDblVector;
function VecMultiply(A, B: TDblVector): TDblVector;
function VecOnes(n: Integer): TDblVector;
procedure VecSize(A: TDblVector; out n: Integer);
procedure VecMaxMin(const AData: TDblVector;
out AMax, AMin: Double);
procedure VecMeanStdDev(const AData: TDblVector;
@ -36,6 +48,7 @@ function VecMedian(const AData: TDblVector): Double;
operator + (A, B: TDblMatrix): TDblMatrix;
operator - (A, B: TDblMatrix): TDblMatrix;
operator * (A, B: TDblMatrix): TDblMatrix;
operator * (A: TDblMatrix; v: TDblVector): TDblVector;
{ NOTE: Indices follow math convention:
- 1st index is the row index, i.e. runs vertically
@ -44,6 +57,9 @@ operator * (A, B: TDblMatrix): TDblMatrix;
function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix;
procedure MatCheck(A: TDblMatrix);
procedure MatCheckSquare(A: TDblMatrix; out n: Integer);
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
function MatColMeans(A: TDblMatrix): TDblVector;
function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector;
function MatCopy(A: TDblMatrix): TDblMatrix;
function MatDeterminant(A: TDblMatrix): double;
function MatEqualSize(A, B: TDblMatrix): Boolean;
@ -53,9 +69,10 @@ function MatInverse(A: TDblMatrix): TDblMatrix;
function MatIsSquare(A: TDblMatrix; out n: Integer): Boolean;
function MatNumCols(A: TDblMatrix): Integer;
function MatNumRows(A: TDblMatrix): Integer;
function MatRowMeans(A: TDblMatrix): TDblVector;
function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector;
procedure MatSize(A: TDblMatrix; out n, m: Integer);
function MatTranspose(A: TDblMatrix): TDblMatrix;
function MatTransposed(A: TDblMatrix): TDblMatrix;
// Sorting
procedure Exchange(var a, b: Double); overload;
@ -88,6 +105,110 @@ uses
Math;
operator + (A, B: TDblVector): TDblVector;
var
i, n: Integer;
begin
VecCheck(A, B, n);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := A[i] + B[i];
end;
operator - (A, B: TDblVector): TDblVector;
var
i, n: Integer;
begin
VecCheck(A, B, n);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := A[i] - B[i];
end;
// Vector dot product
operator * (A, B: TDblVector): Double;
var
i, n: Integer;
begin
VecCheck(A, B, n);
Result := 0;
for i := 0 to n-1 do
Result := Result + A[i] * B[i];
end;
// Multiplication of a vector by a scalar
operator * (A: TDblVector; b: Double): TDblVector;
var
i, n: Integer;
begin
n := Length(A);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := A[i] * b;
end;
operator * (a: Double; B: TDblVector): TDblVector;
var
i, n: Integer;
begin
n := Length(B);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := a * B[i];
end;
procedure VecCheck(A, B: TDblVector; out n: Integer);
var
na, nb: Integer;
begin
na := Length(A);
nb := Length(B);
if na <> nb then
raise EMatrix.Create('Dimension error.')
else
n := na;
end;
function VecCopy(A: TDblVector): TDblVector;
var
i: Integer;
begin
SetLength(Result, Length(A));
for i := 0 to High(A) do Result[i] := A[i];
end;
function VecMultiply(A, B: TDblVector): TDblVector;
var
i, n: Integer;
begin
VecCheck(A, B, n);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := A[i] * B[i];
end;
function VecOnes(n: Integer): TDblVector;
var
i: Integer;
begin
SetLength(Result, n);
for i := 0 to n-1 do Result[i] := 1;
end;
procedure VecSize(A: TDblVector; out n: Integer);
begin
n := Length(A);
end;
{===============================================================================
Statistical vector operations
===============================================================================}
@ -134,7 +255,7 @@ begin
if n = 1 then
exit;
AVariance := ((ss - sqr(AMean)) / n) / (n - 1);
AVariance := (ss - sqr(AMean) * n) / (n - 1);
AStdDev := sqrt(AVariance);
end;
@ -229,6 +350,7 @@ begin
end;
{ Product of two matrices }
operator * (A, B: TDblMatrix): TDblMatrix;
var
na, ma, nb, mb, i, j, k: Integer;
@ -251,6 +373,28 @@ begin
end;
{ Product of an n x m matrix with an m vector }
operator * (A: TDblMatrix; v: TDblVector): TDblVector;
var
na, ma, nv, i, j: Integer;
sum: Double;
begin
MatSize(A, na, ma);
VecSize(v, nv);
if ma <> nv then
raise EMatrix.Create('Dimension error.');
SetLength(Result, na);
for i := 0 to na-1 do
begin
sum := 0;
for j := 0 to ma-1 do
sum := sum + A[i, j] * v[j];
Result[i] := sum;
end;
end;
{ Adds the elements of the vector v to the rows of the matrix A, i.e. the number
of columns increases by 1 }
function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix;
@ -288,6 +432,59 @@ begin
end;
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
var
n, m, i, j: Integer;
s, ss: Double;
begin
MatSize(A, n, m);
SetLength(AMeans, m);
SetLength(AVariances, m);
SetLength(AStdDevs, m);
for j := 0 to m-1 do
begin
s := 0;
ss := 0;
for i := 0 to n-1 do
begin
s := s + A[i, j];
ss := ss + sqr(A[i, j]);
end;
AMeans[j] := s / n;
AVariances[j] := (ss - sqr(AMeans[j]) * n) / (n - 1);
AStdDevs[j] := sqrt(AVariances[j]);
end;
end;
function MatColMeans(A: TDblMatrix): TDblVector;
var
i, j, n, m: Integer;
sum: Double;
begin
MatSize(A, n, m);
SetLength(Result, m);
for j := 0 to m-1 do
begin
sum := 0;
for i := 0 to n-1 do
sum := sum + A[i, j];
Result[j] := sum / n;
end;
end;
function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector;
var
i, n, m: Integer;
begin
MatSize(A, n,m);
SetLength(Result, n);
for i := 0 to n-1 do
Result[i] := A[i, AColIndex];
end;
function MatCopy(A: TDblMatrix): TDblMatrix;
var
n, m, i, j: Integer;
@ -405,6 +602,34 @@ begin
end;
function MatRowMeans(A: TDblMatrix): TDblVector;
var
i, j, n, m: Integer;
sum: Double;
begin
MatSize(A, n,m);
SetLength(Result, n);
for i := 0 to n-1 do
begin
sum := 0;
for j := 0 to m-1 do
sum := sum + A[i, j];
Result[i] := sum / m;
end;
end;
function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector;
var
j, n, m: Integer;
begin
MatSize(A, n,m);
SetLength(Result, m);
for j := 0 to m-1 do
Result[j] := A[ARowIndex, j];
end;
procedure MatSize(A: TDblMatrix; out n, m: Integer);
begin
n := Length(A);
@ -412,15 +637,15 @@ begin
end;
function MatTranspose(A: TDblMatrix): TDblMatrix;
function MatTransposed(A: TDblMatrix): TDblMatrix;
var
n, i, j: Integer;
n, m, i, j: Integer;
begin
MatCheck(A);
MatCheckSquare(A, n);
SetLength(Result, n, n);
MatSize(A, n, m);
SetLength(Result, m, n);
for i := 0 to n-1 do
for j := 0 to n-1 do
for j := 0 to m-1 do
Result[j, i] := A[i, j];
end;

View File

@ -9,8 +9,15 @@ uses
Classes, SysUtils,
MatrixUnit;
type
ERegression = class(Exception);
type
TRegressionError = (regOK, regTooFewValues, regStdDevZero);
TBivariateRegressionResults = record
public
Slope, Intercept: Double;
XMean, YMean: Double;
XVariance, YVariance: Double;
@ -28,15 +35,53 @@ procedure BivariateRegression(const xData, yData: TDblVector;
type
TMultipleRegressionResults = record
private
procedure WriteMatrixReport(AReport: TStrings; AMatrix: TDblMatrix;
ATitle: String; AVarNames: TStringArray);
public
Coeffs: TDblVector; // Coefficients of the factors
CoeffStdErrors: TDblVector; // Standard errors of the coefficients
Beta: TDblVector; // Standardized coefficients
VarCovar: TDblMatrix; // Variance-covariance matrix
Correlations: TDblMatrix; // Correlations matrix
SStotal, SSreg, SSresid: Double; // Sum of squares
NumCases, NumDepVars: Integer; // number of cases, number of dependent variables
R2: Double; // coefficient of determination
AdjR2: Double; // Adjusted coefficient of determination
StdErrorEstimate: Double; // standard error of estimate
t: TDblVector;
p: TDblVector; // Probability for > t
F: Double;
Probability: Double; // Probability for > F
Means: TDblVector;
Variances: TDblVector;
StdDevs: TDblVector;
(*
XMeans: TDblVector; // Means of the x columns
XVariances: TDblVector; // Variances of the x columns
XStdDevs: TDblVector; // Std Deviations of the x columns
*)
MeanY: Double; // Mean of y column
VarianceY: Double; // Variance of y column
StdDevY: Double; // Std Deviation of y column
procedure WriteANOVAReport(AReport: TStrings);
procedure WriteCoeffsReport(AReport: TStrings; AVarNames: TStringArray);
procedure WriteCorrelationReport(AReport: TStrings; AVarNames: TStringArray);
procedure WriteVarCovarReport(AReport: TStrings; AVarNames: TStringArray);
end;
procedure MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
AConfLevel: Double; out AResults: TMultipleRegressionResults);
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
implementation
uses
StrUtils,
MathUnit;
{===============================================================================
@ -95,12 +140,241 @@ end;
{===============================================================================
Multiple regression
===============================================================================}
procedure MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
AConfLevel: Double; out AResults: TMultipleRegressionResults);
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
var
i, j, nx, mx, ny: Integer;
X, XT, XT_X, inv_XT_X: TDblMatrix;
XT_Y: TDblVector;
SY, SSY: Double;
DFresid: Integer;
begin
MatCheck(xData);
MatSize(xData, nx, mx);
ny := Length(yData);
if nx <> ny then
raise ERegression.Create('Dimension error.');
if nx < 2 then begin
Result := regTooFewValues;
exit;
end;
// Augmented x matrix: append ones vector for intercept
X := MatAppendColVector(xData, VecOnes(ny));
// Transposed augmented matrix
XT := MatTransposed(X);
// Product of augmented matrix with augmented Y
XT_Y := XT * yData;
// Product of transposed augmented with augmented matrix
XT_X := XT * X;
// Product of inverse of XT_X with XT_Y yields the coefficients. Intercept is last.
inv_XT_X := MatInverse(XT_X);
AResults.Coeffs := inv_XT_X * XT_Y;
// THe XT_X matrix contains the sum of squares (diagonal elements), sum of
// cross-products (off-diagonal elements and the sum the variable values
// (augmented column).
SetLength(AResults.Means, mx+1);
SetLength(AResults.Variances, mx+1);
SetLength(AResults.StdDevs, mx+1);
for i := 0 to mx do
begin
AResults.Means[i] := XT_X[i, mx] / nx;
AResults.Variances[i] := (XT_X[i, i] - sqr(AResults.Means[i]) * nx) / (nx - 1);
AResults.StdDevs[i] := sqrt(AResults.Variances[i]);
{
if AResults.StdDevs[i] = 0.0 then
begin
Result := regStdDevZero;
exit;
end;
}
end;
// Variance-covariance matrix
SetLength(AResults.VarCovar, mx, mx);
SetLength(AResults.Correlations, mx, mx);
for i := 0 to mx-1 do
for j := 0 to mx-1 do
begin
AResults.VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1);
AResults.Correlations[i, j] := AResults.VarCovar[i, j] / (AResults.StdDevs[i] * AResults.StdDevs[j]);
end;
with AResults do
begin
NumCases := nx;
NumDepVars := mx;
DFresid := NumCases - NumDepVars - 1;
// Calculate column means of xData
// MatColMeanVarStdDev(X, XMeans, XVariances, XStdDevs);
// Basic stats of y data column
VecSumSS(yData, SY, SSY);
MeanY := SY / NumCases;
SStotal := SSY - sqr(MeanY) * NumCases; // needed for ANOVA
VarianceY := SStotal / (NumCases - 1);
StdDevY := sqrt(VarianceY);
{
if StdDevY = 0 then
begin
Result := regStdDevZero;
exit;
end;
}
// Standardized coefficients
Beta := VecMultiply(Coeffs, StdDevs) * (1.0/StdDevY);
// Get standard errors, squared multiple correlation, tests of significance
SetLength(CoeffStdErrors, mx+1);
SetLength(t, mx+1);
SetLength(p, mx+1);
SSresid := SSY - Coeffs * XT_Y;
SSreg := SStotal - SSresid;
StdErrorEstimate := sqrt(SSresid / DFresid);
R2 := SSreg / SStotal;
AdjR2 := 1.0 - (1.0 - R2) * (NumCases - 1) / DFresid;
F := (SSreg / NumDepVars) / (SSresid / DFresid);
Probability := ProbF(F, NumDepVars, DFresid);
for i := 0 to mx do
begin
CoeffStdErrors[i] := StdErrorEstimate * sqrt(inv_XT_X[i, i]);
t[i] := Coeffs[i] / CoeffStdErrors[i];
p[i] := ProbT(t[i], DFresid);
end;
end;
Result := regOK;
end;
{ Writes the ANOVA of the regression to a "report". }
procedure TMultipleRegressionResults.WriteANOVAReport(AReport: TStrings);
var
DFreg, DFresid, DFtotal: Integer;
begin
DFreg := NumDepVars;
DFtotal := NumCases - 1;
DFresid := DFtotal - DFreg;
AReport.Add('ANOVA');
AReport.Add('------------------------------------------------------------------------------');
AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability');
AReport.Add('---------- --- ---------------- ---------------- ------------ -----------');
AReport.Add('Regression %3d %16.3f %16.3f %12.4f %10.4f', [DFreg, SSreg, SSreg/DFreg, F, Probability]);
AReport.Add('Residual %3d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]);
AReport.Add('Total %3d %16.3f', [DFtotal, SStotal]);
AReport.Add('------------------------------------------------------------------------------');
AReport.Add('');
AReport.Add('R2: %10.4f', [R2]);
AReport.Add('Adjusted R2: %10.4f', [AdjR2]);
AReport.Add('F: %10.2f (with %d and %d degrees of freedom)', [F, DFreg, DFresid]);
AReport.Add('Probability > F: %10.4f', [Probability]);
AReport.Add('Standard Error of Estimate: %10.2f', [StdErrorEstimate]);
end;
{ Writes the determined coefficients and their statistics to a "report".
The names of the independent variables are needed in the array AVarNames. }
procedure TMultipleRegressionResults.WriteCoeffsReport(AReport: TStrings;
AVarNames: TStringArray);
var
varName: String;
i, first, last: Integer;
begin
AReport.Add('COEFFICIENTS');
AReport.Add('------------------------------------------------------------------------------');
AReport.Add(' Variable Unstandardized Std.Error Standardized t p ');
AReport.Add('------------ -------------- ------------ ------------ --------- ---------');
first := 0;
last := High(Coeffs);
for i := first to last do
begin
if i = last then
varName := '(Intercept)'
else
if i <= High(AVarNames) then
varName := AVarNames[i]
else
varName := Format('VAR.%d', [i+1]);
AReport.Add('%12s %14.3f %12.3f %12.3f %9.4f %9.4f',
[varName, Coeffs[i], CoeffStdErrors[i], Beta[i], t[i], p[i]]);
end;
AReport.Add('------------------------------------------------------------------------------');
end;
{ Writes the correlation matrix to a "report".
The names of the independent variables are needed in the array AVarNames. }
procedure TMultipleRegressionResults.WriteCorrelationReport(AReport: TStrings;
AVarNames: TStringArray);
begin
WriteMatrixReport(AReport, Correlations, 'CORRELATION MATRIX', AVarNames);
end;
procedure TMultipleRegressionResults.WriteMatrixReport(AReport: TStrings;
AMatrix: TDblMatrix; ATitle: String; AVarNames: TStringArray);
function GetVarName(i: Integer): String;
begin
if i < High(AVarNames) then
Result := AVarNames[i]
else
Result := Format('VAR.%d', [i+1]);
end;
const
SPACE = ' ';
var
i, j, first, last: Integer;
s: String;
begin
first := 0;
last := High(AMatrix);
AReport.Add(ATitle);
s := DupeString(' ', 12);
for j := first to last do
s := s + SPACE + Format('%12s', [GetVarName(j)]);
AReport.Add(DupeString('-', Length(s)));
AReport.Add(s);
s := DupeString(' ', 12);
for j := first to last do
s := s + SPACE + DupeString('-', 12);
AReport.Add(s);
for i := first to last do
begin
s := Format('%12s', [GetVarName(i)]);
for j := first to last do
s := s + SPACE + Format('%12.3f', [AMatrix[i, j]]);
AReport.Add(s);
end;
AReport.Add(DupeString('-', Length(s)));
end;
{ Writes the variance/covariance matrix to a "report".
The names of the independent variables are needed in the array AVarNames. }
procedure TMultipleRegressionResults.WriteVarCovarReport(AReport: TStrings;
AVarNames: TStringArray);
begin
WriteMatrixReport(AReport, VarCovar, 'VARIANCE-COVARIANCE MATRIX', AVarNames);
end;
end.