You've already forked lazarus-ccr
LazStats: Massive refactoring of WLSUnit avoiding overwriting of grid data. Fix crash when accessing non-existing cells in DictionaryFrm.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7780 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -47,7 +47,6 @@
|
||||
<Linking>
|
||||
<Debugging>
|
||||
<DebugInfoType Value="dsDwarf2Set"/>
|
||||
<UseHeaptrc Value="True"/>
|
||||
<TrashVariables Value="True"/>
|
||||
</Debugging>
|
||||
</Linking>
|
||||
|
@ -146,9 +146,9 @@ begin
|
||||
FColIndex := AColIndex;
|
||||
|
||||
if Length(FColsSelected) = 0 then
|
||||
FValues := CollectValues(FDataGrid, AColIndex, FColsSelected)
|
||||
FValues := CollectVecValues(FDataGrid, AColIndex, FColsSelected)
|
||||
else
|
||||
FValues := CollectValues(FDataGrid, AColIndex);
|
||||
FValues := CollectVecValues(FDataGrid, AColIndex);
|
||||
FNumCases := Length(FValues);
|
||||
|
||||
SortOnX(FValues);
|
||||
|
@ -267,8 +267,8 @@ begin
|
||||
ColNoSelected[0] := xCol;
|
||||
ColNoSelected[1] := yCol;
|
||||
|
||||
XData := CollectValues(ADataGrid, xCol, colNoSelected);
|
||||
YData := CollectValues(ADataGrid, ycol, colNoSelected);
|
||||
XData := CollectVecValues(ADataGrid, xCol, colNoSelected);
|
||||
YData := CollectVecValues(ADataGrid, ycol, colNoSelected);
|
||||
N := Length(XData);
|
||||
if N < 3 then
|
||||
begin
|
||||
|
@ -361,8 +361,8 @@ begin
|
||||
ColNoSelected[0] := xCol;
|
||||
ColNoSelected[1] := yCol;
|
||||
|
||||
XData := CollectValues(ADataGrid, xCol, colNoSelected);
|
||||
YData := CollectValues(ADataGrid, ycol, colNoSelected);
|
||||
XData := CollectVecValues(ADataGrid, xCol, colNoSelected);
|
||||
YData := CollectVecValues(ADataGrid, ycol, colNoSelected);
|
||||
N := Length(XData);
|
||||
if N < 3 then
|
||||
begin
|
||||
|
@ -128,7 +128,7 @@ begin
|
||||
lReport.Add('');
|
||||
|
||||
// Stores values of the variable
|
||||
values := CollectValues(OS3MainFrm.DataGrid, k);
|
||||
values := CollectVecValues(OS3MainFrm.DataGrid, k);
|
||||
nCases := Length(values);
|
||||
SetLength(valueString, nCases);
|
||||
|
||||
|
@ -119,10 +119,10 @@ begin
|
||||
end;
|
||||
|
||||
// Extract x and y values
|
||||
xValues := CollectValues(OS3MainFrm.DataGrid, xCol, selected);
|
||||
xValues := CollectVecValues(OS3MainFrm.DataGrid, xCol, selected);
|
||||
SetLength(yValues, nY);
|
||||
for i := 0 to nY-1 do
|
||||
yValues[i] := CollectValues(OS3MainFrm.DataGrid, selected[i], selected);
|
||||
yValues[i] := CollectVecValues(OS3MainFrm.DataGrid, selected[i], selected);
|
||||
|
||||
// Make sure that all y columns have the same length
|
||||
N := Length(yValues[0]);
|
||||
|
@ -42,26 +42,24 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideLeft.Control = ParamsPanel
|
||||
AnchorSideBottom.Control = ButtonBevel
|
||||
Left = 0
|
||||
Height = 147
|
||||
Top = 265
|
||||
Height = 131
|
||||
Top = 281
|
||||
Width = 266
|
||||
Anchors = [akLeft, akBottom]
|
||||
AutoSize = True
|
||||
Caption = 'Options'
|
||||
ClientHeight = 127
|
||||
ClientHeight = 111
|
||||
ClientWidth = 262
|
||||
TabOrder = 10
|
||||
object SaveChk: TCheckBox
|
||||
AnchorSideLeft.Control = OptionsGroup
|
||||
AnchorSideTop.Control = OptionsBevel
|
||||
AnchorSideLeft.Control = WeightChk
|
||||
AnchorSideTop.Control = OriginChk
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 12
|
||||
Left = 40
|
||||
Height = 19
|
||||
Top = 100
|
||||
Top = 42
|
||||
Width = 180
|
||||
BorderSpacing.Left = 12
|
||||
BorderSpacing.Top = 8
|
||||
BorderSpacing.Bottom = 8
|
||||
BorderSpacing.Left = 24
|
||||
Caption = 'Save estimated weights in grid'
|
||||
TabOrder = 0
|
||||
end
|
||||
@ -83,8 +81,9 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 40
|
||||
Height = 19
|
||||
Top = 65
|
||||
Top = 84
|
||||
Width = 119
|
||||
BorderSpacing.Bottom = 8
|
||||
Caption = 'Through the origin'
|
||||
TabOrder = 2
|
||||
end
|
||||
@ -104,32 +103,17 @@ inherited WLSFrm: TWLSFrm
|
||||
end
|
||||
object UserWeightsChk: TRadioButton
|
||||
AnchorSideLeft.Control = WeightChk
|
||||
AnchorSideTop.Control = OriginChk
|
||||
AnchorSideTop.Control = SaveChk
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 16
|
||||
Height = 19
|
||||
Top = 46
|
||||
Top = 65
|
||||
Width = 156
|
||||
BorderSpacing.Top = 4
|
||||
Caption = 'Use weights from column'
|
||||
OnChange = UserWeightsChkChange
|
||||
TabOrder = 4
|
||||
end
|
||||
object OptionsBevel: TBevel
|
||||
AnchorSideLeft.Control = OptionsGroup
|
||||
AnchorSideTop.Control = Origin2Chk
|
||||
AnchorSideTop.Side = asrBottom
|
||||
AnchorSideRight.Control = OptionsGroup
|
||||
AnchorSideRight.Side = asrBottom
|
||||
Left = 12
|
||||
Height = 8
|
||||
Top = 84
|
||||
Width = 238
|
||||
Anchors = [akTop, akLeft, akRight]
|
||||
BorderSpacing.Left = 12
|
||||
BorderSpacing.Right = 12
|
||||
Shape = bsBottomLine
|
||||
end
|
||||
end
|
||||
object Label1: TLabel[6]
|
||||
AnchorSideLeft.Control = ParamsPanel
|
||||
@ -169,7 +153,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideBottom.Control = WeightVarEdit
|
||||
Left = 161
|
||||
Height = 15
|
||||
Top = 205
|
||||
Top = 221
|
||||
Width = 77
|
||||
Anchors = [akLeft, akBottom]
|
||||
BorderSpacing.Bottom = 2
|
||||
@ -184,7 +168,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideRight.Control = DepInBtn
|
||||
AnchorSideBottom.Control = OptionsGroup
|
||||
Left = 0
|
||||
Height = 240
|
||||
Height = 256
|
||||
Top = 17
|
||||
Width = 119
|
||||
Anchors = [akTop, akLeft, akRight, akBottom]
|
||||
@ -266,7 +250,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideBottom.Control = WeightOutBtn
|
||||
Left = 127
|
||||
Height = 26
|
||||
Top = 201
|
||||
Top = 217
|
||||
Width = 26
|
||||
Anchors = [akLeft, akBottom]
|
||||
BorderSpacing.Bottom = 4
|
||||
@ -283,7 +267,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 127
|
||||
Height = 26
|
||||
Top = 231
|
||||
Top = 247
|
||||
Width = 26
|
||||
Anchors = [akLeft, akBottom]
|
||||
Images = MainDataModule.ImageList
|
||||
@ -320,7 +304,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideRight.Side = asrBottom
|
||||
AnchorSideBottom.Control = WeightInBtn
|
||||
Left = 161
|
||||
Height = 63
|
||||
Height = 79
|
||||
Top = 122
|
||||
Width = 119
|
||||
Anchors = [akTop, akLeft, akRight, akBottom]
|
||||
@ -342,7 +326,7 @@ inherited WLSFrm: TWLSFrm
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 161
|
||||
Height = 23
|
||||
Top = 222
|
||||
Top = 238
|
||||
Width = 119
|
||||
Anchors = [akLeft, akRight, akBottom]
|
||||
BorderSpacing.Left = 8
|
||||
@ -362,13 +346,13 @@ inherited WLSFrm: TWLSFrm
|
||||
Height = 453
|
||||
Top = 8
|
||||
Width = 715
|
||||
ActivePage = OLSPage
|
||||
ActivePage = ResidualsRegPage
|
||||
Align = alClient
|
||||
BorderSpacing.Left = 4
|
||||
BorderSpacing.Top = 8
|
||||
BorderSpacing.Right = 8
|
||||
BorderSpacing.Bottom = 8
|
||||
TabIndex = 0
|
||||
TabIndex = 1
|
||||
TabOrder = 2
|
||||
object OLSPage: TTabSheet
|
||||
Caption = 'OLS Regression'
|
||||
|
@ -15,7 +15,6 @@ type
|
||||
{ TWLSFrm }
|
||||
|
||||
TWLSFrm = class(TBasicStatsParamsForm)
|
||||
OptionsBevel: TBevel;
|
||||
DepInBtn: TBitBtn;
|
||||
DepOutBtn: TBitBtn;
|
||||
IndInBtn: TBitBtn;
|
||||
@ -58,34 +57,47 @@ type
|
||||
ResidualsRegReportFrame: TReportFrame;
|
||||
WLSReportFrame: TReportFrame;
|
||||
|
||||
procedure AddPredictedStuffToGrid(AIndepCols: IntDyneVec; ANumIndepCols: Integer;
|
||||
BWeights: DblDyneVec);
|
||||
procedure AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
|
||||
|
||||
procedure AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
|
||||
|
||||
procedure CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
|
||||
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
|
||||
|
||||
procedure CreateOrGetChartFrame(AColIndex: Integer; AVarName: String;
|
||||
out AMemo: TMemo; out AChartFrame: TChartFrame);
|
||||
|
||||
function GetPageCaption(AVarName: String): String;
|
||||
|
||||
procedure PlotSquaredResiduals(AIndepCols: IntDyneVec;
|
||||
ANumIndepCols, ADepCol: Integer; AConfLevel: Double);
|
||||
procedure PlotSquaredResiduals(AIndepCols: IntDyneVec; ADepCol: Integer;
|
||||
const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
|
||||
|
||||
procedure PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
|
||||
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
|
||||
|
||||
procedure PredictIt(ColNoSelected: IntDyneVec; NoVars: integer;
|
||||
Means, StdDevs, BetaWeights: DblDyneVec;
|
||||
StdErrEst: double; NoIndepVars: integer);
|
||||
procedure Predict(const xData: DblDyneMat; const yData: DblDyneVec;
|
||||
ARegressionResults: TMultipleRegressionResults);
|
||||
|
||||
function PrepareData(out ADepCol, ANumIndepCols: Integer;
|
||||
out AIndepCols: IntDyneVec; out AWeightCol: Integer;
|
||||
out ARowLabels: StrDyneVec): Boolean;
|
||||
function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
|
||||
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
|
||||
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
|
||||
|
||||
function Process_OLSRegression(AIndepCols: IntDyneVec; ANumIndepCols, ADepCol: Integer;
|
||||
ARowLabels: StrDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
|
||||
function Process_OLSRegression(AIndepCols: IntDyneVec; ADepCol: Integer;
|
||||
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
|
||||
const yValues: DblDyneVec): Boolean;
|
||||
|
||||
function Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
|
||||
ANumIndepCols, ADepCol: Integer; ARowLabels: StrDyneVec;
|
||||
BWeights: DblDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
|
||||
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
|
||||
out AWeights: DblDyneVec): Boolean;
|
||||
|
||||
function Process_WeightedRegression(AIndepCols: IntDyneVec;
|
||||
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
|
||||
const yValues: DblDyneVec; const AWeights: DblDyneVec;
|
||||
SubtractMeans: Boolean): Boolean;
|
||||
|
||||
function RegressionAndReport(const ARowLabels: StrDyneVec;
|
||||
const xValues: DblDyneMat; const yValues: DblDyneVec;
|
||||
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
|
||||
|
||||
procedure WriteDescriptiveReport(AMemo: TMemo;
|
||||
const ARegressionResults: TBivariateRegressionResults;
|
||||
@ -116,6 +128,9 @@ uses
|
||||
TAChartUtils, TAChartAxisUtils, TALegend, TASources, TACustomSeries,
|
||||
Utils, MatrixUnit, GridProcs;
|
||||
|
||||
const
|
||||
CONF_LEVEL = DEFAULT_CONFIDENCE_LEVEL_PERCENT / 100.0;
|
||||
|
||||
{ TWLSFrm }
|
||||
|
||||
constructor TWLSFrm.Create(AOwner: TComponent);
|
||||
@ -152,50 +167,70 @@ begin
|
||||
WLSReportFrame.BorderSpacing.Bottom := 0;
|
||||
WLSReportFrame.BorderSpacing.Right := 0;
|
||||
InitToolbar(WLSReportFrame.ReportToolbar, tpRight);
|
||||
|
||||
PageControl.ActivePageIndex := 0;
|
||||
end;
|
||||
|
||||
|
||||
{ Get predicted squared residuals and save recipricols to grid as weights }
|
||||
procedure TWLSFrm.AddPredictedStuffToGrid(AIndepCols: IntDyneVec;
|
||||
ANumIndepCols: Integer; BWeights: DblDyneVec);
|
||||
{ Adds a new variable names AColTitle after the last grid column,
|
||||
and writes the specified data to the grid (in the specified number format). }
|
||||
procedure TWLSFrm.AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
|
||||
var
|
||||
col: Integer;
|
||||
i, j: Integer;
|
||||
X, predicted: Double;
|
||||
i, colIndex: Integer;
|
||||
begin
|
||||
col := NoVariables + 1;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.NewVar(col);
|
||||
DictionaryFrm.DictGrid.Cells[1, col] := 'PredResid2';
|
||||
OS3MainFrm.DataGrid.Cells[col, 0] := 'PredResid2';
|
||||
|
||||
col := NoVariables + 1;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.NewVar(col);
|
||||
DictionaryFrm.DictGrid.Cells[1, col] := 'WEIGHT';
|
||||
OS3MainFrm.DataGrid.Cells[col, 0] := 'WEIGHT';
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
|
||||
for i := 1 to NoCases do
|
||||
colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname);
|
||||
if colIndex = -1 then
|
||||
begin
|
||||
if (DataProcs.ValidValue(i, col-2)) then // do we have a valid squared OLS residual?
|
||||
begin
|
||||
predicted := 0.0;
|
||||
for j := 0 to ANumIndepCols - 1 do
|
||||
begin
|
||||
X := StrToFloat(OS3MainFrm.DataGrid.Cells[AIndepCols[j], i]);
|
||||
predicted := predicted + BWeights[j] * X;
|
||||
end;
|
||||
predicted := predicted + BWeights[ANumIndepCols];
|
||||
predicted := abs(predicted);
|
||||
OS3MainFrm.DataGrid.Cells[col-1, i] := Format('%.3f', [predicted]);
|
||||
if (predicted > 0.0) then
|
||||
predicted := 1.0 / sqrt(predicted)
|
||||
else
|
||||
predicted := 0.0;
|
||||
OS3MainFrm.DataGrid.Cells[col, i] := Format('%.3f', [predicted]);
|
||||
end;
|
||||
colIndex := NoVariables + 1;
|
||||
DictionaryFrm.NewVar(colIndex);
|
||||
DictionaryFrm.DictGrid.Cells[1, colIndex] := AVarName;
|
||||
DictionaryFrm.DictGrid.Cells[7, colIndex] := 'R';
|
||||
OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
end;
|
||||
for i := 0 to High(AData) do
|
||||
OS3MainFrm.DataGrid.Cells[colIndex, i+1] := Format(ANumFormat, [AData[i]]);
|
||||
end;
|
||||
|
||||
|
||||
{ Calculate predicted squared residuals and save recipricols to grid as weights }
|
||||
procedure TWLSFrm.AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
|
||||
begin
|
||||
// Create new variables and add to grid
|
||||
AddVariable('Pred SqrResid', ASqrPredictedResiduals, '%.3f');
|
||||
AddVariable('WEIGHTS', AWeights, '%.3f');
|
||||
end;
|
||||
|
||||
|
||||
{ Calculate predicted values of the squared residuals, as well as the weights }
|
||||
procedure TWLSFrm.CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
|
||||
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
|
||||
var
|
||||
i, j, n, m: Integer;
|
||||
sum: Double;
|
||||
begin
|
||||
ASquaredPredictedResiduals := nil;
|
||||
AWeights := nil;
|
||||
|
||||
MatSize(xValues, n,m);
|
||||
SetLength(ASquaredPredictedResiduals, n);
|
||||
SetLength(AWeights, n);
|
||||
|
||||
sum := 0;
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
ASquaredPredictedResiduals[i] := ACoeffs[m]; // intercept value
|
||||
for j := 0 to m-1 do
|
||||
ASquaredPredictedResiduals[i] += abs(xValues[i, j] * ACoeffs[j]);
|
||||
if ASquaredPredictedResiduals[i] <> 0 then
|
||||
AWeights[i] := 1 / ASquaredPredictedResiduals[i]
|
||||
else
|
||||
AWeights[i] := 0;
|
||||
sum := sum + AWeights[i];
|
||||
end;
|
||||
|
||||
// Normalize weights to 1.0
|
||||
AWeights := AWeights * (1.0 / sum);
|
||||
end;
|
||||
|
||||
|
||||
@ -212,9 +247,7 @@ end;
|
||||
|
||||
procedure TWLSFrm.Compute;
|
||||
var
|
||||
i, j, noIndep, depCol, weightCol, oldDepCol, NCases, pos, col: integer;
|
||||
IndepCols: IntDyneVec = nil;
|
||||
RowLabels: StrDyneVec = nil;
|
||||
i, j, noIndep, NCases, pos, col: integer;
|
||||
X, weight: double;
|
||||
Means: DblDyneVec = nil;
|
||||
Variances: DblDyneVec = nil;
|
||||
@ -229,6 +262,15 @@ var
|
||||
R2: Double = 0.0;
|
||||
errorcode: Boolean = false;
|
||||
PrintDesc: boolean = true;
|
||||
|
||||
indepCols: IntDyneVec = nil;
|
||||
rowLabels: StrDyneVec = nil;
|
||||
weights: DblDyneVec = nil;
|
||||
xValues: DblDyneMat = nil;
|
||||
yValues: DblDyneVec = nil;
|
||||
depCol: Integer;
|
||||
weightCol: Integer = -1;
|
||||
useOrigin: Boolean;
|
||||
begin
|
||||
SetLength(Means, NoVariables + 2);
|
||||
SetLength(Variances, NoVariables + 2);
|
||||
@ -244,31 +286,38 @@ begin
|
||||
NCases := NoCases;
|
||||
|
||||
// Get column indexes and do some validation checks.
|
||||
// NOTE that the Length(indepCols) is different from NoIndep.
|
||||
if not PrepareData(depCol, noIndep, indepCols, weightCol, RowLabels) then
|
||||
if not PrepareData(indepCols, depCol, weightCol, RowLabels, xValues, yValues) then
|
||||
exit;
|
||||
|
||||
// Save dependent column so we can re-use DepCol
|
||||
oldDepCol := depCol;
|
||||
// Do the OLS regression
|
||||
if not Process_OLSRegression(indepCols, depCol, RowLabels, xValues, yValues) then
|
||||
exit;
|
||||
|
||||
// *** Get OLS regression ***
|
||||
Process_OLSRegression(indepCols, noIndep, depCol, RowLabels, nCases, printDesc);
|
||||
// Regress the squared residuals on the predictors
|
||||
if WeightChk.Checked then
|
||||
begin
|
||||
if not Process_SquaredResidualsRegression(indepCols, RowLabels, xValues, weights) then
|
||||
exit;
|
||||
useOrigin := OriginChk.Checked;
|
||||
end else
|
||||
begin
|
||||
// Read the weights from the user column
|
||||
weights := CollectVecValues(OS3MainFrm.DataGrid, weightCol, indepCols);
|
||||
useOrigin := Origin2Chk.Checked;
|
||||
end;
|
||||
|
||||
// Do the weighted regression, finally
|
||||
Process_WeightedRegression(indepCols, RowLabels, xValues, yValues, weights, useOrigin);
|
||||
|
||||
|
||||
exit;
|
||||
|
||||
// *** Regress the squared residuals on the predictors ***
|
||||
depCol := NoVariables;
|
||||
Process_SquaredResidualsRegression(indepCols, noIndep, depCol, RowLabels,
|
||||
BWeights, nCases, printDesc);
|
||||
if SaveChk.Checked then //WeightChk.Checked then
|
||||
AddPredictedStuffToGrid(indepCols, noIndep, BWeights);
|
||||
|
||||
// *** Display squared residuals for each independent variable ***
|
||||
// NOTE: depCol points to the squared residuals column here
|
||||
PlotSquaredResiduals(IndepCols, NoIndep, depCol, 0.95);
|
||||
|
||||
if WeightChk.Checked then
|
||||
begin
|
||||
// Weight variables and do OLS regression on weighted variables
|
||||
DepCol := olddepcol;
|
||||
// DepCol := olddepcol;
|
||||
IndepCols[Noindep] := DepCol;
|
||||
for i := 1 to NoCases do
|
||||
begin
|
||||
@ -326,7 +375,7 @@ begin
|
||||
if UserWeightsChk.Checked then
|
||||
begin
|
||||
// Weight variables and do OLS regression on weighted variables
|
||||
depCol := olddepcol;
|
||||
// depCol := olddepcol;
|
||||
indepCols[Noindep] := depCol; // wp: CALCULATION SHOULD NORMALIZE USER WEIGHTS HERE !!!
|
||||
for i := 1 to NoCases do
|
||||
begin
|
||||
@ -536,7 +585,45 @@ end;
|
||||
|
||||
|
||||
procedure TWLSFrm.PlotSquaredResiduals(AIndepCols: IntDyneVec;
|
||||
ANumIndepCols, ADepCol: Integer; AConfLevel: Double);
|
||||
ADepCol: Integer; const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
|
||||
var
|
||||
x, y: DblDyneVec;
|
||||
i, xCol, yCol: Integer;
|
||||
regressionRes: TBivariateRegressionResults;
|
||||
memo: TMemo;
|
||||
chartFrame: TChartFrame;
|
||||
xLabel, yLabel: String;
|
||||
numIndepCols: Integer;
|
||||
begin
|
||||
// We will plot the selected vector of the independent values vertically,
|
||||
// and the dependent values horizontally.
|
||||
|
||||
xCol := ADepCol;
|
||||
x := VecCopy(ADepValues);
|
||||
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
|
||||
numIndepCols := Length(AIndepCols);
|
||||
|
||||
for i := 0 to numIndepCols-1 do
|
||||
begin
|
||||
yCol := AIndepCols[i];
|
||||
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
|
||||
y := MatColVector(AIndepValues, yCol-1);
|
||||
SortOnX(x, y);
|
||||
|
||||
// Regression
|
||||
BivariateRegression(x, y, CONF_LEVEL, regressionRes);
|
||||
|
||||
// Create tab with chart and report controls
|
||||
CreateOrGetChartFrame(yCol-1, yLabel, memo, chartFrame); // -1 because yCol i is in grid units
|
||||
|
||||
// Plot
|
||||
PlotXY(chartFrame, x, y, regressionRes, xLabel, yLabel);
|
||||
|
||||
// Print the descriptive statistics
|
||||
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
|
||||
end;
|
||||
end;
|
||||
(*
|
||||
var
|
||||
xCol, yCol: Integer;
|
||||
xLabel, yLabel: String;
|
||||
@ -559,8 +646,8 @@ begin
|
||||
colNoSelected[1] := yCol;
|
||||
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
|
||||
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
|
||||
xPoints := CollectValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
|
||||
yPoints := CollectValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
|
||||
xPoints := CollectVecValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
|
||||
yPoints := CollectVecValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
|
||||
SortOnX(xPoints, yPoints);
|
||||
|
||||
// Regression
|
||||
@ -576,7 +663,7 @@ begin
|
||||
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
|
||||
end;
|
||||
end;
|
||||
|
||||
*)
|
||||
|
||||
procedure TWLSFrm.PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
|
||||
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
|
||||
@ -643,52 +730,42 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
{ Routine obtains predicted raw and standardized scores and their
|
||||
residuals. It is assumed that the dependent variable is last in the
|
||||
list of variable column pointers stored in the ColNoSelected vector.
|
||||
Get the z predicted score and its residual }
|
||||
procedure TWLSFrm.PredictIt(ColNoSelected: IntDyneVec; NoVars: integer;
|
||||
Means, StdDevs, BetaWeights: DblDyneVec; StdErrEst: double; NoIndepVars: integer);
|
||||
procedure TWLSFrm.Predict(const xData: DblDyneMat; const yData: DblDyneVec;
|
||||
ARegressionResults: TMultipleRegressionResults);
|
||||
var
|
||||
col1, col2, col3, col4, col5, i, j, k, Index: integer;
|
||||
predicted, zpredicted, z1, z2, resid, residsqr: double;
|
||||
colName: String;
|
||||
means, stddevs, variances: DblDyneVec;
|
||||
i, j, n, m: Integer;
|
||||
zPred: DblDyneVec = nil;
|
||||
// zResid: DblDyneVec = nil;
|
||||
rawPred: DblDyneVec = nil;
|
||||
rawResid: DblDyneVec = nil;
|
||||
sqrResid: DblDyneVec = nil;
|
||||
begin
|
||||
colName := 'Pred.z.';
|
||||
col1 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
|
||||
if col1 = -1 then
|
||||
begin
|
||||
col1 := NoVariables + 1;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.DictGrid.ColCount := 8;
|
||||
DictionaryFrm.NewVar(col1);
|
||||
OS3MainFrm.DataGrid.Cells[col1, 0] := colName;
|
||||
DictionaryFrm.DictGrid.Cells[1, col1] := colName;
|
||||
end;
|
||||
MatSize(xData, n, m);
|
||||
MatColMeanVarStdDev(xData, means, variances, stddevs);
|
||||
|
||||
colName := 'z Resid.';
|
||||
col2 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
|
||||
if col2 = -1 then
|
||||
begin
|
||||
col2 := NoVariables + 1;
|
||||
DictionaryFrm.NewVar(col2);
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
OS3MainFrm.DataGrid.Cells[col2, 0] := colName;
|
||||
DictionaryFrm.DictGrid.Cells[1, col2] := colName;
|
||||
end;
|
||||
SetLength(zPred, n);
|
||||
// SetLength(zResid, n);
|
||||
SetLength(rawPred, n);
|
||||
SetLength(rawResid, n);
|
||||
SetLength(sqrResid, n);
|
||||
|
||||
for i := 1 to NoCases do
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
zpredicted := 0.0;
|
||||
for j := 0 to NoIndepVars - 1 do
|
||||
begin
|
||||
k := ColNoSelected[j];
|
||||
z1 := (StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]) - Means[j]) / StdDevs[j];
|
||||
zpredicted := zpredicted + (z1 * BetaWeights[j]);
|
||||
end;
|
||||
OS3MainFrm.DataGrid.Cells[col1, i] := Format('%.4f',[zpredicted]);
|
||||
zPred[i] := 0;
|
||||
for j := 0 to m-1 do
|
||||
zPred[i] := zPred[i] + (xData[i, j] - means[j]) / stdDevs[j] * ARegressionResults.Beta[j];
|
||||
|
||||
{
|
||||
zResid[i] := (yData[i] - ARegressionResults.MeanY) / ARegressionResults.StdDevY;
|
||||
|
||||
w: THIS IS NOT CORRECT. Remove above line because it is not needed.
|
||||
|
||||
This is the code used by the original routine
|
||||
if StdDevs[NoVars-1] <> 0.0 then
|
||||
begin
|
||||
Index := ColNoSelected[NoVars-1];
|
||||
@ -696,79 +773,50 @@ begin
|
||||
z2 := (z2 - Means[NoVars-1]) / StdDevs[NoVars-1]; // z score
|
||||
OS3MainFrm.DataGrid.Cells[col2, i] := Format('%.4f',[z2 - zpredicted]); // z residual
|
||||
end;
|
||||
}
|
||||
|
||||
rawPred[i] := ARegressionResults.Coeffs[m]; // intercept
|
||||
for j := 0 to m-1 do
|
||||
rawPred[i] := rawPred[i] + ARegressionResults.Coeffs[j] * xData[i, j];
|
||||
|
||||
rawResid[i] := rawPred[i] - yData[i];
|
||||
sqrResid[i] := sqr(rawResid[i]);
|
||||
end;
|
||||
|
||||
// Get raw predicted and residuals
|
||||
colName := 'Pred.Raw';
|
||||
col3 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
|
||||
if col3 = -1 then
|
||||
begin
|
||||
col3 := NoVariables + 1;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.NewVar(col3);
|
||||
DictionaryFrm.DictGrid.Cells[1, col3] := colName;
|
||||
OS3MainFrm.DataGrid.Cells[col3, 0] := colName;
|
||||
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
|
||||
end;
|
||||
|
||||
// calculate raw predicted scores and store in DataGrid at col
|
||||
for i := 1 to NoCases do
|
||||
begin // predicted raw obtained from previously predicted z score
|
||||
predicted := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, i]) * StdDevs[NoVars-1] + Means[NoVars-1];
|
||||
OS3MainFrm.DataGrid.Cells[col3, i] := Format('%.3f',[predicted]);
|
||||
end;
|
||||
|
||||
// Calculate residuals of predicted raw scores end;
|
||||
colName := 'Raw Resid.';
|
||||
col4 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
|
||||
if col4 = -1 then
|
||||
begin
|
||||
col4 := NoVariables + 1;
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.NewVar(col4);
|
||||
DictionaryFrm.DictGrid.Cells[1, col4] := colName;
|
||||
OS3MainFrm.DataGrid.Cells[col4, 0] := colName;
|
||||
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
|
||||
end;
|
||||
for i := 1 to NoCases do
|
||||
begin
|
||||
Index := ColNoSelected[NoVars-1];
|
||||
resid := StrToFloat(OS3MainFrm.DataGrid.Cells[col3, i]) - StrToFloat(OS3MainFrm.DataGrid.Cells[Index, i]);
|
||||
OS3MainFrm.DataGrid.Cells[col4, i] := Format('%.3f', [resid]);
|
||||
end;
|
||||
|
||||
// get square of raw residuals
|
||||
colName := 'Resid Sqr';
|
||||
col5 := GetVariableIndex(OS3MainFrm.DataGrid, colName);
|
||||
if col5 = -1 then
|
||||
begin
|
||||
col5 := NoVariables + 1;
|
||||
DictionaryFrm.NewVar(col5);
|
||||
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
|
||||
DictionaryFrm.DictGrid.Cells[1, col5] := colName;
|
||||
OS3MainFrm.DataGrid.Cells[col5, 0] := colName;
|
||||
OS3MainFrm.DataGrid.ColCount := OS3MainFrm.DataGrid.ColCount + 1;
|
||||
end;
|
||||
for i := 1 to NoCases do
|
||||
begin
|
||||
residsqr := StrToFloat(OS3MainFrm.DataGrid.Cells[col4,i]);
|
||||
residsqr := residsqr * residsqr;
|
||||
OS3MainFrm.DataGrid.Cells[col5,i] := Format('%.3f',[residsqr]);
|
||||
end;
|
||||
AddVariable('z Pred', zPred, '%.4f');
|
||||
// AddGridColumn('z Resid', zResid, '%.4f');
|
||||
AddVariable('Raw Pred', rawPred, '%.3f');
|
||||
AddVariable('Raw Resid', rawResid, '%.3f');
|
||||
AddVariable('Sqr Resid', sqrResid, '%.3f');
|
||||
end;
|
||||
|
||||
|
||||
function TWLSFrm.PrepareData(out ADepCol, ANumIndepCols: Integer;
|
||||
out AIndepCols: IntDyneVec; out AWeightCol: Integer;
|
||||
out ARowLabels: StrDyneVec): Boolean;
|
||||
{ Prepares the data for the analysis by extracting all needed data from the
|
||||
grid:
|
||||
- AIndepCols: integer array containing the grid column indexes of the
|
||||
independent variables to be used
|
||||
- ADepCol: grid column index of the dependent variable to be used
|
||||
- AWeightCol: optional grid column index of the weight data to be used
|
||||
- ARowLabels: string array containing the names of the independent variables
|
||||
as well of the dependent variable (last)
|
||||
- xValues: matrix with all independent values. The columns of the matrix
|
||||
correspond to the variables, the row correspond to the cases.
|
||||
- yValues: vector with the dependent variable values
|
||||
}
|
||||
function TWLSFrm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
|
||||
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
|
||||
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
|
||||
var
|
||||
i: Integer;
|
||||
msg: String;
|
||||
C: TWinControl;
|
||||
numIndepCols: Integer;
|
||||
begin
|
||||
Result := false;
|
||||
AIndepCols := nil;
|
||||
ARowLabels := nil;
|
||||
xValues := nil;
|
||||
yvalues := nil;
|
||||
|
||||
if not Validate(msg, C) then
|
||||
begin
|
||||
@ -777,18 +825,14 @@ begin
|
||||
exit;
|
||||
end;
|
||||
|
||||
ANumIndepCols := IndVarList.Items.Count;
|
||||
numIndepCols := IndVarList.Items.Count;
|
||||
ADepCol := GetVariableIndex(OS3MainFrm.DataGrid, DepVarEdit.Text);
|
||||
AWeightCol := GetVariableIndex(OS3MainFrm.DataGrid, WeightVarEdit.Text);
|
||||
|
||||
// The IndepCols store also other variables, in addition to the "real"
|
||||
// independent variables. Until I know how this works, this array must be
|
||||
// over-dimensions.
|
||||
// ARowLabels alike.
|
||||
SetLength(AIndepCols, NoVariables + 2);
|
||||
SetLength(ARowLabels, NoVariables);
|
||||
SetLength(AIndepCols, numIndepCols);
|
||||
SetLength(ARowLabels, numIndepCols + 1); // +1 to add independent column label
|
||||
|
||||
for i := 0 to ANumIndepCols-1 do
|
||||
for i := 0 to numIndepCols-1 do
|
||||
begin
|
||||
AIndepCols[i] := GetVariableIndex(OS3MainFrm.DataGrid, IndVarList.Items[i]);
|
||||
if AIndepCols[i] = -1 then
|
||||
@ -798,9 +842,7 @@ begin
|
||||
end;
|
||||
ARowLabels[i] := IndVarList.Items[i];
|
||||
end;
|
||||
|
||||
// Append dependent column index to the independent columns vector.
|
||||
AIndepCols[ANumIndepCols] := ADepCol;
|
||||
ARowLabels[numIndepCols] := DepVarEdit.Text;
|
||||
|
||||
// Check variable types: all of them must be numeric (float or integer)
|
||||
if not IsNumericCol(ADepCol) then
|
||||
@ -809,7 +851,7 @@ begin
|
||||
exit;
|
||||
end;
|
||||
|
||||
for i := 0 to ANumIndepCols-1 do
|
||||
for i := 0 to numIndepCols-1 do
|
||||
if not IsNumericCol(AIndepCols[i]) then
|
||||
begin
|
||||
ErrorMsg('Incorrect data type of independent variable "%s"', [ARowLabels[i]]);
|
||||
@ -822,56 +864,45 @@ begin
|
||||
exit;
|
||||
end;
|
||||
|
||||
xValues := CollectMatValues(OS3MainFrm.DataGrid, AIndepCols);
|
||||
yValues := CollectVecValues(OS3MainFrm.DataGrid, ADepCol);
|
||||
|
||||
Result := true;
|
||||
end;
|
||||
|
||||
|
||||
{ Runs the ordinary least squares regression on the grid data }
|
||||
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec; ANumIndepCols, ADepCol: Integer;
|
||||
ARowLabels: StrDyneVec; ANumCases: Integer; PrintAll: Boolean): Boolean;
|
||||
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec;
|
||||
ADepCol: Integer; const ARowLabels: StrDyneVec;
|
||||
const xValues: DblDyneMat; const yValues: DblDyneVec): Boolean;
|
||||
var
|
||||
lReport: TStrings;
|
||||
means: DblDyneVec = nil;
|
||||
variances: DblDyneVec = nil;
|
||||
stdDevs: DblDyneVec = nil;
|
||||
BWeights: DblDyneVec = nil;
|
||||
BetaWeights: DblDyneVec = nil;
|
||||
BStdErrs: DblDyneVec = nil;
|
||||
BtTests: DblDyneVec = nil;
|
||||
tProbs: DblDyneVec = nil;
|
||||
R2, stdErrEst: Double;
|
||||
error: Boolean;
|
||||
regressionRes: TMultipleRegressionResults;
|
||||
i: Integer;
|
||||
numIndepCols: Integer;
|
||||
begin
|
||||
Result := false;
|
||||
numIndepCols := Length(AIndepCols);
|
||||
|
||||
lReport := TStringList.Create;
|
||||
try
|
||||
lReport.Add('ORDINARY LEAST SQUARES (OLS) REGRESSION RESULTS');
|
||||
lReport.Add('');
|
||||
lReport.Add('Dependent variable: ');
|
||||
lReport.Add(' ' + OS3MainFrm.DataGrid.Cells[ADepCol, 0]);
|
||||
lReport.Add('Independent variables:');
|
||||
for i := 0 to numIndepCols-1 do
|
||||
lReport.Add(' ' + ARowLabels[i]);
|
||||
lReport.Add('');
|
||||
|
||||
SetLength(means, NoVariables + 2);
|
||||
SetLength(variances, NoVariables + 2);
|
||||
SetLength(stdDevs, NoVariables + 2);
|
||||
SetLength(BWeights, NoVariables + 2);
|
||||
SetLength(BetaWeights, NoVariables + 2);
|
||||
SetLength(BStdErrs, NoVariables + 2);
|
||||
SetLength(Bttests, NoVariables + 2);
|
||||
SetLength(tprobs, NoVariables + 2);
|
||||
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
|
||||
|
||||
MReg(ANumIndepCols, AIndepCols, ADepCol, ARowLabels, Means, Variances, StdDevs,
|
||||
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
|
||||
ANumCases, error, PrintAll, lReport);
|
||||
if Result then
|
||||
begin
|
||||
Predict(xValues, yValues, regressionRes);
|
||||
OLSReportFrame.DisplayReport(lReport);
|
||||
end;
|
||||
|
||||
// if error then // wp: Why does MReg exit with error???
|
||||
// exit;
|
||||
|
||||
// Get predicted z score, residual z score, predicted raw score,
|
||||
// residual raw score and squared raw residual score. Place in the DataGrid
|
||||
PredictIt(AIndepCols, ANumIndepCols+1, means, stdDevs, BetaWeights, stdErrEst, ANumIndepCols);
|
||||
|
||||
OLSReportFrame.DisplayReport(lReport);
|
||||
|
||||
Result := true;
|
||||
finally
|
||||
lReport.Free;
|
||||
end;
|
||||
@ -879,44 +910,137 @@ end;
|
||||
|
||||
|
||||
function TWLSFrm.Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
|
||||
ANumIndepCols, ADepCol: Integer; ARowLabels: StrDyneVec; BWeights: DblDyneVec;
|
||||
ANumCases: Integer; PrintAll: Boolean): Boolean;
|
||||
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; out AWeights: DblDyneVec): Boolean;
|
||||
var
|
||||
lReport: TStrings;
|
||||
means: DblDyneVec = nil;
|
||||
variances: DblDyneVec = nil;
|
||||
stdDevs: DblDyneVec = nil;
|
||||
BetaWeights: DblDyneVec = nil;
|
||||
BStdErrs: DblDyneVec = nil;
|
||||
BtTests: DblDyneVec = nil;
|
||||
tProbs: DblDyneVec = nil;
|
||||
R2, stdErrEst: Double;
|
||||
error: Boolean;
|
||||
sqrResiduals: DblDyneVec;
|
||||
predSqrResiduals: DblDyneVec;
|
||||
regressionRes: TMultipleRegressionResults;
|
||||
i, depCol, numIndepCols: Integer;
|
||||
begin
|
||||
AWeights := nil;
|
||||
ResidualsRegPage.TabVisible := WeightChk.Checked;
|
||||
|
||||
if not WeightChk.Checked then
|
||||
exit;
|
||||
|
||||
numIndepCols := Length(AIndepCols);
|
||||
|
||||
// The last grid column (added by Process_ODSRegression) contains the
|
||||
// squared residuals which will be fitted here.
|
||||
depCol := NoVariables;
|
||||
sqrResiduals := CollectVecValues(OS3MainFrm.DataGrid, depCol);
|
||||
|
||||
lReport := TStringList.Create;
|
||||
try
|
||||
lReport.Add('REGRESSION OF SQUARED RESIDUALS ON INDEPENDENT VARIABLES');
|
||||
lReport.Add('');
|
||||
lReport.Add('Dependent variable: ');
|
||||
lReport.Add(' ' + ARowLabels[numIndepCols]);
|
||||
lReport.Add('Independent variables:');
|
||||
for i := 0 to numIndepCols-1 do
|
||||
lReport.Add(' ' + ARowLabels[i]);
|
||||
lReport.Add('');
|
||||
|
||||
SetLength(means, NoVariables + 2);
|
||||
SetLength(variances, NoVariables + 2);
|
||||
SetLength(stdDevs, NoVariables + 2);
|
||||
SetLength(BetaWeights, NoVariables + 2);
|
||||
SetLength(BStdErrs, NoVariables + 2);
|
||||
SetLength(Bttests, NoVariables + 2);
|
||||
SetLength(tprobs, NoVariables + 2);
|
||||
Result := RegressionAndReport(ARowLabels, xValues, sqrResiduals, regressionRes, lReport);
|
||||
|
||||
MReg(ANumIndepCols, AIndepCols, ADepCol, ARowLabels, Means, Variances, StdDevs,
|
||||
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
|
||||
ANumCases, error, PrintAll, lReport);
|
||||
if Result then
|
||||
begin
|
||||
// Display the results
|
||||
ResidualsRegReportFrame.DisplayReport(lReport);
|
||||
|
||||
// Calculate weights and store them in the grid
|
||||
CalcWeights(xValues, regressionRes.Coeffs, predSqrResiduals, AWeights);
|
||||
|
||||
// Display squared residuals for each independent variable
|
||||
PlotSquaredResiduals(AIndepCols, depCol, xValues, sqrResiduals);
|
||||
|
||||
// Store weights to the grid
|
||||
if SaveChk.Checked then
|
||||
AddWeightsToGrid(predSqrResiduals, AWeights);
|
||||
end;
|
||||
|
||||
ResidualsRegReportFrame.DisplayReport(lReport);
|
||||
finally
|
||||
lReport.Free;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function TWLSFrm.Process_WeightedRegression(AIndepCols: IntDyneVec;
|
||||
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
|
||||
const yValues: DblDyneVec; const AWeights: DblDyneVec; SubtractMeans: Boolean): Boolean;
|
||||
var
|
||||
i, j, n, m: Integer;
|
||||
regressionRes: TMultipleRegressionResults;
|
||||
lReport: TStrings;
|
||||
means: DblDyneVec;
|
||||
begin
|
||||
MatSize(xValues, n, m);
|
||||
|
||||
for i :=0 to n-1 do
|
||||
for j := 0 to m-1 do
|
||||
xValues[i, j] := xValues[i, j] * AWeights[i];
|
||||
|
||||
if SubtractMeans then
|
||||
begin
|
||||
means := MatRowMeans(xValues);
|
||||
for i := 0 to n-1 do
|
||||
for j := 0 to m-1 do
|
||||
xValues[i, j] := xValues[i, j] - means[i];
|
||||
end;
|
||||
|
||||
lReport := TStringList.Create;
|
||||
try
|
||||
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
|
||||
lReport.Add('');
|
||||
lReport.Add('Dependent variable: ');
|
||||
lReport.Add(' ' + ARowLabels[m]);
|
||||
lReport.Add('Independent variables:');
|
||||
for i := 0 to m-1 do
|
||||
lReport.Add(' ' + ARowLabels[i]);
|
||||
lReport.Add('');
|
||||
|
||||
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
|
||||
|
||||
if Result then
|
||||
WLSReportFrame.DisplayReport(lReport);
|
||||
|
||||
finally
|
||||
lReport.Free;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function TWLSFrm.RegressionAndReport(const ARowLabels: StrDyneVec;
|
||||
const xValues: DblDyneMat; const yValues: DblDyneVec;
|
||||
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
|
||||
var
|
||||
err: TRegressionError;
|
||||
begin
|
||||
err := MultipleRegression(xValues, yValues, CONF_LEVEL, ARegressionResults);
|
||||
case err of
|
||||
regOK: ;
|
||||
regTooFewValues: ErrorMsg('At least two values required for regression.');
|
||||
regStdDevZero: ErrorMsg('Standard deviation is zero.');
|
||||
end;
|
||||
Result := (err = regOK);
|
||||
|
||||
ARegressionResults.WriteCoeffsReport(AReport, ARowLabels);
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
|
||||
ARegressionResults.WriteANOVAReport(AReport);
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
|
||||
ARegressionResults.WriteVarCovarReport(AReport, ARowLabels);
|
||||
AReport.Add('');
|
||||
AReport.Add('');
|
||||
|
||||
ARegressionResults.WriteCorrelationReport(AReport, ARowLabels);
|
||||
end;
|
||||
|
||||
|
||||
procedure TWLSFrm.Reset;
|
||||
var
|
||||
i: integer;
|
||||
@ -1073,20 +1197,12 @@ var
|
||||
begin
|
||||
lReport := TStringList.Create;
|
||||
try
|
||||
{ not needed - requires too much space
|
||||
lReport.Add('Data file: %s', [OS3MainFrm.FileNameEdit.Text]);
|
||||
lReport.Add('');
|
||||
lReport.Add('Variables:');
|
||||
lReport.Add(' X: %s', [xLabel]);
|
||||
lReport.Add(' Y: %s', [yLabel]);
|
||||
lReport.Add('');
|
||||
}
|
||||
lReport.Add('Variable Mean Variance Std.Dev.');
|
||||
lReport.Add('---------- -------- -------- --------');
|
||||
lReport.Add(' Variable Mean Variance Std.Dev. ');
|
||||
lReport.Add('------------ ------------ ------------ ------------');
|
||||
with ARegressionResults do
|
||||
begin
|
||||
lReport.Add('%-10s %8.2f %8.2f %8.2f', [xLabel, XMean, XVariance, XStdDev]);
|
||||
lReport.Add('%-10s %8.2f %8.2f %8.2f', [yLabel, YMean, YVariance, YStdDev]);
|
||||
lReport.Add('%12s %12.2f %12.2f %12.2f', [xLabel, XMean, XVariance, XStdDev]);
|
||||
lReport.Add('%12s %12.2f %12.2f %12.2f', [yLabel, YMean, YVariance, YStdDev]);
|
||||
lReport.Add('');
|
||||
lReport.Add('Regression:');
|
||||
lReport.Add(' Correlation: %8.3f', [R]);
|
||||
|
@ -989,6 +989,9 @@ begin
|
||||
if not (Sender = DataGrid) then
|
||||
exit;
|
||||
|
||||
if aCol >= DictionaryFrm.DictGrid.RowCount then
|
||||
exit;
|
||||
|
||||
ts := DataGrid.Canvas.TextStyle;
|
||||
justif := DictionaryFrm.DictGrid.Cells[7, aCol];
|
||||
if justif = '' then justif := 'L';
|
||||
|
@ -8,9 +8,11 @@ uses
|
||||
Classes, SysUtils, Grids,
|
||||
Globals, DictionaryUnit;
|
||||
|
||||
function CollectValues(AGrid: TStringGrid; AColIndex: Integer;
|
||||
function CollectVecValues(AGrid: TStringGrid; AColIndex: Integer;
|
||||
AColCheck: IntDyneVec = nil): DblDyneVec;
|
||||
|
||||
function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat;
|
||||
|
||||
procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer;
|
||||
const AColCheck: IntDyneVec; out AMin, AMax: Double);
|
||||
|
||||
@ -43,7 +45,7 @@ uses
|
||||
Non-numeric values in the considered cell will raise an exception.
|
||||
|
||||
NOTE: AColCheck must not be overdimensioned! }
|
||||
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
|
||||
function CollectVecValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
|
||||
var
|
||||
row, n: Integer;
|
||||
val: Double;
|
||||
@ -70,6 +72,33 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
{ Extracts the grid values from the columns with indices given by AColIndices
|
||||
and puts them into the columns of the result matrix.
|
||||
This means: The result matrix contains the variables as columns and the
|
||||
cases as rows. }
|
||||
function CollectMatValues(AGrid: TStringGrid; AColIndices: IntDyneVec): DblDyneMat;
|
||||
var
|
||||
nr, r, c, i, j: Integer;
|
||||
val: Double;
|
||||
begin
|
||||
SetLength(Result, AGrid.RowCount, Length(AColIndices));
|
||||
nr := 0;
|
||||
for r:= 1 to AGrid.RowCount-1 do
|
||||
begin
|
||||
if not GoodRecord(AGrid, r, AColIndices) then Continue;
|
||||
i := r - 1;
|
||||
for j := 0 to High(AColIndices) do
|
||||
begin
|
||||
c := AColIndices[j];
|
||||
if TryStrToFloat(trim(AGrid.Cells[c, r]), val) then
|
||||
Result[i, j] := val;
|
||||
end;
|
||||
inc(nr); // count the number of rows in the matrix.
|
||||
end;
|
||||
SetLength(Result, nr);
|
||||
end;
|
||||
|
||||
|
||||
{ Determines the minimum and maximum of the values in the specified column of
|
||||
the grid. Rows with "invalid" data are ignored. If AColCheck contains other
|
||||
column indices these cells must be "valid", too. }
|
||||
@ -101,7 +130,10 @@ end;
|
||||
the grid. }
|
||||
function GetVariableIndex(AGrid: TStringGrid; const AVarName: String): Integer;
|
||||
begin
|
||||
Result := AGrid.Rows[0].IndexOf(AVarName);
|
||||
if AVarName <> '' then
|
||||
Result := AGrid.Rows[0].IndexOf(AVarName)
|
||||
else
|
||||
Result := -1;
|
||||
end;
|
||||
|
||||
|
||||
@ -151,9 +183,13 @@ var
|
||||
missingCode: String;
|
||||
value: String;
|
||||
begin
|
||||
missingCode := Trim(DictionaryFrm.DictGrid.Cells[6, ACol]);
|
||||
value := Trim(AGrid.Cells[ACol, ARow]);
|
||||
Result := (value = missingCode);
|
||||
if ACol < DictionaryFrm.DictGrid.RowCount then
|
||||
begin
|
||||
missingCode := Trim(DictionaryFrm.DictGrid.Cells[6, ACol]);
|
||||
value := Trim(AGrid.Cells[ACol, ARow]);
|
||||
Result := (value = missingCode);
|
||||
end else
|
||||
Result := false;
|
||||
end;
|
||||
|
||||
|
||||
@ -163,8 +199,12 @@ function IsNumericCol(AColIndex: Integer): Boolean;
|
||||
var
|
||||
typeCode: String;
|
||||
begin
|
||||
typeCode := Trim(DictionaryFrm.DictGrid.Cells[4, AColIndex]);
|
||||
Result := (typeCode = 'F') or (typeCode = 'I');
|
||||
if AColIndex < DictionaryFrm.DictGrid.RowCount then
|
||||
begin
|
||||
typeCode := Trim(DictionaryFrm.DictGrid.Cells[4, AColIndex]);
|
||||
Result := (typeCode = 'F') or (typeCode = 'I');
|
||||
end else
|
||||
Result := false;
|
||||
end;
|
||||
|
||||
|
||||
|
@ -655,7 +655,6 @@ var
|
||||
SSY, SSres, resvar, SSreg: double;
|
||||
title: string;
|
||||
deplabel: string;
|
||||
errcode: boolean = false;
|
||||
begin
|
||||
Assert(OS3MainFrm <> nil);
|
||||
|
||||
@ -668,7 +667,7 @@ begin
|
||||
SetLength(ColLabels, NCases);
|
||||
|
||||
// initialize
|
||||
errcode := false;
|
||||
ErrorCode := false;
|
||||
for i := 0 to NCases do
|
||||
begin
|
||||
for j := 0 to NoIndep do X[i, j] := 0;
|
||||
@ -721,7 +720,7 @@ begin
|
||||
|
||||
deplabel := OS3MainFrm.DataGrid.Cells[DepCol,0];
|
||||
RowLabels[NoIndep] := 'Intercept';
|
||||
VarY := SSY - (MeanY * MeanY / NCases);
|
||||
VarY := SSY - sqr(MeanY) / NCases;
|
||||
VarY := VarY / (NCases - 1);
|
||||
SDY := sqrt(VarY);
|
||||
|
||||
@ -870,13 +869,15 @@ begin
|
||||
//AReport.Add('Standard Error of Estimate = %8.2f', [stderrest]);
|
||||
end;
|
||||
|
||||
// Needed for calculation of VIF and TOL
|
||||
Correlations(NoIndep, IndepCols, XTX, Means, Variances, StdDevs, ErrorCode, NCases);
|
||||
SVDinverse(XTX, NoIndep);
|
||||
|
||||
AReport.Add(DIVIDER_SMALL);
|
||||
AReport.Add('');
|
||||
RowLabels[N-1] := 'Intercept';
|
||||
AReport.Add(' Variable Beta B Std.Err. t prob VIF TOL');
|
||||
AReport.Add('---------- ---------- ---------- ---------- ---------- ---------- --------- ----------');
|
||||
Correlations(NoIndep, IndepCols, XTX, Means, Variances, StdDevs, errcode, NCases);
|
||||
SVDinverse(XTX, NoIndep);
|
||||
for i := 0 to NoIndep do
|
||||
begin
|
||||
VIF := XTX[i,i];
|
||||
|
@ -18,6 +18,18 @@ type
|
||||
|
||||
// Vectors
|
||||
|
||||
operator + (A, B: TDblVector): TDblVector;
|
||||
operator - (A, B: TDblVector): TDblVector;
|
||||
operator * (A, B: TDblVector): Double;
|
||||
operator * (A: TDblVector; b: Double): TDblVector;
|
||||
operator * (a: Double; B: TDblVector): TDblVector;
|
||||
|
||||
procedure VecCheck(A, B: TDblVector; out n: Integer);
|
||||
function VecCopy(A: TDblVector): TDblVector;
|
||||
function VecMultiply(A, B: TDblVector): TDblVector;
|
||||
function VecOnes(n: Integer): TDblVector;
|
||||
procedure VecSize(A: TDblVector; out n: Integer);
|
||||
|
||||
procedure VecMaxMin(const AData: TDblVector;
|
||||
out AMax, AMin: Double);
|
||||
procedure VecMeanStdDev(const AData: TDblVector;
|
||||
@ -36,6 +48,7 @@ function VecMedian(const AData: TDblVector): Double;
|
||||
operator + (A, B: TDblMatrix): TDblMatrix;
|
||||
operator - (A, B: TDblMatrix): TDblMatrix;
|
||||
operator * (A, B: TDblMatrix): TDblMatrix;
|
||||
operator * (A: TDblMatrix; v: TDblVector): TDblVector;
|
||||
|
||||
{ NOTE: Indices follow math convention:
|
||||
- 1st index is the row index, i.e. runs vertically
|
||||
@ -44,6 +57,9 @@ operator * (A, B: TDblMatrix): TDblMatrix;
|
||||
function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix;
|
||||
procedure MatCheck(A: TDblMatrix);
|
||||
procedure MatCheckSquare(A: TDblMatrix; out n: Integer);
|
||||
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
|
||||
function MatColMeans(A: TDblMatrix): TDblVector;
|
||||
function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector;
|
||||
function MatCopy(A: TDblMatrix): TDblMatrix;
|
||||
function MatDeterminant(A: TDblMatrix): double;
|
||||
function MatEqualSize(A, B: TDblMatrix): Boolean;
|
||||
@ -53,9 +69,10 @@ function MatInverse(A: TDblMatrix): TDblMatrix;
|
||||
function MatIsSquare(A: TDblMatrix; out n: Integer): Boolean;
|
||||
function MatNumCols(A: TDblMatrix): Integer;
|
||||
function MatNumRows(A: TDblMatrix): Integer;
|
||||
function MatRowMeans(A: TDblMatrix): TDblVector;
|
||||
function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector;
|
||||
procedure MatSize(A: TDblMatrix; out n, m: Integer);
|
||||
|
||||
function MatTranspose(A: TDblMatrix): TDblMatrix;
|
||||
function MatTransposed(A: TDblMatrix): TDblMatrix;
|
||||
|
||||
// Sorting
|
||||
procedure Exchange(var a, b: Double); overload;
|
||||
@ -88,6 +105,110 @@ uses
|
||||
Math;
|
||||
|
||||
|
||||
operator + (A, B: TDblVector): TDblVector;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
VecCheck(A, B, n);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := A[i] + B[i];
|
||||
end;
|
||||
|
||||
|
||||
operator - (A, B: TDblVector): TDblVector;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
VecCheck(A, B, n);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := A[i] - B[i];
|
||||
end;
|
||||
|
||||
|
||||
// Vector dot product
|
||||
operator * (A, B: TDblVector): Double;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
VecCheck(A, B, n);
|
||||
Result := 0;
|
||||
for i := 0 to n-1 do
|
||||
Result := Result + A[i] * B[i];
|
||||
end;
|
||||
|
||||
|
||||
// Multiplication of a vector by a scalar
|
||||
operator * (A: TDblVector; b: Double): TDblVector;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
n := Length(A);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := A[i] * b;
|
||||
end;
|
||||
|
||||
operator * (a: Double; B: TDblVector): TDblVector;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
n := Length(B);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := a * B[i];
|
||||
end;
|
||||
|
||||
|
||||
procedure VecCheck(A, B: TDblVector; out n: Integer);
|
||||
var
|
||||
na, nb: Integer;
|
||||
begin
|
||||
na := Length(A);
|
||||
nb := Length(B);
|
||||
if na <> nb then
|
||||
raise EMatrix.Create('Dimension error.')
|
||||
else
|
||||
n := na;
|
||||
end;
|
||||
|
||||
|
||||
function VecCopy(A: TDblVector): TDblVector;
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
SetLength(Result, Length(A));
|
||||
for i := 0 to High(A) do Result[i] := A[i];
|
||||
end;
|
||||
|
||||
|
||||
function VecMultiply(A, B: TDblVector): TDblVector;
|
||||
var
|
||||
i, n: Integer;
|
||||
begin
|
||||
VecCheck(A, B, n);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := A[i] * B[i];
|
||||
end;
|
||||
|
||||
|
||||
function VecOnes(n: Integer): TDblVector;
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do Result[i] := 1;
|
||||
end;
|
||||
|
||||
|
||||
procedure VecSize(A: TDblVector; out n: Integer);
|
||||
begin
|
||||
n := Length(A);
|
||||
end;
|
||||
|
||||
|
||||
{===============================================================================
|
||||
Statistical vector operations
|
||||
===============================================================================}
|
||||
@ -134,7 +255,7 @@ begin
|
||||
if n = 1 then
|
||||
exit;
|
||||
|
||||
AVariance := ((ss - sqr(AMean)) / n) / (n - 1);
|
||||
AVariance := (ss - sqr(AMean) * n) / (n - 1);
|
||||
AStdDev := sqrt(AVariance);
|
||||
end;
|
||||
|
||||
@ -229,6 +350,7 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
{ Product of two matrices }
|
||||
operator * (A, B: TDblMatrix): TDblMatrix;
|
||||
var
|
||||
na, ma, nb, mb, i, j, k: Integer;
|
||||
@ -251,6 +373,28 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
{ Product of an n x m matrix with an m vector }
|
||||
operator * (A: TDblMatrix; v: TDblVector): TDblVector;
|
||||
var
|
||||
na, ma, nv, i, j: Integer;
|
||||
sum: Double;
|
||||
begin
|
||||
MatSize(A, na, ma);
|
||||
VecSize(v, nv);
|
||||
if ma <> nv then
|
||||
raise EMatrix.Create('Dimension error.');
|
||||
|
||||
SetLength(Result, na);
|
||||
for i := 0 to na-1 do
|
||||
begin
|
||||
sum := 0;
|
||||
for j := 0 to ma-1 do
|
||||
sum := sum + A[i, j] * v[j];
|
||||
Result[i] := sum;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
{ Adds the elements of the vector v to the rows of the matrix A, i.e. the number
|
||||
of columns increases by 1 }
|
||||
function MatAppendColVector(A: TDblMatrix; v: TDblVector): TDblMatrix;
|
||||
@ -288,6 +432,59 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
procedure MatColMeanVarStdDev(A: TDblMatrix; out AMeans, AVariances, AStdDevs: TDblVector);
|
||||
var
|
||||
n, m, i, j: Integer;
|
||||
s, ss: Double;
|
||||
begin
|
||||
MatSize(A, n, m);
|
||||
SetLength(AMeans, m);
|
||||
SetLength(AVariances, m);
|
||||
SetLength(AStdDevs, m);
|
||||
for j := 0 to m-1 do
|
||||
begin
|
||||
s := 0;
|
||||
ss := 0;
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
s := s + A[i, j];
|
||||
ss := ss + sqr(A[i, j]);
|
||||
end;
|
||||
AMeans[j] := s / n;
|
||||
AVariances[j] := (ss - sqr(AMeans[j]) * n) / (n - 1);
|
||||
AStdDevs[j] := sqrt(AVariances[j]);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function MatColMeans(A: TDblMatrix): TDblVector;
|
||||
var
|
||||
i, j, n, m: Integer;
|
||||
sum: Double;
|
||||
begin
|
||||
MatSize(A, n, m);
|
||||
SetLength(Result, m);
|
||||
for j := 0 to m-1 do
|
||||
begin
|
||||
sum := 0;
|
||||
for i := 0 to n-1 do
|
||||
sum := sum + A[i, j];
|
||||
Result[j] := sum / n;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function MatColVector(A: TDblMatrix; AColIndex: Integer): TDblVector;
|
||||
var
|
||||
i, n, m: Integer;
|
||||
begin
|
||||
MatSize(A, n,m);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
Result[i] := A[i, AColIndex];
|
||||
end;
|
||||
|
||||
|
||||
function MatCopy(A: TDblMatrix): TDblMatrix;
|
||||
var
|
||||
n, m, i, j: Integer;
|
||||
@ -405,6 +602,34 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
function MatRowMeans(A: TDblMatrix): TDblVector;
|
||||
var
|
||||
i, j, n, m: Integer;
|
||||
sum: Double;
|
||||
begin
|
||||
MatSize(A, n,m);
|
||||
SetLength(Result, n);
|
||||
for i := 0 to n-1 do
|
||||
begin
|
||||
sum := 0;
|
||||
for j := 0 to m-1 do
|
||||
sum := sum + A[i, j];
|
||||
Result[i] := sum / m;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector;
|
||||
var
|
||||
j, n, m: Integer;
|
||||
begin
|
||||
MatSize(A, n,m);
|
||||
SetLength(Result, m);
|
||||
for j := 0 to m-1 do
|
||||
Result[j] := A[ARowIndex, j];
|
||||
end;
|
||||
|
||||
|
||||
procedure MatSize(A: TDblMatrix; out n, m: Integer);
|
||||
begin
|
||||
n := Length(A);
|
||||
@ -412,15 +637,15 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
function MatTranspose(A: TDblMatrix): TDblMatrix;
|
||||
function MatTransposed(A: TDblMatrix): TDblMatrix;
|
||||
var
|
||||
n, i, j: Integer;
|
||||
n, m, i, j: Integer;
|
||||
begin
|
||||
MatCheck(A);
|
||||
MatCheckSquare(A, n);
|
||||
SetLength(Result, n, n);
|
||||
MatSize(A, n, m);
|
||||
SetLength(Result, m, n);
|
||||
for i := 0 to n-1 do
|
||||
for j := 0 to n-1 do
|
||||
for j := 0 to m-1 do
|
||||
Result[j, i] := A[i, j];
|
||||
end;
|
||||
|
||||
|
@ -9,8 +9,15 @@ uses
|
||||
Classes, SysUtils,
|
||||
MatrixUnit;
|
||||
|
||||
|
||||
type
|
||||
ERegression = class(Exception);
|
||||
|
||||
type
|
||||
TRegressionError = (regOK, regTooFewValues, regStdDevZero);
|
||||
|
||||
TBivariateRegressionResults = record
|
||||
public
|
||||
Slope, Intercept: Double;
|
||||
XMean, YMean: Double;
|
||||
XVariance, YVariance: Double;
|
||||
@ -28,15 +35,53 @@ procedure BivariateRegression(const xData, yData: TDblVector;
|
||||
|
||||
type
|
||||
TMultipleRegressionResults = record
|
||||
private
|
||||
procedure WriteMatrixReport(AReport: TStrings; AMatrix: TDblMatrix;
|
||||
ATitle: String; AVarNames: TStringArray);
|
||||
public
|
||||
Coeffs: TDblVector; // Coefficients of the factors
|
||||
CoeffStdErrors: TDblVector; // Standard errors of the coefficients
|
||||
|
||||
Beta: TDblVector; // Standardized coefficients
|
||||
VarCovar: TDblMatrix; // Variance-covariance matrix
|
||||
Correlations: TDblMatrix; // Correlations matrix
|
||||
|
||||
SStotal, SSreg, SSresid: Double; // Sum of squares
|
||||
NumCases, NumDepVars: Integer; // number of cases, number of dependent variables
|
||||
R2: Double; // coefficient of determination
|
||||
AdjR2: Double; // Adjusted coefficient of determination
|
||||
StdErrorEstimate: Double; // standard error of estimate
|
||||
t: TDblVector;
|
||||
p: TDblVector; // Probability for > t
|
||||
F: Double;
|
||||
Probability: Double; // Probability for > F
|
||||
|
||||
Means: TDblVector;
|
||||
Variances: TDblVector;
|
||||
StdDevs: TDblVector;
|
||||
(*
|
||||
XMeans: TDblVector; // Means of the x columns
|
||||
XVariances: TDblVector; // Variances of the x columns
|
||||
XStdDevs: TDblVector; // Std Deviations of the x columns
|
||||
*)
|
||||
MeanY: Double; // Mean of y column
|
||||
VarianceY: Double; // Variance of y column
|
||||
StdDevY: Double; // Std Deviation of y column
|
||||
|
||||
procedure WriteANOVAReport(AReport: TStrings);
|
||||
procedure WriteCoeffsReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
procedure WriteCorrelationReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
procedure WriteVarCovarReport(AReport: TStrings; AVarNames: TStringArray);
|
||||
end;
|
||||
|
||||
procedure MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
||||
AConfLevel: Double; out AResults: TMultipleRegressionResults);
|
||||
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
||||
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
|
||||
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
StrUtils,
|
||||
MathUnit;
|
||||
|
||||
{===============================================================================
|
||||
@ -95,12 +140,241 @@ end;
|
||||
{===============================================================================
|
||||
Multiple regression
|
||||
===============================================================================}
|
||||
procedure MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
||||
AConfLevel: Double; out AResults: TMultipleRegressionResults);
|
||||
function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector;
|
||||
AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError;
|
||||
var
|
||||
i, j, nx, mx, ny: Integer;
|
||||
X, XT, XT_X, inv_XT_X: TDblMatrix;
|
||||
XT_Y: TDblVector;
|
||||
SY, SSY: Double;
|
||||
DFresid: Integer;
|
||||
begin
|
||||
MatCheck(xData);
|
||||
MatSize(xData, nx, mx);
|
||||
ny := Length(yData);
|
||||
if nx <> ny then
|
||||
raise ERegression.Create('Dimension error.');
|
||||
|
||||
if nx < 2 then begin
|
||||
Result := regTooFewValues;
|
||||
exit;
|
||||
end;
|
||||
|
||||
// Augmented x matrix: append ones vector for intercept
|
||||
X := MatAppendColVector(xData, VecOnes(ny));
|
||||
|
||||
// Transposed augmented matrix
|
||||
XT := MatTransposed(X);
|
||||
|
||||
// Product of augmented matrix with augmented Y
|
||||
XT_Y := XT * yData;
|
||||
|
||||
// Product of transposed augmented with augmented matrix
|
||||
XT_X := XT * X;
|
||||
|
||||
// Product of inverse of XT_X with XT_Y yields the coefficients. Intercept is last.
|
||||
inv_XT_X := MatInverse(XT_X);
|
||||
AResults.Coeffs := inv_XT_X * XT_Y;
|
||||
|
||||
// THe XT_X matrix contains the sum of squares (diagonal elements), sum of
|
||||
// cross-products (off-diagonal elements and the sum the variable values
|
||||
// (augmented column).
|
||||
SetLength(AResults.Means, mx+1);
|
||||
SetLength(AResults.Variances, mx+1);
|
||||
SetLength(AResults.StdDevs, mx+1);
|
||||
for i := 0 to mx do
|
||||
begin
|
||||
AResults.Means[i] := XT_X[i, mx] / nx;
|
||||
AResults.Variances[i] := (XT_X[i, i] - sqr(AResults.Means[i]) * nx) / (nx - 1);
|
||||
AResults.StdDevs[i] := sqrt(AResults.Variances[i]);
|
||||
{
|
||||
if AResults.StdDevs[i] = 0.0 then
|
||||
begin
|
||||
Result := regStdDevZero;
|
||||
exit;
|
||||
end;
|
||||
}
|
||||
end;
|
||||
|
||||
// Variance-covariance matrix
|
||||
SetLength(AResults.VarCovar, mx, mx);
|
||||
SetLength(AResults.Correlations, mx, mx);
|
||||
for i := 0 to mx-1 do
|
||||
for j := 0 to mx-1 do
|
||||
begin
|
||||
AResults.VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1);
|
||||
AResults.Correlations[i, j] := AResults.VarCovar[i, j] / (AResults.StdDevs[i] * AResults.StdDevs[j]);
|
||||
end;
|
||||
|
||||
with AResults do
|
||||
begin
|
||||
NumCases := nx;
|
||||
NumDepVars := mx;
|
||||
DFresid := NumCases - NumDepVars - 1;
|
||||
|
||||
// Calculate column means of xData
|
||||
// MatColMeanVarStdDev(X, XMeans, XVariances, XStdDevs);
|
||||
|
||||
// Basic stats of y data column
|
||||
VecSumSS(yData, SY, SSY);
|
||||
MeanY := SY / NumCases;
|
||||
SStotal := SSY - sqr(MeanY) * NumCases; // needed for ANOVA
|
||||
VarianceY := SStotal / (NumCases - 1);
|
||||
StdDevY := sqrt(VarianceY);
|
||||
{
|
||||
if StdDevY = 0 then
|
||||
begin
|
||||
Result := regStdDevZero;
|
||||
exit;
|
||||
end;
|
||||
}
|
||||
|
||||
// Standardized coefficients
|
||||
Beta := VecMultiply(Coeffs, StdDevs) * (1.0/StdDevY);
|
||||
|
||||
// Get standard errors, squared multiple correlation, tests of significance
|
||||
SetLength(CoeffStdErrors, mx+1);
|
||||
SetLength(t, mx+1);
|
||||
SetLength(p, mx+1);
|
||||
|
||||
SSresid := SSY - Coeffs * XT_Y;
|
||||
SSreg := SStotal - SSresid;
|
||||
StdErrorEstimate := sqrt(SSresid / DFresid);
|
||||
R2 := SSreg / SStotal;
|
||||
AdjR2 := 1.0 - (1.0 - R2) * (NumCases - 1) / DFresid;
|
||||
F := (SSreg / NumDepVars) / (SSresid / DFresid);
|
||||
Probability := ProbF(F, NumDepVars, DFresid);
|
||||
|
||||
for i := 0 to mx do
|
||||
begin
|
||||
CoeffStdErrors[i] := StdErrorEstimate * sqrt(inv_XT_X[i, i]);
|
||||
t[i] := Coeffs[i] / CoeffStdErrors[i];
|
||||
p[i] := ProbT(t[i], DFresid);
|
||||
end;
|
||||
end;
|
||||
|
||||
Result := regOK;
|
||||
end;
|
||||
|
||||
|
||||
{ Writes the ANOVA of the regression to a "report". }
|
||||
procedure TMultipleRegressionResults.WriteANOVAReport(AReport: TStrings);
|
||||
var
|
||||
DFreg, DFresid, DFtotal: Integer;
|
||||
begin
|
||||
DFreg := NumDepVars;
|
||||
DFtotal := NumCases - 1;
|
||||
DFresid := DFtotal - DFreg;
|
||||
AReport.Add('ANOVA');
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability');
|
||||
AReport.Add('---------- --- ---------------- ---------------- ------------ -----------');
|
||||
AReport.Add('Regression %3d %16.3f %16.3f %12.4f %10.4f', [DFreg, SSreg, SSreg/DFreg, F, Probability]);
|
||||
AReport.Add('Residual %3d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]);
|
||||
AReport.Add('Total %3d %16.3f', [DFtotal, SStotal]);
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add('');
|
||||
AReport.Add('R2: %10.4f', [R2]);
|
||||
AReport.Add('Adjusted R2: %10.4f', [AdjR2]);
|
||||
AReport.Add('F: %10.2f (with %d and %d degrees of freedom)', [F, DFreg, DFresid]);
|
||||
AReport.Add('Probability > F: %10.4f', [Probability]);
|
||||
AReport.Add('Standard Error of Estimate: %10.2f', [StdErrorEstimate]);
|
||||
end;
|
||||
|
||||
|
||||
{ Writes the determined coefficients and their statistics to a "report".
|
||||
The names of the independent variables are needed in the array AVarNames. }
|
||||
procedure TMultipleRegressionResults.WriteCoeffsReport(AReport: TStrings;
|
||||
AVarNames: TStringArray);
|
||||
var
|
||||
varName: String;
|
||||
i, first, last: Integer;
|
||||
begin
|
||||
AReport.Add('COEFFICIENTS');
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
AReport.Add(' Variable Unstandardized Std.Error Standardized t p ');
|
||||
AReport.Add('------------ -------------- ------------ ------------ --------- ---------');
|
||||
|
||||
first := 0;
|
||||
last := High(Coeffs);
|
||||
for i := first to last do
|
||||
begin
|
||||
if i = last then
|
||||
varName := '(Intercept)'
|
||||
else
|
||||
if i <= High(AVarNames) then
|
||||
varName := AVarNames[i]
|
||||
else
|
||||
varName := Format('VAR.%d', [i+1]);
|
||||
|
||||
AReport.Add('%12s %14.3f %12.3f %12.3f %9.4f %9.4f',
|
||||
[varName, Coeffs[i], CoeffStdErrors[i], Beta[i], t[i], p[i]]);
|
||||
end;
|
||||
AReport.Add('------------------------------------------------------------------------------');
|
||||
end;
|
||||
|
||||
|
||||
{ Writes the correlation matrix to a "report".
|
||||
The names of the independent variables are needed in the array AVarNames. }
|
||||
procedure TMultipleRegressionResults.WriteCorrelationReport(AReport: TStrings;
|
||||
AVarNames: TStringArray);
|
||||
begin
|
||||
WriteMatrixReport(AReport, Correlations, 'CORRELATION MATRIX', AVarNames);
|
||||
end;
|
||||
|
||||
|
||||
procedure TMultipleRegressionResults.WriteMatrixReport(AReport: TStrings;
|
||||
AMatrix: TDblMatrix; ATitle: String; AVarNames: TStringArray);
|
||||
|
||||
function GetVarName(i: Integer): String;
|
||||
begin
|
||||
if i < High(AVarNames) then
|
||||
Result := AVarNames[i]
|
||||
else
|
||||
Result := Format('VAR.%d', [i+1]);
|
||||
end;
|
||||
|
||||
const
|
||||
SPACE = ' ';
|
||||
var
|
||||
i, j, first, last: Integer;
|
||||
s: String;
|
||||
begin
|
||||
first := 0;
|
||||
last := High(AMatrix);
|
||||
|
||||
AReport.Add(ATitle);
|
||||
|
||||
s := DupeString(' ', 12);
|
||||
for j := first to last do
|
||||
s := s + SPACE + Format('%12s', [GetVarName(j)]);
|
||||
AReport.Add(DupeString('-', Length(s)));
|
||||
AReport.Add(s);
|
||||
|
||||
s := DupeString(' ', 12);
|
||||
for j := first to last do
|
||||
s := s + SPACE + DupeString('-', 12);
|
||||
AReport.Add(s);
|
||||
|
||||
for i := first to last do
|
||||
begin
|
||||
s := Format('%12s', [GetVarName(i)]);
|
||||
for j := first to last do
|
||||
s := s + SPACE + Format('%12.3f', [AMatrix[i, j]]);
|
||||
AReport.Add(s);
|
||||
end;
|
||||
AReport.Add(DupeString('-', Length(s)));
|
||||
end;
|
||||
|
||||
|
||||
{ Writes the variance/covariance matrix to a "report".
|
||||
The names of the independent variables are needed in the array AVarNames. }
|
||||
procedure TMultipleRegressionResults.WriteVarCovarReport(AReport: TStrings;
|
||||
AVarNames: TStringArray);
|
||||
begin
|
||||
WriteMatrixReport(AReport, VarCovar, 'VARIANCE-COVARIANCE MATRIX', AVarNames);
|
||||
end;
|
||||
|
||||
|
||||
end.
|
||||
|
||||
|
Reference in New Issue
Block a user