From b93c6936bfb02304c15ed21fffef66a380cca529 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Mon, 19 Oct 2020 08:24:46 +0000 Subject: [PATCH] LazStats: Massive refactoring of LSMRegUnit re-using general regression routines. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7782 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- applications/lazstats/source/LazStats.lpi | 5 + .../analysis/multiple_regression/lsmrunit.lfm | 117 +-- .../analysis/multiple_regression/lsmrunit.pas | 696 +++++++++++------- .../analysis/multiple_regression/wlsunit.pas | 4 +- .../lazstats/source/units/dataprocs.pas | 14 +- .../lazstats/source/units/matrixlib.pas | 60 +- .../lazstats/source/units/matrixunit.pas | 21 + .../lazstats/source/units/regressionunit.pas | 190 +++-- 8 files changed, 737 insertions(+), 370 deletions(-) diff --git a/applications/lazstats/source/LazStats.lpi b/applications/lazstats/source/LazStats.lpi index af33e2c08..bc3275f9e 100644 --- a/applications/lazstats/source/LazStats.lpi +++ b/applications/lazstats/source/LazStats.lpi @@ -49,6 +49,11 @@ + + + + + diff --git a/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.lfm b/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.lfm index 90cc74978..bd909c545 100644 --- a/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.lfm +++ b/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.lfm @@ -50,9 +50,9 @@ inherited LSMregForm: TLSMregForm AnchorSideTop.Control = Label1 AnchorSideTop.Side = asrBottom AnchorSideRight.Control = AllBtn - AnchorSideBottom.Control = InProb + AnchorSideBottom.Control = InProbEdit Left = 0 - Height = 203 + Height = 182 Top = 17 Width = 173 Anchors = [akTop, akLeft, akRight, akBottom] @@ -69,7 +69,7 @@ inherited LSMregForm: TLSMregForm AnchorSideLeft.Control = AllBtn AnchorSideLeft.Side = asrCenter AnchorSideTop.Control = VarList - AnchorSideRight.Control = DepVar + AnchorSideRight.Control = DepVarEdit Left = 187 Height = 26 Top = 17 @@ -81,7 +81,7 @@ inherited LSMregForm: TLSMregForm TabOrder = 1 end object Label2: TLabel[8] - AnchorSideLeft.Control = DepVar + AnchorSideLeft.Control = DepVarEdit AnchorSideTop.Control = DepInBtn Left = 227 Height = 15 @@ -91,7 +91,7 @@ inherited LSMregForm: TLSMregForm Caption = 'Dependent Variable' ParentColor = False end - object DepVar: TEdit[9] + object DepVarEdit: TEdit[9] AnchorSideLeft.Control = IndepVars AnchorSideTop.Control = Label2 AnchorSideTop.Side = asrBottom @@ -106,7 +106,7 @@ inherited LSMregForm: TLSMregForm BorderSpacing.Bottom = 12 ReadOnly = True TabOrder = 3 - Text = 'DepVar' + Text = 'DepVarEdit' end object Label3: TLabel[10] AnchorSideLeft.Control = IndepVars @@ -122,11 +122,11 @@ inherited LSMregForm: TLSMregForm end object Label5: TLabel[11] AnchorSideLeft.Control = ParamsPanel - AnchorSideTop.Control = InProb + AnchorSideTop.Control = InProbEdit AnchorSideTop.Side = asrCenter Left = 0 Height = 15 - Top = 232 + Top = 211 Width = 192 BorderSpacing.Right = 8 Caption = 'Minimum probability to enter block:' @@ -138,8 +138,8 @@ inherited LSMregForm: TLSMregForm AnchorSideRight.Side = asrBottom AnchorSideBottom.Control = ButtonBevel Left = 0 - Height = 114 - Top = 259 + Height = 135 + Top = 238 Width = 404 Anchors = [akLeft, akBottom] AutoSize = True @@ -151,72 +151,80 @@ inherited LSMregForm: TLSMregForm ChildSizing.VerticalSpacing = 2 ChildSizing.Layout = cclLeftToRightThenTopToBottom ChildSizing.ControlsPerLine = 2 - ClientHeight = 94 + ClientHeight = 115 ClientWidth = 400 TabOrder = 9 - object CPChkBox: TCheckBox + object ANOVAChk: TCheckBox Left = 12 Height = 19 Top = 6 + Width = 198 + Caption = 'Show ANOVA' + TabOrder = 0 + end + object CrossProductsChk: TCheckBox + Left = 218 + Height = 19 + Top = 6 Width = 170 Caption = 'Show Cross-Products Matrix' - TabOrder = 0 - end - object CovChkBox: TCheckBox - Left = 190 - Height = 19 - Top = 6 - Width = 198 - Caption = 'Show Variance-Covariance Matrix' TabOrder = 1 end - object CorrsChkBox: TCheckBox + object CovChk: TCheckBox Left = 12 Height = 19 Top = 27 + Width = 198 + Caption = 'Show Variance-Covariance Matrix' + TabOrder = 2 + end + object CorrsChk: TCheckBox + Left = 218 + Height = 19 + Top = 27 Width = 170 Caption = 'Show Intercorrelation Matrix' - TabOrder = 2 - end - object MeansChkBox: TCheckBox - Left = 190 - Height = 19 - Top = 27 - Width = 198 - Caption = 'Show Means' TabOrder = 3 end - object VarChkBox: TCheckBox + object MeansChk: TCheckBox Left = 12 Height = 19 Top = 48 + Width = 198 + Caption = 'Show Means' + TabOrder = 4 + end + object VarChk: TCheckBox + Left = 218 + Height = 19 + Top = 48 Width = 170 Caption = 'Show Variances' - TabOrder = 4 - end - object StdDevChkBox: TCheckBox - Left = 190 - Height = 19 - Top = 48 - Width = 198 - Caption = 'Show Standard Deviations' TabOrder = 5 end - object MatSaveChkBox: TCheckBox + object StdDevChk: TCheckBox Left = 12 Height = 19 Top = 69 + Width = 198 + Caption = 'Show Standard Deviations' + TabOrder = 6 + end + object MatSaveChk: TCheckBox + Left = 218 + Height = 19 + Top = 69 Width = 170 Caption = 'Save Correlation Matrix' - TabOrder = 6 + TabOrder = 7 end - object PredictChkBox: TCheckBox - Left = 190 + object PredictChk: TCheckBox + Left = 12 Height = 19 - Top = 69 + Top = 90 Width = 198 Caption = 'Predictions,residuals,C.I.''s to Grid' - TabOrder = 7 + TabOrder = 8 end end object IndepVars: TListBox[13] @@ -229,7 +237,7 @@ inherited LSMregForm: TLSMregForm AnchorSideBottom.Control = VarList AnchorSideBottom.Side = asrBottom Left = 227 - Height = 98 + Height = 77 Top = 122 Width = 174 Anchors = [akTop, akLeft, akRight, akBottom] @@ -290,21 +298,21 @@ inherited LSMregForm: TLSMregForm Spacing = 0 TabOrder = 5 end - object InProb: TEdit[17] + object InProbEdit: TEdit[17] AnchorSideLeft.Control = Label5 AnchorSideLeft.Side = asrBottom AnchorSideRight.Side = asrBottom AnchorSideBottom.Control = OptionsGroup Left = 200 Height = 23 - Top = 228 + Top = 207 Width = 50 Alignment = taRightJustify Anchors = [akLeft, akBottom] BorderSpacing.Top = 8 BorderSpacing.Right = 8 TabOrder = 8 - Text = 'InProb' + Text = 'InProbEdit' end object AllBtn: TBitBtn[18] AnchorSideLeft.Control = ParamsPanel @@ -331,19 +339,22 @@ inherited LSMregForm: TLSMregForm Height = 414 Top = 8 Width = 580 - ActivePage = MeanVarStddevPage + ActivePage = RegressionPage Align = alClient BorderSpacing.Left = 4 BorderSpacing.Top = 8 BorderSpacing.Right = 8 BorderSpacing.Bottom = 8 - TabIndex = 4 + TabIndex = 0 TabOrder = 2 object RegressionPage: TTabSheet Caption = 'Regression' end + object ANOVAPage: TTabSheet + Caption = 'ANOVA' + end object CrossProductsPage: TTabSheet - Caption = 'Cross-Products Matrix' + Caption = 'Cross-Products' end object VarCovarPage: TTabSheet Caption = 'Variance-Covariance' @@ -355,8 +366,4 @@ inherited LSMregForm: TLSMregForm Caption = 'Mean, Variance, StdDev' end end - object SaveDialog: TSaveDialog[3] - Left = 45 - Top = 357 - end end diff --git a/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.pas b/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.pas index d97c900be..1d4b6075e 100644 --- a/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.pas +++ b/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.pas @@ -7,7 +7,7 @@ interface uses Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls, Buttons, ExtCtrls, ComCtrls, Globals, MainUnit, MatrixLib, - DataProcs, DictionaryUnit, BasicStatsParamsFormUnit, ReportFrameUnit; + DataProcs, BasicStatsParamsFormUnit, ReportFrameUnit, RegressionUnit; type @@ -15,33 +15,34 @@ type TLSMregForm = class(TBasicStatsParamsForm) AllBtn: TBitBtn; + ANOVAChk: TCheckBox; IndepVars: TListBox; - CorrsChkBox: TCheckBox; - CovChkBox: TCheckBox; - CPChkBox: TCheckBox; + CorrsChk: TCheckBox; + CovChk: TCheckBox; + CrossProductsChk: TCheckBox; DepInBtn: TBitBtn; DepOutBtn: TBitBtn; - DepVar: TEdit; + DepVarEdit: TEdit; OptionsGroup: TGroupBox; InBtn: TBitBtn; - InProb: TEdit; + InProbEdit: TEdit; Label1: TLabel; Label2: TLabel; Label3: TLabel; Label5: TLabel; - MatSaveChkBox: TCheckBox; - MeansChkBox: TCheckBox; + MatSaveChk: TCheckBox; + MeansChk: TCheckBox; PageControl: TPageControl; - SaveDialog: TSaveDialog; OutBtn: TBitBtn; - PredictChkBox: TCheckBox; - StdDevChkBox: TCheckBox; + PredictChk: TCheckBox; + StdDevChk: TCheckBox; RegressionPage: TTabSheet; CrossProductsPage: TTabSheet; CorrelationsPage: TTabSheet; MeanVarStddevPage: TTabSheet; + ANOVAPage: TTabSheet; VarCovarPage: TTabSheet; - VarChkBox: TCheckBox; + VarChk: TCheckBox; VarList: TListBox; procedure AllBtnClick(Sender: TObject); procedure DepInBtnClick(Sender: TObject); @@ -51,9 +52,11 @@ type procedure OutBtnClick(Sender: TObject); procedure VarListDblClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); + private { private declarations } FRegressionFrame: TReportFrame; + FAnovaFrame: TReportFrame; FCrossProductsFrame: TReportFrame; FCorrelationsFrame: TReportFrame; FVarCovarFrame: TReportFrame; @@ -61,11 +64,31 @@ type IndepVarsCols: IntDyneVec; NoVars: integer; NoBlocks: integer; + procedure HideTabs; + + procedure PredictionToGrid(xData: DblDyneMat; yData: DblDyneVec; + const ARegressionResults: TMultipleRegressionResults; ABadRows: IntDyneVec); + + function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer; + out ARowLabels: StrDyneVec; out xValues: DblDyneMat; + out yValues: DblDyneVec; out ABadRows: IntDyneVec): Boolean; + + procedure Process_Regression( + const ARowLabels: StrDyneVec; const xValues: DblDyneMat; + const yValues: DblDyneVec; const ABadRows: IntDyneVec); + + procedure WriteMeanVarStddevReport(AReport: TStrings; AVarNames: StrDyneVec; + const AMeans, AVars, AStdDevs: DblDyneVec; Flags: Integer); + + procedure WriteReportHeader(AReport: TStrings; AVarNames: StrDyneVec); + protected procedure AdjustConstraints; override; procedure Compute; override; procedure UpdateBtnStates; override; + function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override; + public constructor Create(AOwner: TComponent); override; procedure Reset; override; @@ -80,8 +103,8 @@ implementation {$R *.lfm} uses - Math, - Utils, MathUnit; + Math, StrUtils, + Utils, GridProcs, MathUnit, MatrixUnit; { TLSMregForm } @@ -100,6 +123,16 @@ begin FRegressionFrame.BorderSpacing.Right := 0; InitToolbar(FRegressionFrame.ReportToolbar, tpRight); + FAnovaFrame := TReportFrame.Create(self); + FAnovaFrame.Name := ''; + FAnovaFrame.Parent := AnovaPage; + FAnovaFrame.Align := alClient; + FAnovaFrame.BorderSpacing.Left := 0; + FAnovaFrame.BorderSpacing.Top := 0; + FAnovaFrame.BorderSpacing.Bottom := 0; + FAnovaFrame.BorderSpacing.Right := 0; + InitToolbar(FAnovaFrame.ReportToolbar, tpRight); + FCrossProductsFrame := TReportFrame.Create(self); FCrossProductsFrame.Name := ''; FCrossProductsFrame.Parent := CrossProductsPage; @@ -151,7 +184,7 @@ begin 4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left ); ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height + - VarList.BorderSpacing.Bottom + InProb.Height + + VarList.BorderSpacing.Bottom + InProbEdit.Height + OptionsGroup.BorderSpacing.Top + OptionsGroup.Height + ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height; end; @@ -168,234 +201,17 @@ begin end; -procedure TLSMregForm.Compute; +procedure TLSMRegForm.Compute; var - i, j, NCases: integer; - NoIndepVars, DepVarCol, NEntered: integer; - R2, df1, df2: double; - StdErrEst, F, FProbF, OldR2: double; - pdf1, probin, prout: double; - errorcode: boolean; - BetaWeights: DblDyneVec = nil; - BWeights: DblDyneVec = nil; - BStdErrs: DblDyneVec = nil; - Bttests: DblDyneVec = nil; - tProbs: DblDyneVec = nil; - cellstring: string; - corrs: DblDyneMat = nil; - Means: DblDyneVec = nil; - Variances: DblDyneVec = nil; - StdDevs: DblDyneVec = nil; - title: string; - IndRowLabels: StrDyneVec = nil; - IndColLabels: StrDyneVec = nil; - IndepInverse: DblDyneMat = nil; - ColEntered: IntDyneVec = nil; - filename: string; - constant: double; - errcode: boolean = false; - anerror: Integer = 0; - lReport: TStrings; - - procedure AddHeaderToReport; - begin - lReport.Clear; - lReport.Add('LEAST SQUARES MULTIPLE REGRESSION by Bill Miller'); - lReport.Add(''); - end; - + indepCols: IntDyneVec = nil; + RowLabels: StrDyneVec = nil; + xValues: DblDyneMat = nil; + yValues: DblDyneVec = nil; + badRows: IntDyneVec = nil; + depCol: Integer; begin - NCases := NoCases; - SetLength(corrs,NoVariables+1,NoVariables+1); - SetLength(IndepInverse,NoVariables,NoVariables+1); - SetLength(IndepVarsCols,NoVariables+1); - SetLength(BWeights,NoVariables+1); - SetLength(BStdErrs,NoVariables+1); - SetLength(Bttests,NoVariables+1); - SetLength(tProbs,NoVariables+1); - SetLength(Means,NoVariables+1); - SetLength(Variances,NoVariables+1); - SetLength(StdDevs,NoVariables+1); - SetLength(IndepVarsCols,NoVariables+1); - SetLength(IndColLabels,NoVariables+1); - SetLength(IndRowLabels,NoVariables+1); - SetLength(BetaWeights,NoVariables+1); - SetLength(ColEntered,NoVariables+2); - - probin := StrToFloat(InProb.Text); // probability to include a block - prout := 1.0; - - if DepVar.Text = '' then - begin - ErrorMsg('No dependent variable selected.'); - exit; - end; - if IndepVars.Items.Count = 0 then - begin - ErrorMsg('No independent variable selected.'); - exit; - end; - - lReport := TStringList.Create; - try - errorcode := false; - - { get dependendent variable column } - DepVarCol := 0; - NoVars := NoVars + 1; - for j := 1 to NoVariables do - if DepVar.Text = OS3MainFrm.DataGrid.Cells[j,0] then DepVarCol := j; - - R2 := 0.0; - OldR2 := 0.0; - pdf1 := 0.0; - NEntered := 0; - - { get independendent variable column } - for i := 0 to IndepVars.Count-1 do - begin - //cellstring := OS3Mainfrm.DataGrid.Cells[i+1,0]; // bug - cellstring := IndepVars.Items[i]; //Bugfix by tatamata - for j := 1 to NoVariables do - begin - if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then - begin - IndepVarsCols[i] := j; - ColEntered[i] := j; - NEntered := NEntered + 1; - IndRowLabels[NEntered-1] := cellstring; - IndColLabels[NEntered-1] := cellstring; - end; - end; - end; - NEntered := NEntered + 1; // dependent variable last - ColEntered[NEntered-1] := DepVarCol; - IndRowLabels[NEntered-1] := OS3MainFrm.DataGrid.Cells[DepVarCol,0]; - IndColLabels[NEntered-1] := OS3MainFrm.DataGrid.Cells[DepVarCol,0]; - - if CPChkBox.Checked then - begin - title := 'Cross-Products Matrix'; - AddHeaderToReport; - GridXProd(NEntered, ColEntered, Corrs, errcode, NCases); - MatPrint(Corrs, NEntered, NEntered, title, IndRowLabels, IndColLabels, NCases, lReport); - FCrossProductsFrame.DisplayReport(lReport); - CrossProductsPage.TabVisible := true; - end else - CrossProductsPage.TabVisible := false; - - if CovChkBox.Checked then - begin - title := 'Variance-Covariance Matrix'; - AddHeaderToReport; - GridCovar(NEntered,ColEntered, Corrs, Means, Variances, StdDevs, errcode, NCases); - MatPrint(Corrs, NEntered, NEntered, title, IndRowLabels, IndColLabels, NCases, lReport); - FVarCovarFrame.DisplayReport(lReport); - VarCovarPage.TabVisible := true; -// lReport.Add(DIVIDER_SMALL); - end; - - Correlations(NEntered,ColEntered,Corrs,Means,Variances, StdDevs,errcode,NCases); - if CorrsChkBox.Checked then - begin - title := 'Product-Moment Correlations Matrix'; - AddHeaderToReport; - MatPrint(Corrs, NEntered, NEntered, title, IndRowLabels, IndColLabels, NCases, lReport); - FCorrelationsFrame.DisplayReport(lReport); - CorrelationsPage.TabVisible := true; - end else - CorrelationsPage.TabVisible := false; - - if MeansChkBox.Checked or VarChkBox.Checked or StdDevChkBox.Checked then - begin - AddHeaderToReport; - - if MeansChkBox.Checked then - begin - title := 'MEANS'; - DynVectorPrint(Means, NEntered, title, IndColLabels, NCases, lReport); - lReport.Add(DIVIDER_SMALL); - lReport.Add(''); - end; - - if VarChkBox.Checked then - begin - title := 'VARIANCES'; - DynVectorPrint(Variances, NEntered, title, IndColLabels, NCases, lReport); - lReport.Add(DIVIDER_SMALL); - lReport.Add(''); - end; - - if StdDevChkBox.Checked then - begin - title := 'STANDARD DEVIATIONS'; - DynVectorPrint(StdDevs, NEntered, title, IndColLabels, NCases, lReport); - lReport.Add(DIVIDER_SMALL); - lReport.Add(''); - end; - - FMeanVarStddevFrame.DisplayReport(lReport); - MeanVarStddevPage.TabVisible := true; - end else - MeanVarStddevPage.TabVisible := false; - - if errorcode then - begin - ErrorMsg('A selected variable has no variability. Run aborted.'); - exit; - end; - NoIndepVars := NEntered - 1; - - AddHeaderToReport; - - MReg(NoIndepVars, ColEntered, DepVarCol, IndRowLabels, Means, Variances, - StdDevs, BWeights, BetaWeights, BStdErrs, Bttests, tProbs, R2, - StdErrEst, NCases, errorcode, true, lReport); - - df1 := NoIndepVars - pdf1; - df2 := NCases - NoIndepVars - 1; - F := ((R2 - OldR2) / (1.0 - R2)) * df2 / df1; - FProbF := ProbF(F,df1,df2); - lReport.Add(''); - if FProbF < probIn then - lReport.Add('Entry requirements met') - else - lReport.Add('Entry requirements not met'); - - lReport.Add(''); - lReport.Add(DIVIDER); - lReport.Add(''); - - { add [predicted scores, residual scores, etc. to grid if options elected } - if PredictChkBox.Checked then - begin - prout := 1.0; - Correlations(NEntered, ColEntered, Corrs, Means, Variances, StdDevs, errcode, NCases); - - MReg2(NCases, NEntered, NoIndepVars, ColEntered, corrs, IndepInverse, - IndRowLabels, R2, BetaWeights, Means, Variances, anerror, - StdErrEst, constant, prout, true, false, false, lReport); - - Predict(ColEntered, NEntered, IndepInverse, Means, StdDevs, - BetaWeights, StdErrEst, IndepVarsCols, NoIndepVars); - end; - - if MatSaveChkBox.Checked then - begin - SaveDialog.Filter := 'LazStats matrix files (*.mat)|*.mat;*.MAT|All files (*.*)|*.*'; - SaveDialog.FilterIndex := 1; - if SaveDialog.Execute then - begin - filename := SaveDialog.FileName; - MatSave(Corrs, NoVars, NoVars, Means, StdDevs, NCases, IndRowLabels, IndColLabels, filename); - end; - end; - - FRegressionFrame.DisplayReport(lReport); - - finally - lReport.Free; - end; + if PrepareData(indepCols, depCol, RowLabels, xValues, yValues, badRows) then + Process_Regression(RowLabels, xValues, yValues, badRows); end; @@ -404,9 +220,9 @@ var index: integer; begin index := VarList.ItemIndex; - if (index > -1) and (DepVar.Text = '') then + if (index > -1) and (DepVarEdit.Text = '') then begin - DepVar.Text := VarList.Items[index]; + DepVarEdit.Text := VarList.Items[index]; VarList.Items.Delete(index); end; UpdateBtnStates; @@ -415,10 +231,10 @@ end; procedure TLSMregForm.DepOutBtnClick(Sender: TObject); begin - if DepVar.Text <> '' then + if DepVarEdit.Text <> '' then begin - VarList.Items.Add(DepVar.Text); - DepVar.Text := ''; + VarList.Items.Add(DepVarEdit.Text); + DepVarEdit.Text := ''; end; UpdateBtnStates; end; @@ -483,6 +299,235 @@ begin UpdateBtnStates; end; + +procedure TLSMregForm.PredictionToGrid(xData: DblDyneMat; yData: DblDyneVec; + const ARegressionResults: TMultipleRegressionResults; ABadRows: IntDyneVec); +var + zPred, zResid, rawPred, rawResid, stdErrPred, hi95, lo95: DblDyneVec; +begin + PredictMR(xData, yData, ARegressionResults, + zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95); + + AddVariable('Pred.z', zPred, '%8.4f', ABadRows); + AddVariable('z Resid', zResid, '%8.4f', ABadRows); + AddVariable('Raw Pred', rawPred, '%8.3f', ABadRows); + AddVariable('Raw Resid', rawResid, '%8.3f', ABadRows); + AddVariable('StdErr Pred', stdErrPred, '%8.3f', ABadRows); + AddVariable('Low 95%', lo95, '%8.3f', ABadRows); + AddVariable('Top 95%', hi95, '%8.3f', ABadRows); +end; + + +{ Prepares the data for the analysis by extracting all needed data from the + grid: + - AIndepCols: integer array containing the grid column indexes of the + independent variables to be used + - ADepCol: grid column index of the dependent variable to be used + - ARowLabels: string array containing the names of the independent variables + as well of the dependent variable (last) + - xValues: matrix with all independent values. The columns of the matrix + correspond to the variables, the row correspond to the cases. + - yValues: vector with the dependent variable values +} +function TLSMregForm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer; + out ARowLabels: StrDyneVec; out xValues: DblDyneMat; out yValues: DblDyneVec; + out ABadRows: IntDyneVec): Boolean; +var + i, n: Integer; + msg: String; + C: TWinControl; + numIndepCols: Integer; + cols: IntDyneVec = nil; +begin + Result := false; + AIndepCols := nil; + ARowLabels := nil; + xValues := nil; + yvalues := nil; + ABadRows := nil; + + if not Validate(msg, C) then + begin + C.SetFocus; + ErrorMsg(msg); + exit; + end; + + numIndepCols := IndepVars.Items.Count; + ADepCol := GetVariableIndex(OS3MainFrm.DataGrid, DepVarEdit.Text); + + SetLength(AIndepCols, numIndepCols); + SetLength(ARowLabels, numIndepCols + 1); // +1 to add independent column label + + for i := 0 to numIndepCols-1 do + begin + AIndepCols[i] := GetVariableIndex(OS3MainFrm.DataGrid, IndepVars.Items[i]); + if AIndepCols[i] = -1 then + begin + ErrorMsg('Dependent variable %s not found.', [IndepVars.Items[i]]); + exit; + end; + ARowLabels[i] := IndepVars.Items[i]; + end; + ARowLabels[numIndepCols] := DepVarEdit.Text; + + // Check variable types: all of them must be numeric (float or integer) + if not IsNumericCol(ADepCol) then + begin + ErrorMsg('Incorrect data type of dependent variable.'); + exit; + end; + + for i := 0 to numIndepCols-1 do + if not IsNumericCol(AIndepCols[i]) then + begin + ErrorMsg('Incorrect data type of independent variable "%s"', [ARowLabels[i]]); + exit; + end; + + // Prepare list of all column indices to be loaded: x, y + // ADepCol will follow immediately after the x columns. + SetLength(cols, NumIndepCols + 1); + cols[numIndepCols] := ADepCol; + for i := 0 to numIndepCols-1 do cols[i] := AIndepCols[i]; + + // Determine list of indices of rows containing invalid entries. + SetLength(ABadRows, OS3MainFrm.DataGrid.RowCount); + n := 0; + for i := 1 to OS3MainFrm.DataGrid.RowCount-1 do + if not GoodRecord(OS3MainFrm.DataGrid, i, cols) then + begin + ABadRows[n] := i; + inc(n); + end; + SetLength(ABadRows, n); + + // Extract data values; take care to skip invalid values in both x and y + xValues := CollectMatValues(OS3MainFrm.DataGrid, cols); + // The y column has index numIndepCols, i.e. follows after the x columns. + yValues := MatColVector(xValues, numIndepCols); + MatColDelete(xValues, numIndepCols); + + Result := true; +end; + + +{ Runs the least squares regression on the data in xValues and yValues } +procedure TLSMregForm.Process_Regression(const ARowLabels: StrDyneVec; + const xValues: DblDyneMat; const yValues: DblDyneVec; + const ABadRows: IntDyneVec); +var + lReport: TStrings; + regressionRes: TMultipleRegressionResults; + n: Integer; + confLevel: Double; + err: TRegressionError; + flags: Integer; + means: DblDyneVec = nil; + vars: DblDyneVec = nil; + stdDevs: DblDyneVec = nil; +begin + confLevel := 1.0 - StrToFloat(InProbEdit.Text); + + err := MultipleRegression(xValues, yValues, confLevel, regressionRes); + case err of + regOK : ; + regTooFewValues : ErrorMsg('At least two values required for regression.'); + end; + if err <> regOK then + exit; + + // Calculate means, variances, stddevs of all required variables and put them + // in vectors means, vars, stddevs, y data at end. + MatColMeanVarStdDev(xValues, means, vars, stdDevs); + n := Length(means); + SetLength(means, n+1); + SetLength(vars, n+1); + SetLength(stddevs, n+1); + VecMeanVarStdDev(yValues, means[n], vars[n], stddevs[n]); + + lReport := TStringList.Create; + try + WriteReportHeader(lReport, ARowLabels); + regressionRes.WriteCoeffsReport(lReport, ARowLabels); + FRegressionFrame.DisplayReport(lReport); + + if AnovaChk.Checked then + begin + lReport.Clear; + WriteReportHeader(lReport, ARowLabels); + regressionRes.WriteAnovaReport(lReport); + FAnovaFrame.DisplayReport(lReport); + end; + AnovaPage.TabVisible := AnovaChk.Checked; + + if CrossProductsChk.Checked then + begin + lReport.Clear; + WriteReportHeader(lReport, ARowLabels); + regressionRes.WriteCrossProductsReport(lReport, ARowLabels); + FCrossProductsFrame.DisplayReport(lReport); + end; + CrossProductsPage.TabVisible := CrossProductsChk.Checked; + + if CovChk.Checked then + begin + lReport.Clear; + WriteReportHeader(lReport, ARowLabels); + regressionRes.WriteVarCovarReport(lReport, ARowLabels); + FVarCovarFrame.DisplayReport(lReport); + end; + VarCovarPage.TabVisible := CovChk.Checked; + + if CorrsChk.Checked then + begin + lReport.Clear; + WriteReportHeader(lReport, ARowLabels); + regressionRes.WriteCorrelationReport(lReport, ARowLabels); + FCorrelationsFrame.DisplayReport(lReport); + end; + CorrelationsPage.TabVisible := CorrsChk.Checked; + + if MeansChk.Checked or VarChk.Checked or StdDevChk.Checked then + begin + lReport.Clear; + WriteReportHeader(lReport, ARowLabels); + flags := 0; + if MeansChk.Checked then inc(flags, 1); + if VarChk.Checked then inc(flags, 2); + if StdDevChk.Checked then inc(flags, 4); + WriteMeanVarStddevReport(lReport, ARowLabels, means, vars, stdDevs, flags); + FMeanVarStdDevFrame.displayReport(lReport); + end; + MeanVarStdDevPage.TabVisible := MeansChk.Checked or VarChk.Checked or StdDevChk.Checked; + + if PredictChk.Checked then + PredictionToGrid(xValues, yValues, regressionRes, ABadRows); + + if MatSaveChk.Checked then + begin + Application.ProcessMessages; + with TSaveDialog.Create(nil) do + try + Filter := 'LazStats matrix files (*.mat)|*.mat;*.MAT|All files (*.*)|*.*'; + FilterIndex := 1; + if Execute then + begin + n := Length(means); + MatSave(RegressionRes.Correlations, n-1, n-1, means, stdDevs, regressionRes.NumCases, ARowLabels, ARowLabels, Filename); + end; + finally + Free; + end; + MatSaveChk.Checked := false; + end; + + finally + lReport.Free; + end; +end; + + procedure TLSMregForm.Reset; var i: integer; @@ -496,18 +541,19 @@ begin for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); - CPChkBox.Checked := false; - CovChkBox.Checked := false; - CorrsChkBox.Checked := true; - MeansChkBox.Checked := true; - VarChkBox.Checked := false; - StdDevChkBox.Checked := true; - MatSaveChkBox.Checked := false; - PredictChkBox.Checked := false; + AnovaChk.Checked := true; + CrossProductsChk.Checked := false; + CovChk.Checked := true; + CorrsChk.Checked := true; + MeansChk.Checked := true; + VarChk.Checked := false; + StdDevChk.Checked := true; + MatSaveChk.Checked := false; + PredictChk.Checked := false; NoVars := 0; - DepVar.Text := ''; - InProb.Text := FormatFloat('0.00', DEFAULT_ALPHA_LEVEL); + DepVarEdit.Text := ''; + InProbEdit.Text := FormatFloat('0.00', DEFAULT_ALPHA_LEVEL); SetLength(IndepVarsCols, NoVariables+1); HideTabs; @@ -523,6 +569,8 @@ begin if Assigned(FRegressionFrame) then FRegressionFrame.UpdateBtnStates; + if Assigned(FAnovaFrame) then + FAnovaFrame.UpdateBtnStates; if Assigned(FCrossProductsFrame) then FCrossProductsFrame.UpdateBtnStates; if Assigned(FCorrelationsFrame) then @@ -536,11 +584,48 @@ begin DepInBtn.Enabled := lSelected; InBtn.Enabled := lSelected; OutBtn.Enabled := AnySelected(IndepVars); - DepOutBtn.Enabled := DepVar.Text <> ''; + DepOutBtn.Enabled := DepVarEdit.Text <> ''; AllBtn.Enabled := VarList.Items.Count > 0; end; +function TLSMregForm.Validate(out AMsg: String; out AControl: TWinControl): Boolean; +var + x: double; +begin + Result := false; + + if DepVarEdit.Text = '' then + begin + AControl := DepVarEdit; + AMsg := 'No dependent variable selected.'; + exit; + end; + + if IndepVars.Items.Count = 0 then + begin + AControl := IndepVars; + AMsg := 'No independent variables selected.'; + exit; + end; + + if InProbEdit.Text = '' then + begin + AControl := InProbEdit; + AMsg := 'This field cannot be empty.'; + exit; + end; + if not TryStrToFloat(InProbEdit.Text, x) then + begin + AControl := InProbEdit; + AMsg := 'Non-numeric value.'; + exit; + end; + + Result := true; +end; + + procedure TLSMregForm.VarListDblClick(Sender: TObject); var index: Integer; @@ -548,8 +633,8 @@ begin index := VarList.ItemIndex; if index > -1 then begin - if DepVar.Text = '' then - DepVar.Text := VarList.Items[index] + if DepVarEdit.Text = '' then + DepVarEdit.Text := VarList.Items[index] else IndepVars.Items.Add(VarList.Items[index]); VarList.Items.Delete(index); @@ -564,5 +649,98 @@ begin end; +{ Flags = 1 ... mean; Flags = 2 ... variacne; Flags = 4 = stdDev } +procedure TLSMRegForm.WriteMeanVarStddevReport(AReport: TStrings; + AVarNames: StrDyneVec; const AMeans, AVars, AStdDevs: DblDyneVec; + Flags: Integer); +const + W = 15; + SPACE = ' '; + MASK = SPACE + '%*.3f'; +var + s, sL, sLL: String; + i, n: Integer; +begin + s := ''; + if Flags and 1 <> 0 then + s := s + 'MEANS, '; + if Flags and 2 <> 0 then + s := s + 'VARIANCES, '; + if Flags and 4 <> 0 then + s := s + 'STANDARD DEVIATIONS, '; + SetLength(s, Length(s)-2); // remove training ', ' + + //Caption + AReport.Add(s); + + n := 1; + s := CenterString('Variable', W); + sL := DupeString('-', W); + if Flags and 1 <> 0 then + begin + s := s + SPACE + CenterString('Mean', W); + sL := sL + SPACE + Dupestring('-', W); + inc(n); + end; + if Flags and 2 <> 0 then + begin + s := s + SPACE + CenterString('Variance', W); + sL := sL + SPACE + Dupestring('-', W); + inc(n); + end; + if Flags and 4 <> 0 then + begin + s := s + SPACE + CenterString('Std.Deviation', W); + sL := sL + SPACE + Dupestring('-', W); + inc(n); + end; + + // Divider below caption + sLL := DupeString('-', n*W + (n-1) * Length(SPACE)); + AReport.Add(sLL); + + // Table headers + AReport.Add(s); + // Table header separating line + AReport.Add(sL); + + // Table cells + n := Length(AMeans); + for i := 0 to n-1 do + begin + s := Format('%*s', [W, AVarNames[i]]); + if Flags and 1 <> 0 then + s := s + Format(MASK, [W, AMeans[i]]); + if Flags and 2 <> 0 then + s := s + Format(MASK, [W, AVars[i]]); + if Flags and 4 <> 0 then + s := s + Format(MASK, [W, AStdDevs[i]]); + AReport.Add(s); + if i = n-2 then + AReport.Add(sL); + end; + + // Final dividing line below table + AReport.Add(sLL); +end; + + +procedure TLSMRegForm.WriteReportHeader(AReport: TStrings; AVarNames: StrDyneVec); +var + i, n: Integer; +begin + n := Length(AVarNames); + AReport.Clear; + AReport.Add('LEAST SQUARES REGRESSION RESULTS'); + AReport.Add(''); + AReport.Add('Dependent variable: '); + AReport.Add(' ' + AVarNames[n-1]); + AReport.Add('Independent variables:'); + for i := 0 to n-2 do + AReport.Add(' ' + AVarNames[i]); + AReport.Add(''); +end; + + end. diff --git a/applications/lazstats/source/forms/analysis/multiple_regression/wlsunit.pas b/applications/lazstats/source/forms/analysis/multiple_regression/wlsunit.pas index d262a807f..16b5f37d3 100644 --- a/applications/lazstats/source/forms/analysis/multiple_regression/wlsunit.pas +++ b/applications/lazstats/source/forms/analysis/multiple_regression/wlsunit.pas @@ -728,9 +728,9 @@ begin end; SetLength(ABadRows, n); - // Extract data values; take care to omit invalid values in both x and y + // Extract data values; take care to skip invalid values in both x and y xValues := CollectMatValues(OS3MainFrm.DataGrid, cols); - // The y column has index numIndepCols, i.e. follows the x columns. + // The y column has index numIndepCols, i.e. follows after the x columns. yValues := MatColVector(xValues, numIndepCols); MatColDelete(xValues, numIndepCols); if AWeightCol > -1 then diff --git a/applications/lazstats/source/units/dataprocs.pas b/applications/lazstats/source/units/dataprocs.pas index ae55ffb7e..894579fc2 100644 --- a/applications/lazstats/source/units/dataprocs.pas +++ b/applications/lazstats/source/units/dataprocs.pas @@ -845,13 +845,13 @@ begin WriteLn(mat_file, NoCols); WriteLn(mat_file, NCases); - for i := 1 to NoRows do WriteLn(mat_file, RowLabels[i-1]); - for i := 1 to NoCols do WriteLn(mat_file, ColLabels[i-1]); - for i := 1 to NoCols do WriteLn(mat_file, Means[i-1]); - for i := 1 to NoCols do WriteLn(mat_file, StdDevs[i-1]); - for i := 1 to NoRows do - for j := 1 to NoCols do - WriteLn(mat_file, a[i-1, j-1]); + for i := 0 to NoRows-1 do WriteLn(mat_file, RowLabels[i]); + for i := 0 to NoCols-1 do WriteLn(mat_file, ColLabels[i]); + for i := 0 to NoCols-1 do WriteLn(mat_file, Means[i]); + for i := 0 to NoCols-1 do WriteLn(mat_file, StdDevs[i]); + for i := 0 to NoRows-1 do + for j := 0 to NoCols-1 do + WriteLn(mat_file, a[i, j]); CloseFile(mat_file); end; { matrix save routine } diff --git a/applications/lazstats/source/units/matrixlib.pas b/applications/lazstats/source/units/matrixlib.pas index deadbb5d5..3440694d5 100644 --- a/applications/lazstats/source/units/matrixlib.pas +++ b/applications/lazstats/source/units/matrixlib.pas @@ -6,7 +6,7 @@ interface uses Classes, SysUtils, Dialogs, - Globals, DictionaryUnit, FunctionsLib, DataProcs, MainUnit; + Globals, DictionaryUnit, FunctionsLib, GridProcs, DataProcs, MainUnit; //type // TRegItem = (riMeanVarStdDev, riXTX); @@ -142,6 +142,9 @@ procedure DynIntMatPrint(Mat: IntDyneMat; Rows, Cols: integer; YTitle: string; procedure SymMatRoots(A : DblDyneMat; M : integer; VAR E : DblDyneVec; VAR V : DblDyneMat); procedure matinv(a, vtimesw, v, w: DblDyneMat; n: integer); +procedure AddVariable(AVarName: String; AData: DblDyneVec; + ANumFormat: String; const ABadRows: IntDyneVec); + implementation @@ -1113,10 +1116,12 @@ begin DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'Pred.z'; OS3MainFrm.DataGrid.Cells[col,0] := 'Pred.z'; + col := NoVariables + 1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'z Resid.'; OS3MainFrm.DataGrid.Cells[col,0] := 'z Resid.'; + for i := 1 to NoCases do begin zpredicted := 0.0; @@ -1134,10 +1139,12 @@ begin z2 := (z2 - Means[NoVars-1]) / StdDevs[NoVars-1]; OS3MainFrm.DataGrid.Cells[col,i] := format('%8.4f',[(z2 - zpredicted)]); end; + col := NoVariables + 1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'Pred.Raw'; OS3MainFrm.DataGrid.Cells[col,0] := 'Pred.Raw'; + { calculate raw predicted scores and store in grid at col } for i := 1 to NoCases do begin @@ -1145,11 +1152,13 @@ begin StdDevs[NoVars-1] + Means[NoVars-1]; OS3MainFrm.DataGrid.Cells[col,i] := format('%8.3f',[predicted]); end; + { Calculate residuals of predicted raw scores } col := NoVariables +1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'Raw Resid.'; OS3MainFrm.DataGrid.Cells[col,0] := 'Raw Resid.'; + for i := 1 to NoCases do begin Index := ColNoSelected[NoVars-1]; @@ -1157,19 +1166,23 @@ begin StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[Index,i])); OS3MainFrm.DataGrid.Cells[col,i] := Format('%8.3f',[resid]); end; + { Calculate Confidence Interval for raw predicted score } col := NoVariables + 1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'StdErrPred'; OS3MainFrm.DataGrid.Cells[col,0] := 'StdErrPred'; + col := NoVariables + 1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'Low 95%'; OS3MainFrm.DataGrid.Cells[col,0] := 'Low 95%'; + col := NoVariables + 1; DictionaryFrm.NewVar(col); DictionaryFrm.DictGrid.Cells[1,col] := 'Top 95%'; OS3MainFrm.DataGrid.Cells[col,0] := 'Top 95%'; + for i := 1 to NoCases do begin { get term1 of the std. err. prediction } @@ -2348,5 +2361,50 @@ begin rv1 := nil; end; +{ Adds a new variable named AColTitle after the last grid column + and writes the specified data to it in the specified number format. + Rows mentioned in ABadRows must be omitted because they are not contained in + AData. } +procedure AddVariable(AVarName: String; AData: DblDyneVec; + ANumFormat: String; const ABadRows: IntDyneVec); + + function IsBadRow(ARow: Integer): Boolean; + var + j: Integer; + begin + for j := 0 to High(ABadRows) do + if ARow = ABadRows[j] then + begin + Result := true; + exit; + end; + Result := false; + end; + +var + i, j, colIndex, row: Integer; +begin + colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname); + if colIndex = -1 then + begin + colIndex := NoVariables + 1; + DictionaryFrm.NewVar(colIndex); + DictionaryFrm.DictGrid.Cells[1, colIndex] := AVarName; + DictionaryFrm.DictGrid.Cells[7, colIndex] := 'R'; + OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName; + OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables); + end; + row := 1; + for i := 0 to High(AData) do + begin + while IsBadRow(row) do inc(row); + if row >= OS3MainFrm.DataGrid.RowCount then + raise Exception.Create('Bad row error.'); + OS3MainFrm.DataGrid.Cells[colIndex, row] := Format(ANumFormat, [AData[i]]); + inc(row); + end; +end; + + end. diff --git a/applications/lazstats/source/units/matrixunit.pas b/applications/lazstats/source/units/matrixunit.pas index f126387b5..9274ec853 100644 --- a/applications/lazstats/source/units/matrixunit.pas +++ b/applications/lazstats/source/units/matrixunit.pas @@ -76,6 +76,8 @@ function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector; procedure MatSize(A: TDblMatrix; out n, m: Integer); function MatTransposed(A: TDblMatrix): TDblMatrix; +function SubMatrix(A: TDblMatrix; i1,j1, i2,j2: Integer): TDblMatrix; + // Sorting procedure Exchange(var a, b: Double); overload; procedure Exchange(var a, b: Integer); overload; @@ -685,6 +687,25 @@ begin end; +function SubMatrix(A: TDblMatrix; i1,j1, i2,j2: Integer): TDblMatrix; +var + i, j, n, m: Integer; +begin + MatSize(A, n,m); + i1 := EnsureRange(i1, 0, n); + i2 := EnsureRange(i2, 0, n); + j1 := EnsureRange(j1, 0, m); + j2 := EnsureRange(j2, 0, m); + if i1 > i2 then Exchange(i1, i2); + if j1 > j2 then Exchange(j1, j2); + + SetLength(Result, i2-i1+1, j2-j1+1); + for i := i1 to i2 do + for j := j1 to j2 do + Result[i-i1, j-j1] := A[i, j]; +end; + + {=============================================================================== TLUSolver -------------------------------------------------------------------------------- diff --git a/applications/lazstats/source/units/regressionunit.pas b/applications/lazstats/source/units/regressionunit.pas index 7e9e500f4..ae715ab2f 100644 --- a/applications/lazstats/source/units/regressionunit.pas +++ b/applications/lazstats/source/units/regressionunit.pas @@ -43,8 +43,10 @@ type CoeffStdErrors: TDblVector; // Standard errors of the coefficients Beta: TDblVector; // Standardized coefficients + XT_X: TDblMatrix; // Cross-products matrix VarCovar: TDblMatrix; // Variance-covariance matrix Correlations: TDblMatrix; // Correlations matrix + InvIndepCorrs: TDblMatrix; // Inverse of the INDEPENDENT correlation matrix SStotal, SSreg, SSresid: Double; // Sum of squares NumCases, NumDepVars: Integer; // number of cases, number of dependent variables @@ -59,24 +61,29 @@ type Means: TDblVector; Variances: TDblVector; StdDevs: TDblVector; - (* + XMeans: TDblVector; // Means of the x columns XVariances: TDblVector; // Variances of the x columns XStdDevs: TDblVector; // Std Deviations of the x columns - *) - MeanY: Double; // Mean of y column - VarianceY: Double; // Variance of y column - StdDevY: Double; // Std Deviation of y column + + YMean: Double; // Mean of y column + YVariance: Double; // Variance of y column + YStdDev: Double; // Std Deviation of y column procedure WriteANOVAReport(AReport: TStrings); procedure WriteCoeffsReport(AReport: TStrings; AVarNames: TStringArray); procedure WriteCorrelationReport(AReport: TStrings; AVarNames: TStringArray); + procedure WriteCrossProductsReport(AReport: TStrings; AVarNames: TStringArray); procedure WriteVarCovarReport(AReport: TStrings; AVarNames: TStringArray); end; function MultipleRegression(const xData: TDblMatrix; const yData: TDblVector; AConfLevel: Double; out AResults: TMultipleRegressionResults): TRegressionError; +procedure PredictMR(const xData: TDblMatrix; const yData: TDblVector; + const ARegressionResults: TMultipleRegressionResults; + out zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95: TDblVector); + implementation @@ -171,6 +178,7 @@ begin // Product of transposed augmented with augmented matrix XT_X := XT * X; + AResults.XT_X := Matcopy(XT_X); // Product of inverse of XT_X with XT_Y yields the coefficients. Intercept is last. inv_XT_X := MatInverse(XT_X); @@ -187,24 +195,20 @@ begin AResults.Means[i] := XT_X[i, mx] / nx; AResults.Variances[i] := (XT_X[i, i] - sqr(AResults.Means[i]) * nx) / (nx - 1); AResults.StdDevs[i] := sqrt(AResults.Variances[i]); - { - if AResults.StdDevs[i] = 0.0 then - begin - Result := regStdDevZero; - exit; - end; - } end; // Variance-covariance matrix - SetLength(AResults.VarCovar, mx, mx); - SetLength(AResults.Correlations, mx, mx); - for i := 0 to mx-1 do - for j := 0 to mx-1 do - begin - AResults.VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1); - AResults.Correlations[i, j] := AResults.VarCovar[i, j] / (AResults.StdDevs[i] * AResults.StdDevs[j]); - end; + with AResults do + begin + SetLength(VarCovar, mx, mx); + SetLength(Correlations, mx, mx); + for i := 0 to mx-1 do + for j := 0 to mx-1 do + begin + VarCovar[i, j] := (XT_X[i, j] - XT_X[i, mx] * XT_X[j, mx] / nx) / (nx - 1); + Correlations[i, j] := VarCovar[i, j] / (StdDevs[i] * StdDevs[j]); + end; + end; with AResults do begin @@ -213,24 +217,17 @@ begin DFresid := NumCases - NumDepVars - 1; // Calculate column means of xData - // MatColMeanVarStdDev(X, XMeans, XVariances, XStdDevs); + MatColMeanVarStdDev(xData, xMeans, xVariances, xStdDevs); // Basic stats of y data column VecSumSS(yData, SY, SSY); - MeanY := SY / NumCases; - SStotal := SSY - sqr(MeanY) * NumCases; // needed for ANOVA - VarianceY := SStotal / (NumCases - 1); - StdDevY := sqrt(VarianceY); - { - if StdDevY = 0 then - begin - Result := regStdDevZero; - exit; - end; - } + yMean := SY / NumCases; + SStotal := SSY - sqr(yMean) * NumCases; // needed for ANOVA + yVariance := SStotal / (NumCases - 1); + yStdDev := sqrt(yVariance); // Standardized coefficients - Beta := VecMultiply(Coeffs, StdDevs) * (1.0/StdDevY); + Beta := VecMultiply(Coeffs, StdDevs) * (1.0/yStdDev); // Get standard errors, squared multiple correlation, tests of significance SetLength(CoeffStdErrors, mx+1); @@ -251,6 +248,8 @@ begin t[i] := Coeffs[i] / CoeffStdErrors[i]; p[i] := ProbT(t[i], DFresid); end; + + InvIndepCorrs := MatInverse(SubMatrix(Correlations, 0, 0, mx-1, mx-1)); end; Result := regOK; @@ -266,13 +265,13 @@ begin DFtotal := NumCases - 1; DFresid := DFtotal - DFreg; AReport.Add('ANOVA'); - AReport.Add('------------------------------------------------------------------------------'); - AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability'); - AReport.Add('---------- --- ---------------- ---------------- ------------ -----------'); - AReport.Add('Regression %3d %16.3f %16.3f %12.4f %10.4f', [DFreg, SSreg, SSreg/DFreg, F, Probability]); - AReport.Add('Residual %3d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]); - AReport.Add('Total %3d %16.3f', [DFtotal, SStotal]); - AReport.Add('------------------------------------------------------------------------------'); + AReport.Add('--------------------------------------------------------------------------'); + AReport.Add('SOURCE DF Sum of Squares Mean Square F Probability'); + AReport.Add('---------- --- ---------------- ---------------- -------- -----------'); + AReport.Add('Regression %4d %16.3f %16.3f %8.3f %10.3f', [DFreg, SSreg, SSreg/DFreg, F, Probability]); + AReport.Add('Residual %4d %16.3f %16.3f', [DFresid, SSresid, SSresid/DFresid]); + AReport.Add('Total %4d %16.3f', [DFtotal, SStotal]); + AReport.Add('--------------------------------------------------------------------------'); AReport.Add(''); AReport.Add('R2: %10.4f', [R2]); AReport.Add('Adjusted R2: %10.4f', [AdjR2]); @@ -291,9 +290,9 @@ var i, first, last: Integer; begin AReport.Add('COEFFICIENTS'); - AReport.Add('------------------------------------------------------------------------------'); - AReport.Add(' Variable Unstandardized Std.Error Standardized t p '); - AReport.Add('------------ -------------- ------------ ------------ --------- ---------'); + AReport.Add('--------------------------------------------------------------------------'); + AReport.Add(' Variable Unstandardized Std.Error Standardized t p '); + AReport.Add('------------ -------------- ------------ ------------ ------- -------'); first := 0; last := High(Coeffs); @@ -307,10 +306,10 @@ begin else varName := Format('VAR.%d', [i+1]); - AReport.Add('%12s %14.3f %12.3f %12.3f %9.4f %9.4f', + AReport.Add('%12s %14.3f %12.3f %12.3f %7.3f %7.3f', [varName, Coeffs[i], CoeffStdErrors[i], Beta[i], t[i], p[i]]); end; - AReport.Add('------------------------------------------------------------------------------'); + AReport.Add('--------------------------------------------------------------------------'); end; @@ -323,12 +322,26 @@ begin end; +{ Writes the cross-product matrix to a "report" + The names of the independent variables are neede in the array AVarNames. } +procedure TMultipleRegressionResults.WriteCrossProductsReport(AReport: TStrings; + AVarNames: TStringArray); +var + saved: String; +begin + saved := AVarNames[High(AVarNames)]; + AVarNames[High(AVarNames)] := '(Intercept)'; + WriteMatrixReport(AReport, XT_X, 'CROSS-PRODUCT MATRIX', AVarNames); + AVarNames[High(AVarNames)] := saved; +end; + + procedure TMultipleRegressionResults.WriteMatrixReport(AReport: TStrings; AMatrix: TDblMatrix; ATitle: String; AVarNames: TStringArray); function GetVarName(i: Integer): String; begin - if i < High(AVarNames) then + if i <= High(AVarNames) then Result := AVarNames[i] else Result := Format('VAR.%d', [i+1]); @@ -376,5 +389,90 @@ begin end; +procedure PredictMR(const xData: TDblMatrix; const yData: TDblVector; + const ARegressionResults: TMultipleRegressionResults; + out zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95: TDblVector); +var + i, j, k, n, m: Integer; + x, x1, x2, y: Double; + term1, term2, t: Double; +begin + zPred := nil; // to silence the compiler + zResid := nil; + RawPred := nil; + RawResid := nil; + StdErrPred := nil; + Hi95 := nil; + Lo95 := nil; + + MatSize(xData, n,m); + + // z predicted and zResidual + SetLength(zPred, n); + SetLength(zResid, n); + for i := 0 to n-1 do + begin + zPred[i] := 0; + for j := 0 to m-1 do + begin + x := (xData[i, j] - ARegressionResults.xMeans[j]) / ARegressionResults.xStdDevs[j]; + zPred[i] := zPred[i] + ARegressionResults.Beta[j] * x; + end; + y := (yData[i] - ARegressionResults.yMean) / ARegressionResults.yStdDev; + zResid[i] := y - zPred[i]; + end; + + // Raw predicted and raw residuals + SetLength(RawPred, n); + SetLength(RawResid, n); + for i := 0 to n-1 do + begin + RawPred[i] := ARegressionResults.Coeffs[m]; // intercept + for j := 0 to m-1 do + RawPred[i] := RawPred[i] + ARegressionResults.Coeffs[j] * xData[i, j]; + RawResid[i] := yData[i] - RawPred[i]; + end; + + // Calculate confidence interval for raw predicted score + SetLength(StdErrPred, n); + SetLength(Hi95, n); + SetLength(Lo95, n); + + for i := 0 to n-1 do + begin + // Get term1 of the std. err. prediction + term1 := 0.0; + for j := 0 to m-1 do + begin + with ARegressionResults do + begin + x := (xData[i, j] - xMeans[j]) / xStdDevs[j]; + term1 := term1 + x * x * InvIndepCorrs[j, j];; + end; + end; + + // Get term2 of the std err. of prediction + term2 := 0.0; + for j := 0 to m-1 do + begin + for k := j + 1 to m-1 do + begin + with ARegressionResults do + begin + x1 := (xData[i, j] - xMeans[j]) / xStdDevs[j]; + x2 := (xData[i, k] - xMeans[k]) / xStdDevs[k]; + term2 := term2 + x1 * x2 * InvIndepCorrs[j, k]; + end; + end; + end; + term2 := 2.0 * Term2; + stdErrPred[i] := sqrt(n + 1 + term1 + term2); + stdErrPred[i] := (ARegressionResults.StdErrorEstimate / sqrt(n)) * stdErrPred[i]; + t := InverseT(0.975, n - m - 1); + Lo95[i] := RawPred[i] - t*StdErrPred[i]; + Hi95[i] := RawPred[i] + t*StdErrPred[i]; + end; +end; + end.