unit BestRegUnit; {$mode objfpc}{$H+} interface uses Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls, Buttons, ExtCtrls, Globals, MainUnit, MatrixLib, FunctionsLib, DataProcs, BasicStatsReportFormUnit; type { TBestRegForm } TBestRegForm = class(TBasicStatsReportForm) CPChkBox: TCheckBox; ComboShowChkBox: TCheckBox; CovChkBox: TCheckBox; CorrsChkBox: TCheckBox; MeansChkBox: TCheckBox; VarChkBox: TCheckBox; SDChkBox: TCheckBox; MatSaveChkBox: TCheckBox; PredictChkBox: TCheckBox; MatInChkBox: TCheckBox; InBtn: TBitBtn; OutBtn: TBitBtn; AllBtn: TBitBtn; DepInBtn: TBitBtn; DepOutBtn: TBitBtn; DepVarEdit: TEdit; InProbEdit: TEdit; GroupBox1: TGroupBox; Label1: TLabel; Label2: TLabel; Label3: TLabel; Label4: TLabel; BlockList: TListBox; OpenDialog1: TOpenDialog; SaveDialog1: TSaveDialog; VarList: TListBox; procedure AllBtnClick(Sender: TObject); procedure BlockListDblClick(Sender: TObject); procedure DepInBtnClick(Sender: TObject); procedure DepOutBtnClick(Sender: TObject); procedure InBtnClick(Sender: TObject); procedure OutBtnClick(Sender: TObject); procedure VarListDblClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); private { private declarations } pred_labels : StrDyneVec; y_ptr, v : integer; ii, jj : integer; pointer : integer; sets : integer; selected : IntDyneVec; max_set : IntDyneVec; cross_prod : DblDyneMat; ind_mat : DblDyneMat; end_of_set : boolean; all_done : boolean; more_to_do : boolean; no_predictors: integer; last_set : integer; first_pt : integer; testval : integer; sumx : DblDyneVec; mean : DblDyneVec; stddev : DblDyneVec; variance : DblDyneVec; xycross : DblDyneVec; raw_b : DblDyneVec; count : double; b_zero : double; stop_prob : double; mult_R2 : double; biggest_R2 : double; last_R2 : double; f_test : double; t, beta : double; ss_res : double; ms_res : double; ss_reg : double; ms_reg : double; df_reg : integer; df_res : integer; df1 : integer; df_tot : integer; prob_f : double; ss_total : double; seb : double; R2_diff : double; prout : double; errorcode : integer; errcode : boolean; DepVarCol : integer; RowLabels : StrDyneVec; ColLabels : StrDyneVec; ColNoSelected : IntDyneVec; NCases : integer; NoVars : integer; procedure Init; procedure Regress(AReport: TStrings); procedure BestSetStats(AReport: TStrings); procedure BumpOne; procedure StartSet; procedure ResetVars; protected procedure AdjustConstraints; override; procedure Compute; override; procedure UpdateBtnStates; override; public constructor Create(AOwner: TComponent); override; procedure Reset; override; end; var BestRegForm: TBestRegForm; implementation {$R *.lfm} uses Math, Utils, MathUnit; { TBestRegForm } constructor TBestRegForm.Create(AOwner: TComponent); begin inherited; InProbEdit.Text := FormatFloat('0.00', DEFAULT_ALPHA_LEVEL); end; procedure TBestRegForm.AdjustConstraints; begin inherited; ParamsPanel.Constraints.MinWidth := Max( 4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left, Groupbox1.Width); ParamsPanel.Constraints.MinHeight := InProbEdit.Top + InProbEdit.Height + InProbEdit.BorderSpacing.Top + GroupBox1.Height + ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height; end; procedure TBestRegForm.AllBtnClick(Sender: TObject); var index: integer; begin for index := 0 to VarList.Items.Count-1 do BlockList.Items.Add(VarList.Items[index]); VarList.Clear; UpdateBtnStates; end; procedure TBestRegForm.BestSetStats(AReport: TStrings); var i, j: integer; outline: string; begin AReport.Add(''); AReport.Add('Variables entered in step %d', [sets]); for i := 1 to sets do begin ii := max_set[i-1]; selected[i-1] := max_set[i-1]; AReport.Add('%2d %s',[max_set[i-1],ColLabels[ii-1]]); end; AReport.Add(''); Regress(AReport); AReport.Add('Squared Multiple Correlation: %.4f', [mult_r2]); AReport.Add('Dependent variable: %s', [ColLabels[y_ptr-1]]); AReport.Add(''); AReport.Add('ANOVA for Regression Effects: '); AReport.Add('SOURCE df SS MS F Prob'); df_reg := sets; df_res := round(NCases) - sets - 1; df_tot := round(NCases) - 1; ms_reg := ss_reg / df_reg; ss_res := ( 1.0 - mult_R2) * ss_total ; ms_res := ss_res / df_res ; f_test := ms_reg / ms_res ; prob_f := ProbF(f_test, df_reg,df_res); { Get variance of b coefficients } AReport.Add('Regression %3d %14.4f %14.4f %14.4f %14.4f', [df_reg, ss_reg, ms_reg, f_test, prob_f]); AReport.Add('Residual %3d %14.4f %14.4f', [df_res, ss_res, ms_res]); AReport.Add('Total %3d %14.4f', [df_tot, ss_total]); AReport.Add(''); AReport.Add('Variables in the equation'); AReport.Add('VARIABLE b s.e. b Beta t prob. t'); for i := 1 to sets do for j := 1 to sets do ind_mat[i-1,j-1] := ind_mat[i-1,j-1] * ms_res ; for i := 1 to sets do begin ii := selected[i-1]; pred_labels[i-1] := ColLabels[ii-1]; outline := Format('%16s %10.5f ',[ColLabels[ii-1],raw_b[i-1]]); seb := sqrt(ind_mat[i-1,i-1]); t := raw_b[i-1] / seb ; f_test := t * t ; prob_f := probf(f_test,1,df_res); beta := raw_b[i-1] * stddev[ii-1] / stddev[y_ptr-1] ; outline := outline + Format('%8.4f %8.4f %6.3f %6.4f', [seb,beta,t,prob_f]); AReport.Add(outline); end; AReport.Add('(Intercept) %10.5f',[b_zero]); AReport.Add(''); { MAT_PRINT(sets,ind_mat,pred_labels,'Variance-covariance matrix of b s');} { Now see if the gain was significant over last step } df1 := sets - last_set ; f_test := ((mult_R2 - last_R2 ) / df1 ) / ( (1.0 - mult_R2) / df_res) ; prob_f := probf(f_test, df1,df_res); if prob_f > stop_prob then more_to_do := FALSE ; R2_diff := mult_R2 - last_R2 ; AReport.Add('Increase in squared R for this step: %8.6f', [R2_diff]); AReport.Add('F: %.4f with D.F. %d and %d with Probability %.4f', [f_test, df1, df_res, prob_f]); AReport.Add(''); AReport.Add('----------------------------------------------------------'); last_set := sets; last_R2 := mult_R2; end; procedure TBestRegForm.BlockListDblClick(Sender: TObject); var index: Integer; begin index := BlockList.ItemIndex; if index > -1 then begin VarList.Items.Add(BlockList.Items[index]); BlockList.Items.Delete(index); UpdatebtnStates; end; end; procedure TBestRegForm.BumpOne; begin if selected[first_pt-1] < no_predictors then selected[first_pt-1] := selected[first_pt-1] + 1 else begin all_done := false; while not all_done do begin first_pt := first_pt -1; if first_pt < 1 then all_done := true else begin selected[first_pt-1] := selected[first_pt-1] + 1; if selected[first_pt-1] < selected[first_pt] then begin ResetVars(); first_pt := pointer; all_done := true; end else selected[first_pt-1] := selected[first_pt-1] - 1; end; end; end; end; procedure TBestRegForm.Compute; var i, j: integer; title: string; cellstring: string; filename: string; R2: double; StdErrEst: double; IndepIndex: IntDyneVec = nil; constant: double; lReport: TStrings; begin if InProbEdit.Text = '' then begin InProbEdit.SetFocus; MessageDlg('Probability for inclusion not specified.', mtError, [mbOK], 0); exit; end; if not TryStrToFloat(InProbEdit.Text, stop_prob) then // probability to include a block begin InProbEdit.SetFocus; MessageDlg('No number given for probability.', mtError, [mbOk], 0); exit; end; if MatInChkBox.Checked then NoVariables := 200; SetLength(cross_prod, NoVariables+1, NoVariables+1); SetLength(ind_mat, NoVariables+1, NoVariables+1); SetLength(sumx, NoVariables); SetLength(mean, NoVariables); SetLength(stddev, NoVariables); SetLength(variance, NoVariables); SetLength(xycross, NoVariables); SetLength(raw_b, NoVariables); SetLength(RowLabels, NoVariables); SetLength(ColLabels, NoVariables); SetLength(IndepIndex, NoVariables); SetLength(ColNoSelected, NoVariables); SetLength(Selected, NoVariables); SetLength(Max_Set, NoVariables); SetLength(pred_labels, NoVariables); lReport := TStringList.Create; try lReport.Add('BEST COMBINATION MULTIPLEX REGRESSION by Bill Miller'); errorcode := 0; last_R2 := 0.0; last_set := 0 ; more_to_do := TRUE; prout := 1.0; { get data } if MatInChkBox.Checked then begin PredictChkBox.Checked := false; MatSaveChkBox.Checked := false; CPChkBox.Checked := false; OpenDialog1.Filter := 'LazStats matrix files (*.mat)|*.mat;*.MAT|All files (*.*)|*.*'; OpenDialog1.FilterIndex := 1; if OpenDialog1.Execute then begin filename := OpenDialog1.FileName; MatRead(cross_prod, NoVars, NoVars, Mean, stddev, NCases, RowLabels, ColLabels, filename); for i := 1 to NoVars do variance[i-1] := sqr(stddev[i-1]); MessageDlg('Last variable in matrix is the dependent variable', mtInformation, [mbOK], 0); lReport.Add('====================================================================='); end; if CorrsChkBox.Checked then begin lReport.Add(''); title := 'Product-Moment Correlations Matrix'; MatPrint(cross_prod, NoVars, NoVars, title, RowLabels, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if MeansChkBox.Checked then begin lReport.Add(''); title := 'Means'; DynVectorPrint(mean, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if VarChkBox.Checked then begin lReport.Add(''); title := 'Variances'; DynVectorPrint(variance, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if SDChkBox.Checked = true then begin lReport.Add(''); title := 'Standard Deviations'; DynVectorPrint(stddev, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; DepVarCol := NoVars; y_ptr := NoVars; DepVarEdit.Text := RowLabels[NoVars]; { convert correlations to deviation cross-products } for i := 1 to NoVars do for j := 1 to NoVars do cross_prod[i-1,j-1] := cross_prod[i-1,j-1] * stddev[i-1] * stddev[j-1] * (NCases - 1); end; if not MatInChkBox.Checked then begin { get independent item columns } NoVars := BlockList.Items.Count; if NoVars < 1 then begin MessageDlg('No independent variables selected.', mtError, [mbOK], 0); exit; end; for i := 1 to NoVars do begin cellstring := BlockList.Items.Strings[i-1]; for j := 1 to NoVariables do begin if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then begin ColNoSelected[i-1] := j; RowLabels[i-1] := cellstring; ColLabels[i-1] := cellstring; end; end; end; { get dependendent variable column } if DepVarEdit.Text = '' then begin MessageDlg('No dependent variable selected.', mtError, [mbOK], 0); exit; end; DepVarCol := 0; NoVars := NoVars + 1; y_ptr := NoVars; for j := 1 to NoVariables do begin if DepVarEdit.Text = OS3MainFrm.DataGrid.Cells[j,0] then begin DepVarCol := j; ColNoSelected[NoVars-1] := j; RowLabels[NoVars-1] := OS3MainFrm.DataGrid.Cells[j,0]; ColLabels[NoVars-1] := RowLabels[NoVars-1]; end; end; Init; title := 'Cross-Products Matrix'; GridXProd(NoVars,ColNoSelected,cross_prod,true,NCases); for i := 1 to NoVars do begin sumx[i-1] := cross_prod[i-1,NoVars]; mean[i-1] := sumx[i-1] / NCases; variance[i-1] := cross_prod[i-1,i-1] - (sumx[i-1] * sumx[i-1] / NCases); variance[i-1] := variance[i-1] / (NCases-1); if variance[i-1] > 0 then stddev[i-1] := sqrt(variance[i-1]) else begin MessageDlg('No variance for a variable!',mtError, [mbOK], 0); exit; end; end; if CPChkBox.Checked then begin lReport.Add(''); MatPrint(cross_prod, NoVars, NoVars, title, RowLabels, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; {get deviation cross-products matrix} for i := 1 to NoVars do for j := 1 to NoVars do cross_prod[i-1,j-1] := cross_prod[i-1,j-1] - (mean[i-1] * mean[j-1] * NCases); end; if CovChkBox.Checked then begin lReport.Add(''); title := 'Deviation Cross-Products Matrix'; MatPrint(cross_prod, NoVars, NoVars, title, RowLabels, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; v := NoVars; no_predictors := v - 1; ss_total := cross_prod[y_ptr-1,y_ptr-1]; biggest_R2 := 0.0; { Find best single predictor } sets := 1; for j := 1 to no_predictors do begin selected[0] := j; Regress(lReport); end; BestSetStats(lReport); { Find best combinations of 2 to no_predictors - 1 } sets := 2; while sets < no_predictors do begin end_of_set := FALSE; StartSet(); while not end_of_set do begin Regress(lReport); BumpOne(); end; Regress(lReport); BestSetStats(lReport); inc(sets); end; dec(sets); // no. of predictors { Find regression with all of the predictors } if more_to_do then begin sets := no_predictors; for i := 1 to sets do selected[i-1] := i; Regress(lReport); BestSetStats(lReport); end else begin lReport.Add(''); lReport.Add('Last variable added failed entry test. Job ended.'); end; if not MatInChkBox.Checked then begin { get correlation matrix and save if elected } Correlations(NoVars, ColNoSelected, cross_prod, mean, variance, stddev, errcode, NCases); if CorrsChkBox.Checked then begin title := 'Product-Moment Correlations Matrix'; MatPrint(cross_prod, NoVars, NoVars, title, RowLabels, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if MeansChkBox.Checked then begin title := 'Means'; DynVectorPrint(mean, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if VarChkBox.Checked then begin title := 'Variances'; DynVectorPrint(variance, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if SDChkBox.Checked then begin title := 'Standard Deviations'; DynVectorPrint(stddev, NoVars, title, ColLabels, NCases, lReport); lReport.Add('====================================================================='); end; if MatSaveChkBox.Checked then begin SaveDialog1.Filter := 'LazStats matrix files (*.mat)|*.mat;*.MAT|All files (*.*)|*.*'; SaveDialog1.FilterIndex := 1; if SaveDialog1.Execute then begin filename := SaveDialog1.FileName; MatSave(cross_prod, NoVars, NoVars, mean, stddev, NCases, RowLabels, ColLabels, filename); end; end; { add [predicted scores, residual scores, etc. to grid if options elected } if PredictChkBox.Checked then begin for i := 1 to sets do begin ii := selected[i-1]; IndepIndex[i-1] := ii; //ColNoSelected[ii]; end; prout := 1.0; MReg2(NCases, NoVars, sets, IndepIndex, cross_prod, ind_mat, RowLabels, R2, raw_b, mean, variance, errorcode, StdErrEst, constant,prout, true, false,false, lReport ); Predict(ColNoSelected, NoVars, ind_mat, mean, stddev, raw_b, StdErrEst, IndepIndex, sets ); end; end; FReportFrame.DisplayReport(lReport); finally lReport.Free; end; end; procedure TBestRegForm.DepInBtnClick(Sender: TObject); var index: integer; begin index := VarList.ItemIndex; if (index > -1) and (DepVarEdit.Text = '') then begin DepVarEdit.Text := VarList.Items[index]; VarList.Items.Delete(index); end; UpdateBtnStates; end; procedure TBestRegForm.DepOutBtnClick(Sender: TObject); begin if DepVarEdit.Text <> '' then begin VarList.Items.Add(DepVarEdit.Text); DepVarEdit.Text := ''; end; UpdateBtnStates; end; procedure TBestRegForm.InBtnClick(Sender: TObject); var i: integer; begin i := 0; while i < VarList.Items.Count do begin if VarList.Selected[i] then begin BlockList.Items.Add(VarList.Items[i]); VarList.Items.Delete(i); i := 0; end else inc(i); end; UpdateBtnStates; end; procedure TBestRegForm.Init; var i, j: integer; begin count := 0.0; for i := 1 to NoVariables do begin sumx[i-1] := 0.0; mean[i-1] := 0.0; variance[i-1] := 0.0; stddev[i-1] := 0.0; for j := 1 to v do cross_prod[i-1,j-1] := 0.0; end; end; procedure TBestRegForm.OutBtnClick(Sender: TObject); var i: integer; begin i := 0; while i < BlockList.Items.Count do begin if BlockList.Selected[i] then begin VarList.Items.Add(BlockList.Items[i]); BlockList.Items.Delete(i); i := 0; end else inc(i); end; UpdateBtnStates; end; procedure TBestRegForm.Regress(AReport: TStrings); var i, j: integer; begin b_zero := 0.0 ; ss_reg := 0.0 ; for i := 1 to sets do raw_b[i-1] := 0.0 ; { Set up matrices of deviation cross_products to use } for i := 1 to sets do begin ii := selected[i-1]; xycross[i-1] := cross_prod[y_ptr-1,ii-1]; for j := 1 to sets do begin jj := selected[j-1]; ind_mat[i-1,j-1] := cross_prod[ii-1,jj-1]; end; end; SVDinverse(ind_mat, sets); for i := 1 to sets do begin ii := selected[i-1]; for j := 1 to sets do raw_b[i-1] := raw_b[i-1] + (ind_mat[i-1,j-1] * xycross[j-1]) ; b_zero := b_zero + raw_b[i-1] * mean[ii-1]; end; b_zero := mean[y_ptr-1] - b_zero; { Get sum of squares for regression and multiple R } for i := 1 to sets do ss_reg := ss_reg + raw_b[i-1] * xycross[i-1]; mult_R2 := ss_reg / ss_total; { Now, check to see if this R2 is largest. If so, save set } if mult_R2 > biggest_R2 then begin biggest_R2 := mult_R2; for i := 1 to sets do max_set[i-1] := selected[i-1]; end; { print out this combination for testing purposes } if ComboShowChkBox.Checked then begin AReport.Add(' Set %d includes variables:', [sets]); for i := 1 to sets do AReport.Add('variable %d (%s)', [selected[i-1], ColLabels[selected[i-1]-1]]); AReport.Add(''); AReport.Add('Squared R: %.4f', [mult_R2]); AReport.Add(''); end; end; procedure TBestRegForm.Reset; var i: integer; begin inherited; DepVarEdit.Clear; BlockList.Clear; VarList.Clear; for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); CPChkBox.Checked := false; CovChkBox.Checked := false; CorrsChkBox.Checked := false; MeansChkBox.Checked := false; VarChkBox.Checked := false; SDChkBox.Checked := false; MatSaveChkBox.Checked := false; PredictChkBox.Checked := false; UpdateBtnStates; end; procedure TBestRegForm.ResetVars; var i: integer; begin testval := no_predictors - sets + 1 ; if (first_pt = 1) and (selected[first_pt-1] = testval) then end_of_set := TRUE else for i := first_pt + 1 to sets do selected[i-1] := selected[i-2] + 1; end; procedure TBestRegForm.StartSet; var i: integer; begin end_of_set := false; for i := 1 to sets do selected[i-1] := i; first_pt := sets; pointer := sets; end; procedure TBestRegForm.UpdateBtnStates; begin inherited; InBtn.Enabled := AnySelected(VarList); OutBtn.Enabled := AnySelected(BlockList); DepInBtn.Enabled := (VarList.ItemIndex > -1) and (DepVarEdit.Text <= ''); DepOutBtn.Enabled := DepVarEdit.Text <> ''; end; procedure TBestRegForm.VarListDblClick(Sender: TObject); var index: Integer; begin index := VarList.ItemIndex; if index > -1 then begin if DepVarEdit.Text = '' then DepVarEdit.Text := VarList.Items[index] else BlockList.Items.Add(VarList.Items[index]); VarList.Items.Delete(index); UpdateBtnStates; end; end; procedure TBestRegForm.VarListSelectionChange(Sender: TObject; User: boolean); begin UpdateBtnStates; end; end.