unit BestRegUnit; {$mode objfpc}{$H+} interface uses Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs, StdCtrls, Buttons, Globals, MainUnit, MatrixLib, OutPutUnit, FunctionsLib, DataProcs; type { TBestRegFrm } TBestRegFrm = class(TForm) CPChkBox: TCheckBox; ComboShowChkBox: TCheckBox; CovChkBox: TCheckBox; CorrsChkBox: TCheckBox; MeansChkBox: TCheckBox; VarChkBox: TCheckBox; SDChkBox: TCheckBox; MatSaveChkBox: TCheckBox; PredictChkBox: TCheckBox; MatInChkBox: TCheckBox; InBtn: TBitBtn; OutBtn: TBitBtn; AllBtn: TBitBtn; DepInBtn: TBitBtn; DepOutBtn: TBitBtn; ResetBtn: TButton; CancelBtn: TButton; ComputeBtn: TButton; ReturnBtn: TButton; DepVar: TEdit; InProb: TEdit; GroupBox1: TGroupBox; Label1: TLabel; Label2: TLabel; Label3: TLabel; Label4: TLabel; BlockList: TListBox; OpenDialog1: TOpenDialog; SaveDialog1: TSaveDialog; VarList: TListBox; procedure ComputeBtnClick(Sender: TObject); procedure FormShow(Sender: TObject); procedure ResetBtnClick(Sender: TObject); procedure INIT(Sender: TObject); procedure REGRESS(Sender: TObject); procedure BEST_SET_STATS(Sender: TObject); procedure bump_one(Sender: TObject); procedure start_set(Sender: TObject); procedure re_set(Sender: TObject); procedure InBtnClick(Sender: TObject); procedure OutBtnClick(Sender: TObject); procedure AllBtnClick(Sender: TObject); procedure DepInBtnClick(Sender: TObject); procedure DepOutBtnClick(Sender: TObject); private { private declarations } pred_labels : StrDyneVec; y_ptr, v : integer; ii : integer; jj : integer; pointer : integer; sets : integer; selected : IntDyneVec; max_set : IntDyneVec; cross_prod : DblDyneMat; ind_mat : DblDyneMat; end_of_set : boolean; all_done : boolean; more_to_do : boolean; no_predictors: integer; last_set : integer; first_pt : integer; testval : integer; sumx : DblDyneVec; mean : DblDyneVec; stddev : DblDyneVec; variance : DblDyneVec; xycross : DblDyneVec; raw_b : DblDyneVec; count : double; b_zero : double; stop_prob : double; mult_R2 : double; biggest_R2 : double; last_R2 : double; f_test : double; t, beta : double; ss_res : double; ms_res : double; ss_reg : double; ms_reg : double; df_reg : integer; df_res : integer; df1 : integer; df_tot : integer; prob_f : double; ss_total : double; seb : double; R2_diff : double; prout : double; errorcode : integer; errcode : boolean; DepVarCol : integer; RowLabels : StrDyneVec; ColLabels : StrDyneVec; ColNoSelected : IntDyneVec; NCases : integer; NoVars : integer; public { public declarations } end; var BestRegFrm: TBestRegFrm; implementation { TBestRegFrm } procedure TBestRegFrm.ResetBtnClick(Sender: TObject); VAR i : integer; begin BlockList.Clear; VarList.Clear; for i := 1 to NoVariables do begin VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); end; InBtn.Enabled := true; OutBtn.Enabled := false; DepInBtn.Enabled := true; DepOutBtn.Enabled := false; CPChkBox.Checked := false; CovChkBox.Checked := false; CorrsChkBox.Checked := true; MeansChkBox.Checked := true; VarChkBox.Checked := false; SDChkBox.Checked := true; MatSaveChkBox.Checked := false; PredictChkBox.Checked := false; DepVar.Text := ''; InProb.Text := ''; end; procedure TBestRegFrm.FormShow(Sender: TObject); begin ResetBtnClick(self); end; procedure TBestRegFrm.ComputeBtnClick(Sender: TObject); Label CleanUp; var i, j : integer; title : string; cellstring : string; filename : string; R2 : double; StdErrEst : double; IndepIndex : IntDyneVec; constant : double; begin if MatInChkBox.Checked = true then NoVariables := 200; SetLength(cross_prod,NoVariables+1,NoVariables+1); SetLength(ind_mat,NoVariables+1,NoVariables+1); SetLength(sumx,NoVariables); SetLength(mean,NoVariables); SetLength(stddev,NoVariables); SetLength(variance,NoVariables); SetLength(xycross,NoVariables); SetLength(raw_b,NoVariables); SetLength(RowLabels,NoVariables); SetLength(ColLabels,NoVariables); SetLength(IndepIndex,NoVariables); SetLength(ColNoSelected,NoVariables); SetLength(Selected,NoVariables); SetLength(Max_Set,NoVariables); SetLength(pred_labels,NoVariables); stop_prob := StrToFloat(InProb.Text); // probability to include a block prout := 1.0; OutPutFrm.RichEdit.Clear; // OutPutFrm.RichEdit.ParaGraph.Alignment := taLeftJustify; OutPutFrm.RichEdit.Lines.Add('Best Combination Multiple Regression by Bill Miller'); errorcode := 0; last_R2 := 0.0; last_set := 0 ; more_to_do := TRUE; { get data } if MatInChkBox.Checked = true then begin PredictChkBox.Checked := false; MatSaveChkBox.Checked := false; CPChkBox.Checked := false; OpenDialog1.Filter := 'FreeStat matrix files (*.MAT)|*.MAT|All files (*.*)|*.*'; OpenDialog1.FilterIndex := 1; if OpenDialog1.Execute then begin filename := OpenDialog1.FileName; MATREAD(cross_prod,NoVars,NoVars,Mean,stddev,NCases,RowLabels,ColLabels,filename); for i := 1 to NoVars do variance[i-1] := sqr(stddev[i-1]); ShowMessage('NOTICE! Last variable in matrix is the dependent variable'); end; if CorrsChkBox.Checked = true then begin title := 'Product-Moment Correlations Matrix'; MAT_PRINT(cross_prod,NoVars,NoVars,title,RowLabels,ColLabels,NCases); end; title := 'Means'; if MeansChkBox.Checked = true then DynVectorPrint(mean,NoVars,title,ColLabels,NCases); title := 'Variances'; if VarChkBox.Checked = true then DynVectorPrint(variance,NoVars,title,ColLabels,NCases); title := 'Standard Deviations'; if SDChkBox.Checked = true then DynVectorPrint(stddev,NoVars,title,ColLabels,NCases); DepVarCol := NoVars; y_ptr := NoVars; DepVar.Text := RowLabels[NoVars]; { convert correlations to deviation cross-products } for i := 1 to NoVars do for j := 1 to NoVars do cross_prod[i-1,j-1] := cross_prod[i-1,j-1] * stddev[i-1] * stddev[j-1] * (NCases - 1); end; if MatInChkBox.Checked = false then begin { get independent item columns } NoVars := BlockList.Items.Count; if NoVars < 1 then begin ShowMessage('ERROR! No independent variables selected.'); goto CleanUp; end; for i := 1 to NoVars do begin cellstring := BlockList.Items.Strings[i-1]; for j := 1 to NoVariables do begin if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then begin ColNoSelected[i-1] := j; RowLabels[i-1] := cellstring; ColLabels[i-1] := cellstring; end; end; end; { get dependendent variable column } if DepVar.Text = '' then begin ShowMessage('ERROR! No Dependent variable selected.'); goto CleanUp; end; DepVarCol := 0; NoVars := NoVars + 1; y_ptr := NoVars; for j := 1 to NoVariables do begin if DepVar.Text = OS3MainFrm.DataGrid.Cells[j,0] then begin DepVarCol := j; ColNoSelected[NoVars-1] := j; RowLabels[NoVars-1] := OS3MainFrm.DataGrid.Cells[j,0]; ColLabels[NoVars-1] := RowLabels[NoVars-1]; end; end; INIT(self); title := 'Cross-Products Matrix'; GridXProd(NoVars,ColNoSelected,cross_prod,true,NCases); for i := 1 to NoVars do begin sumx[i-1] := cross_prod[i-1,NoVars]; mean[i-1] := sumx[i-1] / NCases; variance[i-1] := cross_prod[i-1,i-1] - (sumx[i-1] * sumx[i-1] / NCases); variance[i-1] := variance[i-1] / (NCases-1); if variance[i-1] > 0 then stddev[i-1] := sqrt(variance[i-1]) else begin ShowMessage('ERROR! No variance for a variable!'); goto cleanup; end; end; if CPChkBox.Checked = true then begin MAT_PRINT(cross_prod,NoVars,NoVars,title,RowLabels,ColLabels,NCases); end; {get deviation cross-products matrix} for i := 1 to NoVars do for j := 1 to NoVars do cross_prod[i-1,j-1] := cross_prod[i-1,j-1] - (mean[i-1] * mean[j-1] * NCases); end; title := 'Deviation Cross-Products Matrix'; if CovChkBox.Checked = true then MAT_PRINT(cross_prod,NoVars,NoVars,title,RowLabels,ColLabels,NCases); v := NoVars; no_predictors := v - 1; ss_total := cross_prod[y_ptr-1,y_ptr-1]; biggest_R2 := 0.0; { Find best single predictor } sets := 1; for j := 1 to no_predictors do begin selected[0] := j; regress(self); end; BEST_SET_STATS(self); { Find best combinations of 2 to no_predictors - 1 } sets := 2; while sets < no_predictors do begin end_of_set := FALSE; start_set(self); while NOT end_of_set do begin regress(self); bump_one(self); end; regress(self); BEST_SET_STATS(self); sets := sets + 1; end; sets := sets - 1; // no. of predictors { Find regression with all of the predictors } if more_to_do then begin sets := no_predictors; for i := 1 to sets do selected[i-1] := i; REGRESS(self); BEST_SET_STATS(self); end else begin OutputFrm.RichEdit.Lines.Add(''); OutputFrm.RichEdit.Lines.Add('Last variable added failed entry test. Job ended.'); end; if MatInChkBox.Checked = false then begin { get correlation matrix and save if elected } Correlations(NoVars,ColNoSelected,cross_prod,mean,variance, stddev,errcode,NCases); if (CorrsChkBox.Checked = true) then begin title := 'Product-Moment Correlations Matrix'; MAT_PRINT(cross_prod,NoVars,NoVars,title,RowLabels,ColLabels,NCases); end; title := 'Means'; if (MeansChkBox.Checked = true) then DynVectorPrint(mean,NoVars,title,ColLabels,NCases); title := 'Variances'; if (VarChkBox.Checked = true) then DynVectorPrint(variance,NoVars,title,ColLabels,NCases); title := 'Standard Deviations'; if (SDChkBox.Checked = true) then DynVectorPrint(stddev,NoVars,title,ColLabels,NCases); if MatSaveChkBox.Checked = true then begin SaveDialog1.Filter := 'FreeStat matrix files (*.MAT)|*.MAT|All files (*.*)|*.*'; SaveDialog1.FilterIndex := 1; if SaveDialog1.Execute then begin filename := SaveDialog1.FileName; MATSAVE(cross_prod,NoVars,NoVars,mean,stddev,NCases,RowLabels,ColLabels,filename); end; end; { add [predicted scores, residual scores, etc. to grid if options elected } if PredictChkBox.Checked = true then begin for i := 1 to sets do begin ii := selected[i-1]; IndepIndex[i-1] := ii; //ColNoSelected[ii]; end; prout := 1.0; MReg2(NCases,NoVars, sets, IndepIndex, cross_prod, ind_mat, RowLabels, R2, raw_b, mean, variance, errorcode, StdErrEst, constant,prout, true, false,false); Predict(ColNoSelected, NoVars, ind_mat, mean, stddev, raw_b, StdErrEst, IndepIndex, sets); end; end; OutPutFrm.ShowModal; cleanup: pred_labels := nil; Max_Set := nil; Selected := nil; ColNoSelected := nil; IndepIndex := nil; ColLabels := nil; RowLabels := nil; raw_b := nil; xycross := nil; variance := nil; stddev := nil; mean := nil; sumx := nil; ind_mat := nil; cross_prod := nil; end; procedure TBestRegFrm.INIT(Sender: TObject); var i,j : integer; begin count := 0.0; for i := 1 to NoVariables do begin sumx[i-1] := 0.0; mean[i-1] := 0.0; variance[i-1] := 0.0; stddev[i-1] := 0.0; for j := 1 to v do cross_prod[i-1,j-1] := 0.0; end; end; procedure TBestRegFrm.REGRESS(Sender: TObject); var i, j : integer; outline : string; begin b_zero := 0.0 ; ss_reg := 0.0 ; for i := 1 to sets do raw_b[i-1] := 0.0 ; { Set up matrices of deviation cross_products to use } for i := 1 to sets do begin ii := selected[i-1]; xycross[i-1] := cross_prod[y_ptr-1,ii-1]; for j := 1 to sets do begin jj := selected[j-1]; ind_mat[i-1,j-1] := cross_prod[ii-1,jj-1]; end; end; SVDinverse(ind_mat,sets); for i := 1 to sets do begin ii := selected[i-1]; for j := 1 to sets do raw_b[i-1] := raw_b[i-1] + (ind_mat[i-1,j-1] * xycross[j-1]) ; b_zero := b_zero + raw_b[i-1] * mean[ii-1]; end; b_zero := mean[y_ptr-1] - b_zero; { Get sum of squares for regression and multiple R } for i := 1 to sets do ss_reg := ss_reg + raw_b[i-1] * xycross[i-1]; mult_R2 := ss_reg / ss_total; { Now, check to see if this R2 is largest. If so, save set } if mult_R2 > biggest_R2 then begin biggest_R2 := mult_R2; for i := 1 to sets do max_set[i-1] := selected[i-1]; end; { print out this combination for testing purposes } if ComboShowChkBox.Checked = true then begin outline := format(' Set %2d includes variables:',[sets]); OutPutFrm.RichEdit.Lines.Add(outline); for i := 1 to sets do begin outline := format('variable %d (%s)',[selected[i-1],ColLabels[selected[i-1]-1]]); OutPutFrm.RichEdit.Lines.Add(outline); end; OutPutFrm.RichEdit.Lines.Add(''); outline := format('Squared R = %6.4f',[mult_R2]); OutPutFrm.RichEdit.Lines.Add(outline); OutPutFrm.RichEdit.Lines.Add(''); end; end; procedure TBestRegFrm.BEST_SET_STATS(Sender: TObject); var i, j : integer; outline : string; begin OutPutFrm.RichEdit.Lines.Add(''); outline := format('Variables entered in step %2d',[sets]); OutPutFrm.RichEdit.Lines.Add(outline); for i := 1 to sets do begin ii := max_set[i-1]; selected[i-1] := max_set[i-1]; outline := format('%2d %s',[max_set[i-1],ColLabels[ii-1]]); OutPutFrm.RichEdit.Lines.Add(outline); end; OutPutFrm.RichEdit.Lines.Add(''); REGRESS(self); outline := format('Squared Multiple Correlation = %5.4f',[mult_r2]); OutPutFrm.RichEdit.Lines.Add(outline); outline := format('Dependent variable = %s',[ColLabels[y_ptr-1]]); OutPutFrm.RichEdit.Lines.Add(outline); OutPutFrm.RichEdit.Lines.Add('ANOVA for Regression Effects : '); OutPutFrm.RichEdit.Lines.Add('SOURCE df SS MS F Prob'); df_reg := sets; df_res := round(NCases) - sets - 1; df_tot := round(NCases) - 1; ms_reg := ss_reg / df_reg; ss_res := ( 1.0 - mult_R2) * ss_total ; ms_res := ss_res / df_res ; f_test := ms_reg / ms_res ; prob_f := probf(f_test, df_reg,df_res); { Get variance of b coefficients } outline := format('Regression %3d %14.4f %14.4f %14.4f %14.4f', [df_reg,ss_reg,ms_reg,f_test,prob_f]); OutPutFrm.RichEdit.Lines.Add(outline); outline := format('Residual %3d %14.4f %14.4f',[df_res,ss_res,ms_res]); OutPutFrm.RichEdit.Lines.Add(outline); outline := format('Total %3d %14.4f',[df_tot,ss_total]); OutPutFrm.RichEdit.Lines.Add(outline); OutPutFrm.RichEdit.Lines.Add(''); OutPutFrm.RichEdit.Lines.Add('Variables in the equation'); OutPutFrm.RichEdit.Lines.Add('VARIABLE b s.e. b Beta t prob. t'); for i := 1 to sets do for j := 1 to sets do ind_mat[i-1,j-1] := ind_mat[i-1,j-1] * ms_res ; for i := 1 to sets do begin ii := selected[i-1]; pred_labels[i-1] := ColLabels[ii-1]; outline := format('%16s %10.5f ',[ColLabels[ii-1],raw_b[i-1]]); seb := sqrt(ind_mat[i-1,i-1]); t := raw_b[i-1] / seb ; f_test := t * t ; prob_f := probf(f_test,1,df_res); beta := raw_b[i-1] * stddev[ii-1] / stddev[y_ptr-1] ; outline := outline + format('%8.4f %8.4f %6.3f %6.4f', [seb,beta,t,prob_f]); OutPutFrm.RichEdit.Lines.Add(outline); end; outline := format('(Intercept) %10.5f',[b_zero]); OutPutFrm.RichEdit.Lines.Add(outline); { MAT_PRINT(sets,ind_mat,pred_labels,'Variance-covariance matrix of b s');} { Now see if the gain was significant over last step } df1 := sets - last_set ; f_test := ((mult_R2 - last_R2 ) / df1 ) / ( (1.0 - mult_R2) / df_res) ; prob_f := probf(f_test, df1,df_res); if prob_f > stop_prob then more_to_do := FALSE ; R2_diff := mult_R2 - last_R2 ; OutPutFrm.RichEdit.Lines.Add(''); outline := format('Increase in squared R for this step = %8.6f',[R2_diff]); OutPutFrm.RichEdit.Lines.Add(outline); outline := format('F = %8.4f with D.F. %d and %d with Probability = %6.4f', [f_test,df1,df_res,prob_f]); OutPutFrm.RichEdit.Lines.Add(outline); OutPutFrm.RichEdit.Lines.Add('----------------------------------------------------------'); OutPutFrm.RichEdit.Lines.Add(''); last_set := sets; last_R2 := mult_R2; end; procedure TBestRegFrm.bump_one(Sender: TObject); begin if selected[first_pt-1] < no_predictors then selected[first_pt-1] := selected[first_pt-1] + 1 else begin all_done := FALSE; while NOT all_done do begin first_pt := first_pt -1; if first_pt < 1 then all_done := TRUE else begin selected[first_pt-1] := selected[first_pt-1] + 1; if selected[first_pt-1] < selected[first_pt] then begin re_set(self); first_pt := pointer; all_done := TRUE; end else selected[first_pt-1] := selected[first_pt-1] - 1; end; end; end; end; procedure TBestRegFrm.start_set(Sender: TObject); var i : integer; begin end_of_set := FALSE; for i := 1 to sets do selected[i-1] := i; first_pt := sets; pointer := sets; end; procedure TBestRegFrm.re_set(Sender: TObject); var i : integer; begin testval := no_predictors - sets + 1 ; if (first_pt = 1) AND (selected[first_pt-1] = testval) then end_of_set := TRUE else begin for i := first_pt + 1 to sets do selected[i-1] := selected[i-2] + 1; end; end; procedure TBestRegFrm.InBtnClick(Sender: TObject); VAR i, index : integer; begin index := VarList.Items.Count; i := 0; while i < index do begin if (VarList.Selected[i]) then begin BlockList.Items.Add(VarList.Items.Strings[i]); VarList.Items.Delete(i); index := index - 1; i := 0; end else i := i + 1; end; OutBtn.Enabled := true; end; procedure TBestRegFrm.OutBtnClick(Sender: TObject); VAR index : integer; begin index := BlockList.ItemIndex; VarList.Items.Add(BlockList.Items.Strings[index]); BlockList.Items.Delete(index); InBtn.Enabled := true; if BlockList.Items.Count = 0 then OutBtn.Enabled := false; end; procedure TBestRegFrm.AllBtnClick(Sender: TObject); VAR counts, index : integer; begin counts := VarList.Items.Count; for index := 0 to counts-1 do begin BlockList.Items.Add(VarList.Items.Strings[index]); end; VarList.Clear; end; procedure TBestRegFrm.DepInBtnClick(Sender: TObject); VAR index : integer; begin index := BlockList.ItemIndex; DepVar.Text := BlockList.Items.Strings[index]; BlockList.Items.Delete(index); DepOutBtn.Enabled := true; DepInBtn.Enabled := false; end; procedure TBestRegFrm.DepOutBtnClick(Sender: TObject); begin BlockList.Items.Add(DepVar.Text); DepVar.Text := ''; DepInBtn.Enabled := true; end; initialization {$I bestregunit.lrs} end.