Files
lazarus-ccr/applications/lazstats/source/forms/analysis/multiple_regression/lsmrunit.pas
2020-10-19 08:24:46 +00:00

747 lines
20 KiB
ObjectPascal

unit LSMRUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, Buttons, ExtCtrls, ComCtrls, Globals, MainUnit, MatrixLib,
DataProcs, BasicStatsParamsFormUnit, ReportFrameUnit, RegressionUnit;
type
{ TLSMregForm }
TLSMregForm = class(TBasicStatsParamsForm)
AllBtn: TBitBtn;
ANOVAChk: TCheckBox;
IndepVars: TListBox;
CorrsChk: TCheckBox;
CovChk: TCheckBox;
CrossProductsChk: TCheckBox;
DepInBtn: TBitBtn;
DepOutBtn: TBitBtn;
DepVarEdit: TEdit;
OptionsGroup: TGroupBox;
InBtn: TBitBtn;
InProbEdit: TEdit;
Label1: TLabel;
Label2: TLabel;
Label3: TLabel;
Label5: TLabel;
MatSaveChk: TCheckBox;
MeansChk: TCheckBox;
PageControl: TPageControl;
OutBtn: TBitBtn;
PredictChk: TCheckBox;
StdDevChk: TCheckBox;
RegressionPage: TTabSheet;
CrossProductsPage: TTabSheet;
CorrelationsPage: TTabSheet;
MeanVarStddevPage: TTabSheet;
ANOVAPage: TTabSheet;
VarCovarPage: TTabSheet;
VarChk: TCheckBox;
VarList: TListBox;
procedure AllBtnClick(Sender: TObject);
procedure DepInBtnClick(Sender: TObject);
procedure DepOutBtnClick(Sender: TObject);
procedure InBtnClick(Sender: TObject);
procedure IndepVarsDblClick(Sender: TObject);
procedure OutBtnClick(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
private
{ private declarations }
FRegressionFrame: TReportFrame;
FAnovaFrame: TReportFrame;
FCrossProductsFrame: TReportFrame;
FCorrelationsFrame: TReportFrame;
FVarCovarFrame: TReportFrame;
FMeanVarStddevFrame: TReportFrame;
IndepVarsCols: IntDyneVec;
NoVars: integer;
NoBlocks: integer;
procedure HideTabs;
procedure PredictionToGrid(xData: DblDyneMat; yData: DblDyneVec;
const ARegressionResults: TMultipleRegressionResults; ABadRows: IntDyneVec);
function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out ARowLabels: StrDyneVec; out xValues: DblDyneMat;
out yValues: DblDyneVec; out ABadRows: IntDyneVec): Boolean;
procedure Process_Regression(
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec; const ABadRows: IntDyneVec);
procedure WriteMeanVarStddevReport(AReport: TStrings; AVarNames: StrDyneVec;
const AMeans, AVars, AStdDevs: DblDyneVec; Flags: Integer);
procedure WriteReportHeader(AReport: TStrings; AVarNames: StrDyneVec);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
LSMregForm: TLSMregForm;
implementation
{$R *.lfm}
uses
Math, StrUtils,
Utils, GridProcs, MathUnit, MatrixUnit;
{ TLSMregForm }
constructor TLSMregForm.Create(AOwner: TComponent);
begin
inherited;
FRegressionFrame := TReportFrame.Create(self);
FRegressionFrame.Name := '';
FRegressionFrame.Parent := RegressionPage;
FRegressionFrame.Align := alClient;
FRegressionFrame.BorderSpacing.Left := 0;
FRegressionFrame.BorderSpacing.Top := 0;
FRegressionFrame.BorderSpacing.Bottom := 0;
FRegressionFrame.BorderSpacing.Right := 0;
InitToolbar(FRegressionFrame.ReportToolbar, tpRight);
FAnovaFrame := TReportFrame.Create(self);
FAnovaFrame.Name := '';
FAnovaFrame.Parent := AnovaPage;
FAnovaFrame.Align := alClient;
FAnovaFrame.BorderSpacing.Left := 0;
FAnovaFrame.BorderSpacing.Top := 0;
FAnovaFrame.BorderSpacing.Bottom := 0;
FAnovaFrame.BorderSpacing.Right := 0;
InitToolbar(FAnovaFrame.ReportToolbar, tpRight);
FCrossProductsFrame := TReportFrame.Create(self);
FCrossProductsFrame.Name := '';
FCrossProductsFrame.Parent := CrossProductsPage;
FCrossProductsFrame.Align := alClient;
FCrossProductsFrame.BorderSpacing.Left := 0;
FCrossProductsFrame.BorderSpacing.Top := 0;
FCrossProductsFrame.BorderSpacing.Bottom := 0;
FCrossProductsFrame.BorderSpacing.Right := 0;
InitToolbar(FCrossProductsFrame.ReportToolbar, tpRight);
FCorrelationsFrame := TReportFrame.Create(self);
FCorrelationsFrame.Name := '';
FCorrelationsFrame.Parent := CorrelationsPage;
FCorrelationsFrame.Align := alClient;
FCorrelationsFrame.BorderSpacing.Left := 0;
FCorrelationsFrame.BorderSpacing.Top := 0;
FCorrelationsFrame.BorderSpacing.Bottom := 0;
FCorrelationsFrame.BorderSpacing.Right := 0;
InitToolbar(FCorrelationsFrame.ReportToolbar, tpRight);
FVarCovarFrame := TReportFrame.Create(self);
FVarCovarFrame.Name := '';
FVarCovarFrame.Parent := VarCovarPage;
FVarCovarFrame.Align := alClient;
FVarCovarFrame.BorderSpacing.Left := 0;
FVarCovarFrame.BorderSpacing.Top := 0;
FVarCovarFrame.BorderSpacing.Bottom := 0;
FVarCovarFrame.BorderSpacing.Right := 0;
InitToolbar(FVarCovarFrame.ReportToolbar, tpRight);
FMeanVarStddevFrame := TReportFrame.Create(self);
FMeanVarStddevFrame.Name := '';
FMeanVarStddevFrame.Parent := MeanVarStdDevPage;
FMeanVarStddevFrame.Align := alClient;
FMeanVarStddevFrame.BorderSpacing.Left := 0;
FMeanVarStddevFrame.BorderSpacing.Top := 0;
FMeanVarStddevFrame.BorderSpacing.Bottom := 0;
FMeanVarStddevFrame.BorderSpacing.Right := 0;
InitToolbar(FMeanVarStddevFrame.ReportToolbar, tpRight);
PageControl.ActivePage := RegressionPage;
end;
procedure TLSMregForm.AdjustConstraints;
begin
ParamsPanel.Constraints.MinWidth := Max(
OptionsGroup.Width,
4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left
);
ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height +
VarList.BorderSpacing.Bottom + InProbEdit.Height +
OptionsGroup.BorderSpacing.Top + OptionsGroup.Height +
ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height;
end;
procedure TLSMregForm.AllBtnClick(Sender: TObject);
var
index: integer;
begin
for index := 0 to VarList.Items.Count-1 do
IndepVars.Items.Add(VarList.Items.Strings[index]);
VarList.Clear;
UpdateBtnStates;
end;
procedure TLSMRegForm.Compute;
var
indepCols: IntDyneVec = nil;
RowLabels: StrDyneVec = nil;
xValues: DblDyneMat = nil;
yValues: DblDyneVec = nil;
badRows: IntDyneVec = nil;
depCol: Integer;
begin
if PrepareData(indepCols, depCol, RowLabels, xValues, yValues, badRows) then
Process_Regression(RowLabels, xValues, yValues, badRows);
end;
procedure TLSMregForm.DepInBtnClick(Sender: TObject);
var
index: integer;
begin
index := VarList.ItemIndex;
if (index > -1) and (DepVarEdit.Text = '') then
begin
DepVarEdit.Text := VarList.Items[index];
VarList.Items.Delete(index);
end;
UpdateBtnStates;
end;
procedure TLSMregForm.DepOutBtnClick(Sender: TObject);
begin
if DepVarEdit.Text <> '' then
begin
VarList.Items.Add(DepVarEdit.Text);
DepVarEdit.Text := '';
end;
UpdateBtnStates;
end;
procedure TLSMregForm.InBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < VarList.Items.Count do
begin
if VarList.Selected[i] then
begin
IndepVars.Items.Add(VarList.Items[i]);
VarList.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TLSMregForm.IndepVarsDblClick(Sender: TObject);
var
index: Integer;
begin
index := IndepVars.ItemIndex;
if index > -1 then begin
VarList.Items.Add(IndepVars.Items[index]);
IndepVars.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TLSMregForm.HideTabs;
var
i: Integer;
begin
for i := 1 to PageControl.PageCount-1 do // i=1 --> keep 1st page visible
PageControl.Pages[i].TabVisible := false;
end;
procedure TLSMregForm.OutBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < IndepVars.Items.Count do
begin
if IndepVars.Selected[i] then
begin
VarList.Items.Add(IndepVars.Items[i]);
IndepVars.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TLSMregForm.PredictionToGrid(xData: DblDyneMat; yData: DblDyneVec;
const ARegressionResults: TMultipleRegressionResults; ABadRows: IntDyneVec);
var
zPred, zResid, rawPred, rawResid, stdErrPred, hi95, lo95: DblDyneVec;
begin
PredictMR(xData, yData, ARegressionResults,
zPred, zResid, RawPred, RawResid, StdErrPred, Hi95, Lo95);
AddVariable('Pred.z', zPred, '%8.4f', ABadRows);
AddVariable('z Resid', zResid, '%8.4f', ABadRows);
AddVariable('Raw Pred', rawPred, '%8.3f', ABadRows);
AddVariable('Raw Resid', rawResid, '%8.3f', ABadRows);
AddVariable('StdErr Pred', stdErrPred, '%8.3f', ABadRows);
AddVariable('Low 95%', lo95, '%8.3f', ABadRows);
AddVariable('Top 95%', hi95, '%8.3f', ABadRows);
end;
{ Prepares the data for the analysis by extracting all needed data from the
grid:
- AIndepCols: integer array containing the grid column indexes of the
independent variables to be used
- ADepCol: grid column index of the dependent variable to be used
- ARowLabels: string array containing the names of the independent variables
as well of the dependent variable (last)
- xValues: matrix with all independent values. The columns of the matrix
correspond to the variables, the row correspond to the cases.
- yValues: vector with the dependent variable values
}
function TLSMregForm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out ARowLabels: StrDyneVec; out xValues: DblDyneMat; out yValues: DblDyneVec;
out ABadRows: IntDyneVec): Boolean;
var
i, n: Integer;
msg: String;
C: TWinControl;
numIndepCols: Integer;
cols: IntDyneVec = nil;
begin
Result := false;
AIndepCols := nil;
ARowLabels := nil;
xValues := nil;
yvalues := nil;
ABadRows := nil;
if not Validate(msg, C) then
begin
C.SetFocus;
ErrorMsg(msg);
exit;
end;
numIndepCols := IndepVars.Items.Count;
ADepCol := GetVariableIndex(OS3MainFrm.DataGrid, DepVarEdit.Text);
SetLength(AIndepCols, numIndepCols);
SetLength(ARowLabels, numIndepCols + 1); // +1 to add independent column label
for i := 0 to numIndepCols-1 do
begin
AIndepCols[i] := GetVariableIndex(OS3MainFrm.DataGrid, IndepVars.Items[i]);
if AIndepCols[i] = -1 then
begin
ErrorMsg('Dependent variable %s not found.', [IndepVars.Items[i]]);
exit;
end;
ARowLabels[i] := IndepVars.Items[i];
end;
ARowLabels[numIndepCols] := DepVarEdit.Text;
// Check variable types: all of them must be numeric (float or integer)
if not IsNumericCol(ADepCol) then
begin
ErrorMsg('Incorrect data type of dependent variable.');
exit;
end;
for i := 0 to numIndepCols-1 do
if not IsNumericCol(AIndepCols[i]) then
begin
ErrorMsg('Incorrect data type of independent variable "%s"', [ARowLabels[i]]);
exit;
end;
// Prepare list of all column indices to be loaded: x, y
// ADepCol will follow immediately after the x columns.
SetLength(cols, NumIndepCols + 1);
cols[numIndepCols] := ADepCol;
for i := 0 to numIndepCols-1 do cols[i] := AIndepCols[i];
// Determine list of indices of rows containing invalid entries.
SetLength(ABadRows, OS3MainFrm.DataGrid.RowCount);
n := 0;
for i := 1 to OS3MainFrm.DataGrid.RowCount-1 do
if not GoodRecord(OS3MainFrm.DataGrid, i, cols) then
begin
ABadRows[n] := i;
inc(n);
end;
SetLength(ABadRows, n);
// Extract data values; take care to skip invalid values in both x and y
xValues := CollectMatValues(OS3MainFrm.DataGrid, cols);
// The y column has index numIndepCols, i.e. follows after the x columns.
yValues := MatColVector(xValues, numIndepCols);
MatColDelete(xValues, numIndepCols);
Result := true;
end;
{ Runs the least squares regression on the data in xValues and yValues }
procedure TLSMregForm.Process_Regression(const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec;
const ABadRows: IntDyneVec);
var
lReport: TStrings;
regressionRes: TMultipleRegressionResults;
n: Integer;
confLevel: Double;
err: TRegressionError;
flags: Integer;
means: DblDyneVec = nil;
vars: DblDyneVec = nil;
stdDevs: DblDyneVec = nil;
begin
confLevel := 1.0 - StrToFloat(InProbEdit.Text);
err := MultipleRegression(xValues, yValues, confLevel, regressionRes);
case err of
regOK : ;
regTooFewValues : ErrorMsg('At least two values required for regression.');
end;
if err <> regOK then
exit;
// Calculate means, variances, stddevs of all required variables and put them
// in vectors means, vars, stddevs, y data at end.
MatColMeanVarStdDev(xValues, means, vars, stdDevs);
n := Length(means);
SetLength(means, n+1);
SetLength(vars, n+1);
SetLength(stddevs, n+1);
VecMeanVarStdDev(yValues, means[n], vars[n], stddevs[n]);
lReport := TStringList.Create;
try
WriteReportHeader(lReport, ARowLabels);
regressionRes.WriteCoeffsReport(lReport, ARowLabels);
FRegressionFrame.DisplayReport(lReport);
if AnovaChk.Checked then
begin
lReport.Clear;
WriteReportHeader(lReport, ARowLabels);
regressionRes.WriteAnovaReport(lReport);
FAnovaFrame.DisplayReport(lReport);
end;
AnovaPage.TabVisible := AnovaChk.Checked;
if CrossProductsChk.Checked then
begin
lReport.Clear;
WriteReportHeader(lReport, ARowLabels);
regressionRes.WriteCrossProductsReport(lReport, ARowLabels);
FCrossProductsFrame.DisplayReport(lReport);
end;
CrossProductsPage.TabVisible := CrossProductsChk.Checked;
if CovChk.Checked then
begin
lReport.Clear;
WriteReportHeader(lReport, ARowLabels);
regressionRes.WriteVarCovarReport(lReport, ARowLabels);
FVarCovarFrame.DisplayReport(lReport);
end;
VarCovarPage.TabVisible := CovChk.Checked;
if CorrsChk.Checked then
begin
lReport.Clear;
WriteReportHeader(lReport, ARowLabels);
regressionRes.WriteCorrelationReport(lReport, ARowLabels);
FCorrelationsFrame.DisplayReport(lReport);
end;
CorrelationsPage.TabVisible := CorrsChk.Checked;
if MeansChk.Checked or VarChk.Checked or StdDevChk.Checked then
begin
lReport.Clear;
WriteReportHeader(lReport, ARowLabels);
flags := 0;
if MeansChk.Checked then inc(flags, 1);
if VarChk.Checked then inc(flags, 2);
if StdDevChk.Checked then inc(flags, 4);
WriteMeanVarStddevReport(lReport, ARowLabels, means, vars, stdDevs, flags);
FMeanVarStdDevFrame.displayReport(lReport);
end;
MeanVarStdDevPage.TabVisible := MeansChk.Checked or VarChk.Checked or StdDevChk.Checked;
if PredictChk.Checked then
PredictionToGrid(xValues, yValues, regressionRes, ABadRows);
if MatSaveChk.Checked then
begin
Application.ProcessMessages;
with TSaveDialog.Create(nil) do
try
Filter := 'LazStats matrix files (*.mat)|*.mat;*.MAT|All files (*.*)|*.*';
FilterIndex := 1;
if Execute then
begin
n := Length(means);
MatSave(RegressionRes.Correlations, n-1, n-1, means, stdDevs, regressionRes.NumCases, ARowLabels, ARowLabels, Filename);
end;
finally
Free;
end;
MatSaveChk.Checked := false;
end;
finally
lReport.Free;
end;
end;
procedure TLSMregForm.Reset;
var
i: integer;
begin
inherited;
IndepVars.Items.Clear;
NoBlocks := 1;
VarList.Items.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
AnovaChk.Checked := true;
CrossProductsChk.Checked := false;
CovChk.Checked := true;
CorrsChk.Checked := true;
MeansChk.Checked := true;
VarChk.Checked := false;
StdDevChk.Checked := true;
MatSaveChk.Checked := false;
PredictChk.Checked := false;
NoVars := 0;
DepVarEdit.Text := '';
InProbEdit.Text := FormatFloat('0.00', DEFAULT_ALPHA_LEVEL);
SetLength(IndepVarsCols, NoVariables+1);
HideTabs;
UpdateBtnStates;
end;
procedure TLSMregForm.UpdateBtnStates;
var
lSelected: Boolean;
begin
inherited;
if Assigned(FRegressionFrame) then
FRegressionFrame.UpdateBtnStates;
if Assigned(FAnovaFrame) then
FAnovaFrame.UpdateBtnStates;
if Assigned(FCrossProductsFrame) then
FCrossProductsFrame.UpdateBtnStates;
if Assigned(FCorrelationsFrame) then
FCorrelationsFrame.UpdateBtnStates;
if Assigned(FVarCovarFrame) then
FVarCovarFrame.UpdateBtnStates;
if Assigned(FMeanVarStddevFrame) then
FMeanVarStddevFrame.UpdateBtnStates;
lSelected := AnySelected(VarList);
DepInBtn.Enabled := lSelected;
InBtn.Enabled := lSelected;
OutBtn.Enabled := AnySelected(IndepVars);
DepOutBtn.Enabled := DepVarEdit.Text <> '';
AllBtn.Enabled := VarList.Items.Count > 0;
end;
function TLSMregForm.Validate(out AMsg: String; out AControl: TWinControl): Boolean;
var
x: double;
begin
Result := false;
if DepVarEdit.Text = '' then
begin
AControl := DepVarEdit;
AMsg := 'No dependent variable selected.';
exit;
end;
if IndepVars.Items.Count = 0 then
begin
AControl := IndepVars;
AMsg := 'No independent variables selected.';
exit;
end;
if InProbEdit.Text = '' then
begin
AControl := InProbEdit;
AMsg := 'This field cannot be empty.';
exit;
end;
if not TryStrToFloat(InProbEdit.Text, x) then
begin
AControl := InProbEdit;
AMsg := 'Non-numeric value.';
exit;
end;
Result := true;
end;
procedure TLSMregForm.VarListDblClick(Sender: TObject);
var
index: Integer;
begin
index := VarList.ItemIndex;
if index > -1 then
begin
if DepVarEdit.Text = '' then
DepVarEdit.Text := VarList.Items[index]
else
IndepVars.Items.Add(VarList.Items[index]);
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TLSMregForm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
{ Flags = 1 ... mean; Flags = 2 ... variacne; Flags = 4 = stdDev }
procedure TLSMRegForm.WriteMeanVarStddevReport(AReport: TStrings;
AVarNames: StrDyneVec; const AMeans, AVars, AStdDevs: DblDyneVec;
Flags: Integer);
const
W = 15;
SPACE = ' ';
MASK = SPACE + '%*.3f';
var
s, sL, sLL: String;
i, n: Integer;
begin
s := '';
if Flags and 1 <> 0 then
s := s + 'MEANS, ';
if Flags and 2 <> 0 then
s := s + 'VARIANCES, ';
if Flags and 4 <> 0 then
s := s + 'STANDARD DEVIATIONS, ';
SetLength(s, Length(s)-2); // remove training ', '
//Caption
AReport.Add(s);
n := 1;
s := CenterString('Variable', W);
sL := DupeString('-', W);
if Flags and 1 <> 0 then
begin
s := s + SPACE + CenterString('Mean', W);
sL := sL + SPACE + Dupestring('-', W);
inc(n);
end;
if Flags and 2 <> 0 then
begin
s := s + SPACE + CenterString('Variance', W);
sL := sL + SPACE + Dupestring('-', W);
inc(n);
end;
if Flags and 4 <> 0 then
begin
s := s + SPACE + CenterString('Std.Deviation', W);
sL := sL + SPACE + Dupestring('-', W);
inc(n);
end;
// Divider below caption
sLL := DupeString('-', n*W + (n-1) * Length(SPACE));
AReport.Add(sLL);
// Table headers
AReport.Add(s);
// Table header separating line
AReport.Add(sL);
// Table cells
n := Length(AMeans);
for i := 0 to n-1 do
begin
s := Format('%*s', [W, AVarNames[i]]);
if Flags and 1 <> 0 then
s := s + Format(MASK, [W, AMeans[i]]);
if Flags and 2 <> 0 then
s := s + Format(MASK, [W, AVars[i]]);
if Flags and 4 <> 0 then
s := s + Format(MASK, [W, AStdDevs[i]]);
AReport.Add(s);
if i = n-2 then
AReport.Add(sL);
end;
// Final dividing line below table
AReport.Add(sLL);
end;
procedure TLSMRegForm.WriteReportHeader(AReport: TStrings; AVarNames: StrDyneVec);
var
i, n: Integer;
begin
n := Length(AVarNames);
AReport.Clear;
AReport.Add('LEAST SQUARES REGRESSION RESULTS');
AReport.Add('');
AReport.Add('Dependent variable: ');
AReport.Add(' ' + AVarNames[n-1]);
AReport.Add('Independent variables:');
for i := 0 to n-2 do
AReport.Add(' ' + AVarNames[i]);
AReport.Add('');
end;
end.