Files
lazarus-ccr/applications/lazstats/source/forms/analysis/multiple_regression/wlsunit.pas

1223 lines
35 KiB
ObjectPascal
Raw Normal View History

unit WLSUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, Buttons, ExtCtrls, ComCtrls,
Globals, MainUnit, DictionaryUnit, Matrixlib, DataProcs,
RegressionUnit, ReportFrameUnit, ChartFrameUnit, BasicStatsParamsFormUnit;
type
{ TWLSFrm }
TWLSFrm = class(TBasicStatsParamsForm)
DepInBtn: TBitBtn;
DepOutBtn: TBitBtn;
IndInBtn: TBitBtn;
IndOutBtn: TBitBtn;
PageControl: TPageControl;
OLSPage: TTabSheet;
ResRegPageControl: TPageControl;
ResidualsRegPage: TTabSheet;
ResRegReportPage: TTabSheet;
UserWeightsChk: TRadioButton;
WeightChk: TRadioButton;
WLSPage: TTabSheet;
WeightInBtn: TBitBtn;
WeightOutBtn: TBitBtn;
SaveChk: TCheckBox;
OriginChk: TCheckBox;
Origin2Chk: TCheckBox;
DepVarEdit: TEdit;
WeightVarEdit: TEdit;
OptionsGroup: TGroupBox;
Label1: TLabel;
Label2: TLabel;
Label3: TLabel;
Label4: TLabel;
IndVarList: TListBox;
VarList: TListBox;
procedure DepInBtnClick(Sender: TObject);
procedure DepOutBtnClick(Sender: TObject);
procedure IndInBtnClick(Sender: TObject);
procedure IndOutBtnClick(Sender: TObject);
procedure IndVarListDblClick(Sender: TObject);
procedure UserWeightsChkChange(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
procedure WeightInBtnClick(Sender: TObject);
procedure WeightOutBtnClick(Sender: TObject);
private
OLSReportFrame: TReportFrame;
ResidualsRegReportFrame: TReportFrame;
WLSReportFrame: TReportFrame;
procedure AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
procedure AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
procedure CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
procedure CreateOrGetChartFrame(AColIndex: Integer; AVarName: String;
out AMemo: TMemo; out AChartFrame: TChartFrame);
function GetPageCaption(AVarName: String): String;
procedure PlotSquaredResiduals(AIndepCols: IntDyneVec; ADepCol: Integer;
const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
procedure PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
procedure Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults);
function PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
function Process_OLSRegression(AIndepCols: IntDyneVec; ADepCol: Integer;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec): Boolean;
function Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
out AWeights: DblDyneVec): Boolean;
function Process_WeightedRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec; const AWeights: DblDyneVec;
SubtractMeans: Boolean): Boolean;
function RegressionAndReport(const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec;
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
procedure WriteDescriptiveReport(AMemo: TMemo;
const ARegressionResults: TBivariateRegressionResults;
const XLabel, YLabel: String);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
WLSFrm: TWLSFrm;
implementation
{$R *.lfm}
uses
Math,
TAChartUtils, TAChartAxisUtils, TALegend, TASources, TACustomSeries,
Utils, MatrixUnit, GridProcs;
const
CONF_LEVEL = DEFAULT_CONFIDENCE_LEVEL_PERCENT / 100.0;
{ TWLSFrm }
constructor TWLSFrm.Create(AOwner: TComponent);
begin
inherited;
if DictionaryFrm = nil then Application.CreateForm(TDictionaryFrm, DictionaryFrm);
OLSReportFrame := TReportFrame.Create(self);
OLSReportFrame.Name := '';
OLSReportFrame.Parent := OLSPage;
OLSReportFrame.Align := alClient;
OLSReportFrame.BorderSpacing.Left := 0;
OLSReportFrame.BorderSpacing.Top := 0;
OLSReportFrame.BorderSpacing.Bottom := 0;
OLSReportFrame.BorderSpacing.Right := 0;
InitToolbar(OLSReportFrame.ReportToolbar, tpRight);
ResidualsRegReportFrame := TReportFrame.Create(self);
ResidualsRegReportFrame.Name := '';
ResidualsRegReportFrame.Parent := ResRegReportPage;
ResidualsRegReportFrame.Align := alClient;
ResidualsRegReportFrame.BorderSpacing.Left := 0;
ResidualsRegReportFrame.BorderSpacing.Top := 0;
ResidualsRegReportFrame.BorderSpacing.Bottom := 0;
ResidualsRegReportFrame.BorderSpacing.Right := 0;
InitToolbar(ResidualsRegReportFrame.ReportToolbar, tpRight);
WLSReportFrame := TReportFrame.Create(self);
WLSReportFrame.Name := '';
WLSReportFrame.Parent := WLSPage;
WLSReportFrame.Align := alClient;
WLSReportFrame.BorderSpacing.Left := 0;
WLSReportFrame.BorderSpacing.Top := 0;
WLSReportFrame.BorderSpacing.Bottom := 0;
WLSReportFrame.BorderSpacing.Right := 0;
InitToolbar(WLSReportFrame.ReportToolbar, tpRight);
PageControl.ActivePageIndex := 0;
end;
{ Adds a new variable names AColTitle after the last grid column,
and writes the specified data to the grid (in the specified number format). }
procedure TWLSFrm.AddVariable(AVarName: String; AData: DblDyneVec; ANumFormat: String);
var
i, colIndex: Integer;
begin
colIndex := GetVariableIndex(OS3MainFrm.DataGrid, AVarname);
if colIndex = -1 then
begin
colIndex := NoVariables + 1;
DictionaryFrm.NewVar(colIndex);
DictionaryFrm.DictGrid.Cells[1, colIndex] := AVarName;
DictionaryFrm.DictGrid.Cells[7, colIndex] := 'R';
OS3MainFrm.DataGrid.Cells[colIndex, 0] := AVarName;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
end;
for i := 0 to High(AData) do
OS3MainFrm.DataGrid.Cells[colIndex, i+1] := Format(ANumFormat, [AData[i]]);
end;
{ Calculate predicted squared residuals and save recipricols to grid as weights }
procedure TWLSFrm.AddWeightsToGrid(const ASqrPredictedResiduals, AWeights: DblDyneVec);
begin
// Create new variables and add to grid
AddVariable('Pred SqrResid', ASqrPredictedResiduals, '%.3f');
AddVariable('WEIGHTS', AWeights, '%.3f');
end;
{ Calculate predicted values of the squared residuals, as well as the weights }
procedure TWLSFrm.CalcWeights(xValues: DblDyneMat; ACoeffs: DblDyneVec;
out ASquaredPredictedResiduals: DblDyneVec; out AWeights: DblDyneVec);
var
i, j, n, m: Integer;
sum: Double;
begin
ASquaredPredictedResiduals := nil;
AWeights := nil;
MatSize(xValues, n,m);
SetLength(ASquaredPredictedResiduals, n);
SetLength(AWeights, n);
sum := 0;
for i := 0 to n-1 do
begin
ASquaredPredictedResiduals[i] := ACoeffs[m]; // intercept value
for j := 0 to m-1 do
ASquaredPredictedResiduals[i] += abs(xValues[i, j] * ACoeffs[j]);
if ASquaredPredictedResiduals[i] <> 0 then
AWeights[i] := 1 / ASquaredPredictedResiduals[i]
else
AWeights[i] := 0;
sum := sum + AWeights[i];
end;
// Normalize weights to 1.0
AWeights := AWeights * (1.0 / sum);
end;
procedure TWLSFrm.AdjustConstraints;
begin
ParamsPanel.Constraints.MinHeight := DepInBtn.Top + (IndOutBtn.Top - DepInBtn.Top)*2 + DepInBtn.Top +
OptionsGroup.Height + ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height;
ParamsPanel.Constraints.MinWidth := Max(
4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left,
OptionsGroup.Width
);
end;
procedure TWLSFrm.Compute;
var
i, j, noIndep, NCases, pos, col: integer;
X, weight: double;
Means: DblDyneVec = nil;
Variances: DblDyneVec = nil;
StdDevs: DblDyneVec = nil;
BWeights: DblDyneVec = nil;
BetaWeights: DblDyneVec = nil;
BStdErrs: DblDyneVec = nil;
BtTests: DblDyneVec = nil;
tProbs: DblDyneVec = nil;
lReport: TStrings;
StdErrEst: Double = 0.0;
R2: Double = 0.0;
errorcode: Boolean = false;
PrintDesc: boolean = true;
indepCols: IntDyneVec = nil;
rowLabels: StrDyneVec = nil;
weights: DblDyneVec = nil;
xValues: DblDyneMat = nil;
yValues: DblDyneVec = nil;
depCol: Integer;
weightCol: Integer = -1;
useOrigin: Boolean;
begin
SetLength(Means, NoVariables + 2);
SetLength(Variances, NoVariables + 2);
SetLength(StdDevs, NoVariables + 2);
SetLength(BWeights, NoVariables + 2); // do not remove!
SetLength(BetaWeights, NoVariables + 2);
SetLength(BStdErrs, NoVariables + 2);
SetLength(Bttests, NoVariables + 2);
SetLength(tprobs, NoVariables + 2);
lReport := TStringList.Create;
try
NCases := NoCases;
// Get column indexes and do some validation checks.
if not PrepareData(indepCols, depCol, weightCol, RowLabels, xValues, yValues) then
exit;
// Do the OLS regression
if not Process_OLSRegression(indepCols, depCol, RowLabels, xValues, yValues) then
exit;
// Regress the squared residuals on the predictors
if WeightChk.Checked then
begin
if not Process_SquaredResidualsRegression(indepCols, RowLabels, xValues, weights) then
exit;
useOrigin := OriginChk.Checked;
end else
begin
// Read the weights from the user column
weights := CollectVecValues(OS3MainFrm.DataGrid, weightCol, indepCols);
useOrigin := Origin2Chk.Checked;
end;
// Do the weighted regression, finally
Process_WeightedRegression(indepCols, RowLabels, xValues, yValues, weights, useOrigin);
exit;
if WeightChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
// DepCol := olddepcol;
IndepCols[Noindep] := DepCol;
for i := 1 to NoCases do
begin
weight := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[NoVariables,i]));
for j := 0 to Noindep do
begin
pos := IndepCols[j];
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
X := X * weight;
OS3MainFrm.DataGrid.Cells[pos, i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID CELLS
end;
end;
// get means of variables and subtract from the values
if OriginChk.Checked then
begin
for j := 0 to NoIndep do
begin
Means[j] := 0.0;
NCases := 0;
pos := IndepCols[j];
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
Means[j] := Means[j] + StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
NCases := NCases + 1;
end;
end;
Means[j] := Means[j] / NCases;
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos,i]));
X := X - Means[j];
OS3MainFrm.DataGrid.Cells[pos,i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID DATA!
end;
end; // next i
end; // next j
end; // if origin checked
lReport.Clear;
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
MReg(Noindep, IndepCols, DepCol, RowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
NCases, errorcode, PrintDesc, lReport);
WLSReportFrame.DisplayReport(lReport);
lReport.Clear;
end; // if weightschk checked
// use the weights entered by the user
if UserWeightsChk.Checked then
begin
// Weight variables and do OLS regression on weighted variables
// depCol := olddepcol;
indepCols[Noindep] := depCol; // wp: CALCULATION SHOULD NORMALIZE USER WEIGHTS HERE !!!
for i := 1 to NoCases do
begin
weight := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[weightCol, i]));
for j := 0 to Noindep do
begin
pos := indepCols[j];
X := StrToFloat(OS3MainFrm.DataGrid.Cells[pos,i]);
X := X * weight;
OS3MainFrm.DataGrid.Cells[pos, i] := FloatToStr(X);
end;
end;
if Origin2Chk.Checked then // get means of variables and subtract from the values
begin
for j := 0 to Noindep do
begin
Means[j] := 0.0;
NCases := 0;
pos := IndepCols[j];
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
Means[j] := Means[j] + StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[pos, i]));
NCases := NCases + 1;
end;
end;
Means[j] := Means[j] / NCases;
for i := 1 to NoCases do
begin
if (DataProcs.ValidValue(i,pos)) then
begin
X := StrToFloat(OS3MainFrm.DataGrid.Cells[pos,i]);
X := X - Means[j];
OS3MainFrm.DataGrid.Cells[pos,i] := FloatToStr(X); // wp: DON'T OVERWRITE GRID DATA!
end;
end; // next i
end; // next j
end; // if origin checked
lReport.Clear;
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
MReg(Noindep, IndepCols, DepCol, RowLabels, Means, Variances, StdDevs,
BWeights, BetaWeights, BStdErrs, Bttests, tprobs, R2, stdErrEst,
NCases, errorcode, PrintDesc, lReport);
WLSReportFrame.DisplayReport(lReport);
lReport.Clear;
end;
finally
lReport.Free;
end;
end;
procedure TWLSFrm.CreateOrGetChartFrame(AColIndex: Integer; AVarName: String;
out AMemo: TMemo; out AChartFrame: TChartFrame);
var
sheetTitle: String;
tabSheet: TTabSheet;
i: Integer;
begin
sheetTitle := GetPageCaption(AVarName);
// Find existing sheet first.
for i := 1 to ResRegPageControl.PageCount-1 do
if ResRegPageControl.Pages[i].Caption = sheetTitle then begin
tabSheet := ResRegPageControl.Pages[i];
AChartFrame := tabSheet.Controls[0] as TChartFrame;
AMemo := tabSheet.Controls[1] as TMemo;
exit;
end;
// Not found: create new sheet ...
tabSheet := ResRegPageControl.AddTabSheet;
tabSheet.Caption := sheetTitle;
tabSheet.Tag := AColIndex;
// ... and add ChartFrame
AChartFrame := TChartFrame.Create(tabSheet);
AChartFrame.Parent := tabSheet;
AChartFrame.Align := alClient;
AChartFrame.Chart.Legend.Alignment := laBottomCenter;
AChartFrame.Chart.Legend.ColumnCount := 3;
AChartFrame.Chart.Legend.TextFormat := tfHTML;
AChartFrame.Chart.BottomAxis.Intervals.MaxLength := 80;
AChartFrame.Chart.BottomAxis.Intervals.MinLength := 30;
with AChartFrame.Chart.AxisList.Add do
begin
Alignment := calRight;
Marks.Source := TListChartSource.Create(self);
Marks.Style := smsLabel;
Grid.Visible := false;
TickColor := clNone;
end;
with AChartFrame.Chart.AxisList.Add do
begin
Alignment := calTop;
Marks.Source := TListChartSource.Create(self);
Marks.Style := smsLabel;
Grid.Visible := false;
TickColor := clNone;
end;
// ... and add memo
AMemo := TMemo.Create(tabSheet);
AMemo.Parent := tabSheet;
AMemo.Align := alBottom;
AMemo.BorderStyle := bsNone;
AMemo.Font.Name := 'Courier New';
AMemo.Font.Size := 8;
AMemo.ReadOnly := true;
AMemo.Scrollbars := ssAutoBoth;
AMemo.WordWrap := false;
// ... and splitter
with TSplitter.Create(tabSheet) do
begin
Parent := tabSheet;
Align := alBottom;
end;
end;
procedure TWLSFrm.DepInBtnClick(Sender: TObject);
var
index: integer;
begin
index := VarList.ItemIndex;
if (index > -1) and (DepVarEdit.Text = '') then
begin
DepVarEdit.Text := VarList.Items[index];
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TWLSFrm.DepOutBtnClick(Sender: TObject);
begin
if (DepVarEdit.Text <> '') then
begin
VarList.Items.Add(DepVarEdit.Text);
DepVarEdit.Text := '';
UpdateBtnStates;
end;
end;
function TWLSFrm.GetPageCaption(AVarName: String): String;
begin
Result := 'Plot of ' + AVarName;
end;
procedure TWLSFrm.IndInBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while (i < VarList.Items.Count) do
begin
if (VarList.Selected[i]) then
begin
IndVarList.Items.Add(VarList.Items[i]);
VarList.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TWLSFrm.IndOutBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while (i < IndVarList.Items.Count) do
begin
if IndVarlist.Selected[i] then
begin
VarList.Items.Add(IndVarList.Items[i]);
IndVarlist.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TWLSFrm.IndVarListDblClick(Sender: TObject);
var
index: Integer;
begin
index := IndVarList.ItemIndex;
if index > -1 then
begin
VarList.Items.Add(IndVarList.Items[index]);
IndVarlist.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TWLSFrm.PlotSquaredResiduals(AIndepCols: IntDyneVec;
ADepCol: Integer; const AIndepValues: DblDyneMat; const ADepValues: DblDyneVec);
var
x, y: DblDyneVec;
i, xCol, yCol: Integer;
regressionRes: TBivariateRegressionResults;
memo: TMemo;
chartFrame: TChartFrame;
xLabel, yLabel: String;
numIndepCols: Integer;
begin
// We will plot the selected vector of the independent values vertically,
// and the dependent values horizontally.
xCol := ADepCol;
x := VecCopy(ADepValues);
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
numIndepCols := Length(AIndepCols);
for i := 0 to numIndepCols-1 do
begin
yCol := AIndepCols[i];
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
y := MatColVector(AIndepValues, yCol-1);
SortOnX(x, y);
// Regression
BivariateRegression(x, y, CONF_LEVEL, regressionRes);
// Create tab with chart and report controls
CreateOrGetChartFrame(yCol-1, yLabel, memo, chartFrame); // -1 because yCol i is in grid units
// Plot
PlotXY(chartFrame, x, y, regressionRes, xLabel, yLabel);
// Print the descriptive statistics
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end;
end;
(*
var
xCol, yCol: Integer;
xLabel, yLabel: String;
i: Integer;
colNoSelected: IntDyneVec = nil;
xPoints: DblDyneVec = nil;
yPoints: DblDyneVec = nil;
regressionRes: TBivariateRegressionResults;
memo: TMemo;
chartFrame: TChartFrame;
begin
SetLength(colNoSelected, 2);
xCol := ADepCol;
for i := 0 to ANumIndepCols-1 do
begin
// Get values
yCol := AIndepCols[i];
colNoSelected[0] := xCol;
colNoSelected[1] := yCol;
xLabel := OS3MainFrm.DataGrid.Cells[xCol, 0];
yLabel := OS3MainFrm.DataGrid.Cells[yCol, 0];
xPoints := CollectVecValues(OS3MainFrm.DataGrid, xCol, colNoSelected);
yPoints := CollectVecValues(OS3MainFrm.DataGrid, yCol, colNoSelected);
SortOnX(xPoints, yPoints);
// Regression
BivariateRegression(xPoints, yPoints, AConfLevel, regressionRes);
// Create tab with chart and report controls
CreateOrGetChartFrame(yCol, yLabel, memo, chartFrame);
// Plot
PlotXY(chartFrame, xPoints, yPoints, regressionRes, xLabel, yLabel);
// Print the descriptive statistics
WriteDescriptiveReport(memo, regressionRes, xLabel, yLabel);
end;
end;
*)
procedure TWLSFrm.PlotXY(AChartFrame: TChartFrame; const XPoints, YPoints: DblDyneVec;
const ARegressionResults: TBivariateRegressionResults; const XLabel, YLabel: String);
var
xpts: DblDyneVec = nil;
ypts: DblDyneVec = nil;
conf: DblDyneVec = nil;
ext: TDoubleRect;
i: Integer;
rightLabels, topLabels: TListChartSource;
ser: TChartSeries;
begin
rightLabels := AChartFrame.Chart.AxisList[2].Marks.Source as TListChartSource;
rightLabels.Clear;
topLabels := AChartFrame.Chart.AxisList[3].Marks.Source as TListChartSource;
topLabels.Clear;
AChartFrame.Clear;
with ARegressionResults do
AChartFrame.SetFooter(Format('R(X,Y) = %.3f, Slope = %.3f, Intercept = %.3f', [
R, Slope, Intercept
]));
// Data points
AChartFrame.SetXTitle(XLabel);
AChartFrame.SetYTitle(YLabel);
AChartFrame.PlotXY(ptSymbols, XPoints, YPoints, nil, nil, 'Data', DATA_COLORS[0]);
// Regression line
SetLength(xpts, 2);
SetLengtH(ypts, 2);
ext := AChartFrame.Chart.GetFullExtent;
xpts[0] := ext.a.x;
xpts[1] := ext.b.x;
with ARegressionResults do
begin
ypts[0] := Intercept + Slope * xpts[0];
ypts[1] := Intercept + Slope * xpts[1];
end;
AChartFrame.PlotXY(ptLines, xpts, ypts, nil, nil, 'Predicted', clBlack);
rightLabels.Add(ypts[1], ypts[1], 'Predicted');
// Upper ...
SetLength(conf, ARegressionResults.Count);
for i := 0 to High(conf) do
conf[i] := ARegressionResults.ConfidenceLimits(XPoints[i], true);
ser := AChartFrame.PlotXY(ptLines, XPoints, conf, nil, nil, 'Upper confidence band', clRed);
rightLabels.Add(ser.yValue[ser.Count-1], ser.yValue[ser.Count-1], 'UCL');
// ... and lower confidence limit curves
for i := 0 to High(conf) do
conf[i] := ARegressionResults.ConfidenceLimits(XPoints[i], false);
ser := AChartFrame.PlotXY(ptLines, XPoints, conf, nil, nil, 'Lower confidence band', clRed);
rightLabels.Add(ser.yValue[ser.Count-1], ser.yValue[ser.Count-1], 'LCL');
// Mean lines
with ARegressionResults do
begin
AChartFrame.VertLine(XMean, clGreen, psDashDot, 'Mean ' + XLabel);
topLabels.Add(XMean, XMean, 'Mean ' + XLabel);
AChartFrame.HorLine(YMean, clGreen, psDash, 'Mean ' + YLabel);
rightLabels.Add(YMean, YMean, 'Mean ' + YLabel);
end;
end;
{ Routine obtains predicted raw and standardized scores and their
residuals. It is assumed that the dependent variable is last in the
list of variable column pointers stored in the ColNoSelected vector.
Get the z predicted score and its residual }
procedure TWLSFrm.Predict(const xData: DblDyneMat; const yData: DblDyneVec;
ARegressionResults: TMultipleRegressionResults);
var
means, stddevs, variances: DblDyneVec;
i, j, n, m: Integer;
zPred: DblDyneVec = nil;
// zResid: DblDyneVec = nil;
rawPred: DblDyneVec = nil;
rawResid: DblDyneVec = nil;
sqrResid: DblDyneVec = nil;
begin
MatSize(xData, n, m);
MatColMeanVarStdDev(xData, means, variances, stddevs);
SetLength(zPred, n);
// SetLength(zResid, n);
SetLength(rawPred, n);
SetLength(rawResid, n);
SetLength(sqrResid, n);
for i := 0 to n-1 do
begin
zPred[i] := 0;
for j := 0 to m-1 do
zPred[i] := zPred[i] + (xData[i, j] - means[j]) / stdDevs[j] * ARegressionResults.Beta[j];
{
zResid[i] := (yData[i] - ARegressionResults.MeanY) / ARegressionResults.StdDevY;
w: THIS IS NOT CORRECT. Remove above line because it is not needed.
This is the code used by the original routine
if StdDevs[NoVars-1] <> 0.0 then
begin
Index := ColNoSelected[NoVars-1];
z2 := StrToFloat(OS3MainFrm.DataGrid.Cells[Index,i]);
z2 := (z2 - Means[NoVars-1]) / StdDevs[NoVars-1]; // z score
OS3MainFrm.DataGrid.Cells[col2, i] := Format('%.4f',[z2 - zpredicted]); // z residual
end;
}
rawPred[i] := ARegressionResults.Coeffs[m]; // intercept
for j := 0 to m-1 do
rawPred[i] := rawPred[i] + ARegressionResults.Coeffs[j] * xData[i, j];
rawResid[i] := rawPred[i] - yData[i];
sqrResid[i] := sqr(rawResid[i]);
end;
AddVariable('z Pred', zPred, '%.4f');
// AddGridColumn('z Resid', zResid, '%.4f');
AddVariable('Raw Pred', rawPred, '%.3f');
AddVariable('Raw Resid', rawResid, '%.3f');
AddVariable('Sqr Resid', sqrResid, '%.3f');
end;
{ Prepares the data for the analysis by extracting all needed data from the
grid:
- AIndepCols: integer array containing the grid column indexes of the
independent variables to be used
- ADepCol: grid column index of the dependent variable to be used
- AWeightCol: optional grid column index of the weight data to be used
- ARowLabels: string array containing the names of the independent variables
as well of the dependent variable (last)
- xValues: matrix with all independent values. The columns of the matrix
correspond to the variables, the row correspond to the cases.
- yValues: vector with the dependent variable values
}
function TWLSFrm.PrepareData(out AIndepCols: IntDyneVec; out ADepCol: Integer;
out AWeightCol: Integer; out ARowLabels: StrDyneVec;
out xValues: DblDyneMat; out yValues: DblDyneVec): Boolean;
var
i: Integer;
msg: String;
C: TWinControl;
numIndepCols: Integer;
begin
Result := false;
AIndepCols := nil;
ARowLabels := nil;
xValues := nil;
yvalues := nil;
if not Validate(msg, C) then
begin
C.SetFocus;
ErrorMsg(msg);
exit;
end;
numIndepCols := IndVarList.Items.Count;
ADepCol := GetVariableIndex(OS3MainFrm.DataGrid, DepVarEdit.Text);
AWeightCol := GetVariableIndex(OS3MainFrm.DataGrid, WeightVarEdit.Text);
SetLength(AIndepCols, numIndepCols);
SetLength(ARowLabels, numIndepCols + 1); // +1 to add independent column label
for i := 0 to numIndepCols-1 do
begin
AIndepCols[i] := GetVariableIndex(OS3MainFrm.DataGrid, IndVarList.Items[i]);
if AIndepCols[i] = -1 then
begin
ErrorMsg('Dependent variable %s not found.', [IndVarList.Items[i]]);
exit;
end;
ARowLabels[i] := IndVarList.Items[i];
end;
ARowLabels[numIndepCols] := DepVarEdit.Text;
// Check variable types: all of them must be numeric (float or integer)
if not IsNumericCol(ADepCol) then
begin
ErrorMsg('Incorrect data type of dependent variable.');
exit;
end;
for i := 0 to numIndepCols-1 do
if not IsNumericCol(AIndepCols[i]) then
begin
ErrorMsg('Incorrect data type of independent variable "%s"', [ARowLabels[i]]);
exit;
end;
if (AWeightCol > -1) and (not IsNumericCol(AWeightCol)) then
begin
ErrorMsg('Incorrect data type of weight variable.');
exit;
end;
xValues := CollectMatValues(OS3MainFrm.DataGrid, AIndepCols);
yValues := CollectVecValues(OS3MainFrm.DataGrid, ADepCol);
Result := true;
end;
{ Runs the ordinary least squares regression on the grid data }
function TWLSFrm.Process_OLSRegression(AIndepCols: IntDyneVec;
ADepCol: Integer; const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec): Boolean;
var
lReport: TStrings;
regressionRes: TMultipleRegressionResults;
i: Integer;
numIndepCols: Integer;
begin
Result := false;
numIndepCols := Length(AIndepCols);
lReport := TStringList.Create;
try
lReport.Add('ORDINARY LEAST SQUARES (OLS) REGRESSION RESULTS');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + OS3MainFrm.DataGrid.Cells[ADepCol, 0]);
lReport.Add('Independent variables:');
for i := 0 to numIndepCols-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
if Result then
begin
Predict(xValues, yValues, regressionRes);
OLSReportFrame.DisplayReport(lReport);
end;
finally
lReport.Free;
end;
end;
function TWLSFrm.Process_SquaredResidualsRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat; out AWeights: DblDyneVec): Boolean;
var
lReport: TStrings;
sqrResiduals: DblDyneVec;
predSqrResiduals: DblDyneVec;
regressionRes: TMultipleRegressionResults;
i, depCol, numIndepCols: Integer;
begin
AWeights := nil;
ResidualsRegPage.TabVisible := WeightChk.Checked;
if not WeightChk.Checked then
exit;
numIndepCols := Length(AIndepCols);
// The last grid column (added by Process_ODSRegression) contains the
// squared residuals which will be fitted here.
depCol := NoVariables;
sqrResiduals := CollectVecValues(OS3MainFrm.DataGrid, depCol);
lReport := TStringList.Create;
try
lReport.Add('REGRESSION OF SQUARED RESIDUALS ON INDEPENDENT VARIABLES');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + ARowLabels[numIndepCols]);
lReport.Add('Independent variables:');
for i := 0 to numIndepCols-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
Result := RegressionAndReport(ARowLabels, xValues, sqrResiduals, regressionRes, lReport);
if Result then
begin
// Display the results
ResidualsRegReportFrame.DisplayReport(lReport);
// Calculate weights and store them in the grid
CalcWeights(xValues, regressionRes.Coeffs, predSqrResiduals, AWeights);
// Display squared residuals for each independent variable
PlotSquaredResiduals(AIndepCols, depCol, xValues, sqrResiduals);
// Store weights to the grid
if SaveChk.Checked then
AddWeightsToGrid(predSqrResiduals, AWeights);
end;
finally
lReport.Free;
end;
end;
function TWLSFrm.Process_WeightedRegression(AIndepCols: IntDyneVec;
const ARowLabels: StrDyneVec; const xValues: DblDyneMat;
const yValues: DblDyneVec; const AWeights: DblDyneVec; SubtractMeans: Boolean): Boolean;
var
i, j, n, m: Integer;
regressionRes: TMultipleRegressionResults;
lReport: TStrings;
means: DblDyneVec;
begin
MatSize(xValues, n, m);
for i :=0 to n-1 do
for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] * AWeights[i];
if SubtractMeans then
begin
means := MatRowMeans(xValues);
for i := 0 to n-1 do
for j := 0 to m-1 do
xValues[i, j] := xValues[i, j] - means[i];
end;
lReport := TStringList.Create;
try
lReport.Add('WEIGHTED LEAST SQUARES (WLS) REGRESSION RESULTS');
lReport.Add('');
lReport.Add('Dependent variable: ');
lReport.Add(' ' + ARowLabels[m]);
lReport.Add('Independent variables:');
for i := 0 to m-1 do
lReport.Add(' ' + ARowLabels[i]);
lReport.Add('');
Result := RegressionAndReport(ARowLabels, xValues, yValues, regressionRes, lReport);
if Result then
WLSReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
function TWLSFrm.RegressionAndReport(const ARowLabels: StrDyneVec;
const xValues: DblDyneMat; const yValues: DblDyneVec;
out ARegressionResults: TMultipleRegressionResults; AReport: TStrings): Boolean;
var
err: TRegressionError;
begin
err := MultipleRegression(xValues, yValues, CONF_LEVEL, ARegressionResults);
case err of
regOK: ;
regTooFewValues: ErrorMsg('At least two values required for regression.');
regStdDevZero: ErrorMsg('Standard deviation is zero.');
end;
Result := (err = regOK);
ARegressionResults.WriteCoeffsReport(AReport, ARowLabels);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteANOVAReport(AReport);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteVarCovarReport(AReport, ARowLabels);
AReport.Add('');
AReport.Add('');
ARegressionResults.WriteCorrelationReport(AReport, ARowLabels);
end;
procedure TWLSFrm.Reset;
var
i: integer;
begin
inherited;
VarList.Clear;
for i := 0 to NoVariables - 1 do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i+1,0]);
IndVarList.Clear;
DepVarEdit.Clear;
WeightVarEdit.Clear;
if OLSReportFrame <> nil then
OLSReportFrame.Clear;
if ResidualsRegReportFrame <> nil then
ResidualsRegReportFrame.clear;
if WLSReportFrame <> nil then
WLSReportFrame.Clear;
WeightChk.Checked := true;
UserWeightsChk.Checked := false;
OriginChk.Checked := false;
Origin2Chk.Checked := false;
UpdateBtnStates;
end;
procedure TWLSFrm.UpdateBtnStates;
var
i: Integer;
lSelected: Boolean;
begin
inherited;
lSelected := false;
for i:=0 to VarList.Items.Count-1 do
if Varlist.Selected[i] then
begin
lSelected := true;
break;
end;
DepInBtn.Enabled := lSelected and (DepVarEdit.Text = '');
IndInBtn.Enabled := lSelected;
WeightInBtn.Enabled := lSelected and (WeightVarEdit.Text = '') and UserWeightsChk.Checked;
lSelected := false;
for i:=0 to IndVarList.Items.Count-1 do
if IndVarList.Selected[i] then
begin
lSelected := true;
break;
end;
DepOutBtn.Enabled := (DepVarEdit.Text <> '');
IndOutBtn.Enabled := lSelected;
WeightOutBtn.Enabled := (WeightVarEdit.Text <> '') and UserWeightsChk.Checked;
if OLSReportFrame <> nil then
OLSReportFrame.UpdateBtnStates;
if ResidualsRegReportFrame <> nil then
ResidualsRegReportFrame.UpdateBtnStates;
if WLSReportFrame <> nil then
WLSReportFrame.UpdateBtnStates;
end;
procedure TWLSFrm.UserWeightsChkChange(Sender: TObject);
begin
WeightVarEdit.Enabled := UserWeightsChk.Checked;
Label4.Enabled := WeightVarEdit.Enabled;
WeightInBtn.Enabled := UserWeightsChk.Checked and (VarList.ItemIndex > -1) and (WeightVarEdit.Text = '');
WeightOutBtn.Enabled := UserWeightsChk.Checked and (WeightVarEdit.Text <> '');
end;
function TWLSFrm.Validate(out AMsg: String; out AControl: TWinControl): Boolean;
begin
Result := false;
if DepVarEdit.Text = '' then
begin
AControl := DepVarEdit;
AMsg := 'No dependent variable selected.';
exit;
end;
if IndVarList.Items.Count = 0 then
begin
AControl := VarList;
AMsg := 'No independent variables selected.';
exit;
end;
Result := true;
end;
procedure TWLSFrm.VarListDblClick(Sender: TObject);
var
index: Integer;
begin
index := VarList.ItemIndex;
if index > -1 then
begin
if DepVarEdit.Text = '' then
DepVarEdit.Text := VarList.Items[index]
else
IndVarList.Items.Add(VarList.Items[index]);
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TWLSFrm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
procedure TWLSFrm.WeightInBtnClick(Sender: TObject);
var
index: integer;
begin
index := VarList.ItemIndex;
if (index > -1) and (WeightVarEdit.Text = '') then
begin
WeightVarEdit.Text := VarList.Items[index];
VarList.Items.Delete(index);
end;
UpdateBtnStates;
end;
procedure TWLSFrm.WeightOutBtnClick(Sender: TObject);
begin
if (WeightVarEdit.Text <> '') then
begin
VarList.Items.Add(WeightVarEdit.Text);
WeightVarEdit.Text := '';
end;
UpdateBtnStates;
end;
procedure TWLSFrm.WriteDescriptiveReport(AMemo: TMemo;
const ARegressionResults: TBivariateRegressionResults;
const XLabel, YLabel: String);
var
lReport: TStrings;
begin
lReport := TStringList.Create;
try
lReport.Add(' Variable Mean Variance Std.Dev. ');
lReport.Add('------------ ------------ ------------ ------------');
with ARegressionResults do
begin
lReport.Add('%12s %12.2f %12.2f %12.2f', [xLabel, XMean, XVariance, XStdDev]);
lReport.Add('%12s %12.2f %12.2f %12.2f', [yLabel, YMean, YVariance, YStdDev]);
lReport.Add('');
lReport.Add('Regression:');
lReport.Add(' Correlation: %8.3f', [R]);
lReport.Add(' Slope: %8.3f', [Slope]);
lReport.Add(' Intercept: %8.3f', [Intercept]);
lReport.Add(' Standard Error of Estimate: %8.3f', [StdErrorPredicted]);
lReport.Add(' Number of good cases: %8d', [Count]);
end;
AMemo.Lines.Assign(lReport);
finally
lReport.Free;
end;
end;
end.