Files
lazarus-ccr/applications/lazstats/source/forms/analysis/multivariate/hierarchunit.pas

627 lines
16 KiB
ObjectPascal
Raw Normal View History

// Sample file for testing: cansas.laz, use all variables.
unit HierarchUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Forms, Controls, Graphics, StdCtrls, Buttons, ExtCtrls, ComCtrls,
MainUnit, Globals, MatrixLib, ReportFrameUnit, BasicStatsReportAndChartFormUnit;
type
{ THierarchForm }
THierarchForm = class(TBasicStatsReportAndChartForm)
MaxGrps: TEdit;
STDChk: TCheckBox;
ReplaceChk: TCheckBox;
StatsChk: TCheckBox;
PlotChk: TCheckBox;
MaxGrpsChk: TCheckBox;
MembersChk: TCheckBox;
StatsPage: TTabSheet;
VarChk: TCheckBox;
OptionsGroup: TGroupBox;
PredIn: TBitBtn;
PredOut: TBitBtn;
Label1: TLabel;
Label2: TLabel;
PredList: TListBox;
VarList: TListBox;
procedure PredInClick(Sender: TObject);
procedure PredListDblClick(Sender: TObject);
procedure PredOutClick(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
private
FStatsReportFrame: TReportFrame;
procedure Plot_GroupCount_Error(const AGrpCount, AError: DblDyneVec;
ADataCount: Integer);
procedure ShowDescriptiveStats(const AMeans, AVars, AStdDevs: DblDyneVec;
ANumCols, ANumCases: Integer; const AVarLabels: StrDyneVec);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
function Validate(out AMsg: String; out AControl: TWincontrol): Boolean; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
HierarchForm: THierarchForm;
implementation
{$R *.lfm}
uses
TAChartUtils, TACustomSeries,
Utils, GridProcs, ChartFrameUnit;
{ THierarchForm }
constructor THierarchForm.Create(AOwner: TComponent);
begin
inherited;
InitToolbar(FReportFrame.ReportToolbar, tpTop);
FReportFrame.ClearBorderSpacings;
FStatsReportFrame := TReportFrame.Create(self);
FStatsReportFrame.Parent := StatsPage;
FStatsReportFrame.Align := alClient;
StatsPage.PageIndex := 1;
PageControl.ActivePageIndex := 0;
end;
procedure THierarchForm.AdjustConstraints;
begin
inherited;
ParamsPanel.Constraints.MinWidth := 4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left;
ParamsPanel.Constraints.MinHeight := PredOut.Top + PredOut.Height +
VarList.BorderSpacing.Bottom + OptionsGroup.Height +
ButtonBevel.Height + CloseBtn.Height;
end;
procedure THierarchForm.Compute;
//label next1;
var
varLabels: StrDyneVec = nil;
rowLabels: StrDyneVec = nil;
w2: IntDyneVec = nil;
k4: IntDyneVec = nil;
k5: IntDyneVec = nil;
L1: IntDyneVec = nil;
ColSelected: IntDyneVec = nil;
W: DblDyneVec = nil;
XAxis: DblDyneVec = nil;
YAxis: DblDyneVec = nil;
means: DblDyneVec = nil;
variances: DblDyneVec = nil;
stddevs: DblDyneVec = nil;
Distance : DblDyneMat = nil;
i, j, k, k1, k3, L, w3, n3, n4, n5, M, col, count: integer;
GrpCnt, Nrows, Ncols, NoSelected: integer;
X, Y, d1, x1, MaxError: double;
lReport: TStrings;
begin
MaxError := 0.0;
GrpCnt := 0;
NoSelected := PredList.Items.Count;
if not VarChk.Checked then
begin
SetLength(w2,NoCases);
SetLength(k4,NoCases);
SetLength(k5,NoCases);
SetLength(L1,NoCases);
SetLength(W,NoSelected);
SetLength(XAxis,NoCases);
SetLength(YAxis,NoCases);
SetLength(means,NoSelected);
SetLength(variances,NoSelected);
SetLength(stddevs,NoSelected);
SetLength(Distance,NoCases,NoCases);
SetLength(varlabels,NoSelected);
SetLength(rowlabels,NoCases);
SetLength(ColSelected,NoSelected);
nCols := NoSelected;
nRows := NoCases;
for i := 0 to nCols-1 do // nCols = NoSelected!
begin
varLabels[i] := PredList.Items[i];
colSelected[i] := GetVariableIndex(OS3MainFrm.DataGrid, varLabels[i]);
end;
for i := 0 to NoCases-1 do
rowLabels[i] := OS3MainFrm.DataGrid.Cells[0, i+1]; //IntToStr(i);
end else
begin
SetLength(w2,NoSelected);
SetLength(k4,NoSelected);
SetLength(k5,NoSelected);
SetLength(L1,NoSelected);
SetLength(W,NoCases);
SetLength(XAxis,NoSelected);
SetLength(YAxis,NoSelected);
SetLength(means,NoCases);
SetLength(variances,NoCases);
SetLength(stddevs,NoCases);
SetLength(Distance,NoSelected,NoCases);
SetLength(varlabels,NoCases);
SetLength(rowlabels,NoSelected);
SetLength(ColSelected,NoSelected);
nCols := NoCases;
nRows := NoSelected;
//Get labels of selected variables
for i := 0 to nRows - 1 do // nRows = NoSelected!
begin
rowLabels[i] := PredList.Items[i];
colSelected[i] := GetVariableIndex(OS3MainFrm.DataGrid, rowLabels[i]);
end;
for i := 0 to NoCases-1 do
varlabels[i] := OS3MainFrm.DataGrid.Cells[0, i+1]; //IntToStr(i);
end;
if MaxGrpsChk.Checked then
k1 := StrToInt(MaxGrps.Text);
if MembersChk.Checked then
k3 := 1
else
k3 := 0;
for j := 0 to nCols-1 do
begin
means[j] := 0.0;
variances[j] := 0.0;
stddevs[j] := 0.0;
end;
if not VarChk.Checked then
begin
// Get labels of rows
// for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[0,i];
// Get data into the distance matrix
count := 0;
for i := 1 to nRows do
begin
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColSelected)) then continue;
count := count + 1;
for j := 1 to Ncols do
begin
col := ColSelected[j-1];
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col,i]));
means[j-1] := means[j-1] + X;
variances[j-1] := variances[j-1] + (X * X);
Distance[i-1,j-1] := X;
end;
end;
end else
begin // cluster variables
// Get labels of columns
// for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[i,0];
// Get data into the distance matrix
count := 0;
for i := 0 to nRows-1 do // actually grid column in this case
begin
// if (not GoodRecord(i,NoSelected,ColSelected)) then continue;
count := count + 1;
col := ColSelected[i];
for j := 0 to Ncols-1 do // actually grid rows in this case
begin
// if (not GoodRecord(j,NoSelected,ColSelected)) then continue;
X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col, j+1]));
means[j] := means[j] + X;
variances[j] := variances[j] + (X * X);
Distance[i, j] := X;
end;
end;
end;
// Calculate means and standard deviations of variables
for j := 0 to nCols-1 do
begin
variances[j] := (variances[j] - sqr(means[j]) / count) / (count - 1);
stddevs[j] := sqrt(variances[j]);
means[j] := means[j] / count;
end;
// Report descriptive statistics
if StatsChk.Checked then
begin
StatsPage.TabVisible := true;
ShowDescriptiveStats(means, variances, stddevs, nCols, count, varlabels);
end else
StatsPage.TabVisible := false;
// Ready the output form
lReport := TStringList.Create;
try
lReport.Add('HIERARCHICAL CLUSTER ANALYSIS');
lReport.Add('');
lReport.Add('Number of objects to cluster: %d on %d variables.', [Nrows, Ncols]);
lReport.Add('');
// Standardize the distance scores if elected
if StdChk.Checked then
begin
for j := 0 to nCols-1 do
for i := 0 to nRows-1 do
Distance[i,j] := (Distance[i,j] - means[j]) / stddevs[j];
end;
// replace original values in grid with z scores if elected
if ReplaceChk.Checked then
begin
for i := 0 to nRows-1 do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i+1, ColSelected) then continue;
for j := 0 to nCols-1 do
begin
col := ColSelected[j];
OS3MainFrm.DataGrid.Cells[col, i+1] := Format('%6.4f', [Distance[i, j]]);
end;
end;
end;
// Convert data matrix to initial matrix of error potentials
for i := 1 to nRows do
begin
// if (not GoodRecord(i,NoSelected,ColSelected)) then continue;
for j := 0 to nCols-1 do
W[j] := Distance[i-1, j];
for j := i to nRows do
begin
// if (not GoodRecord(i,NoSelected,ColSelected)) then continue;
Distance[i-1,j-1] := 0.0;
for k := 1 to Ncols do
Distance[i-1,j-1] := Distance[i-1,j-1] + (Distance[j-1,k-1] - W[k-1]) * (Distance[j-1,k-1] - W[k-1]);
Distance[i-1,j-1] := Distance[i-1,j-1] / 2.0;
end;
end;
for i := 1 to nRows do
for j := i to nRows do Distance[j-1,i-1] := 0.0;
// Now, group the cases for maximum groups down
if MaxGrpsChk.Checked then
k1 := StrToInt(MaxGrps.Text)
else
k1 := 2;
n3 := nRows;
// Initialize group membership and group-n vectors
for i := 0 to Nrows-1 do
begin
k4[i] := i+1;
k5[i] := i+1;
w2[i] := 1;
end;
// Locate optimal combination, if more than 2 groups remain
{
next1:
n3 := n3 - 1;
if (n3 > 1) then
begin
}
repeat;
n3 := n3 - 1;
x1 := MAX_FLOAT;
for i := 1 to nRows do
begin
if (k5[i-1] = i) then
begin
for j := i to nRows do
begin
if ((i <> j) and (k5[j-1] = j)) then
begin
d1 := Distance[i-1,j-1] - Distance[i-1,i-1] - Distance[j-1,j-1];
if (d1 < x1) then
begin
x1 := d1;
L := i;
M := j;
end; // end if
end; // end if
end; // next j
end; // end if
end; // next i
n4 := w2[L-1];
n5 := w2[M-1];
XAxis[GrpCnt] := n3;
YAxis[GrpCnt] := x1;
GrpCnt := GrpCnt + 1;
if (x1 > MaxError) then MaxError := x1;
lReport.Add('%2.d groups after combining group %2.d (n = %2.d) and group %2.d (n = %2.d), error: %7.3f', [n3, L, n4, M, n5, x1]);
w3 := w2[L-1] + w2[M-1];
x1 := Distance[L-1,M-1] * w3;
Y := Distance[L-1,L-1] * w2[L-1] + Distance[M-1,M-1] * w2[M-1];
Distance[L-1,L-1] := Distance[L-1,M-1];
for i := 0 to nRows-1 do
if (k5[i] = M) then k5[i] := L;
for i := 1 to nRows do
begin
if ((i <> L) and (k5[i-1] = i)) then
begin
if (i <= L) then
begin
Distance[i-1,L-1] := Distance[i-1,L-1] * (w2[i-1] + w2[L-1])
+ Distance[i-1,M-1] * (w2[i-1] + w2[M-1])
+ x1 - Y - Distance[i-1,i-1] * w2[i-1];
Distance[i-1,L-1] := Distance[i-1,L-1] / (w2[i-1] + w3);
end else
begin
Distance[L-1,i-1] := Distance[L-1,i-1] * (w2[L-1] + w2[i-1])
+ (Distance[M-1,i-1] + Distance[i-1,M-1]) * (w2[M-1] + w2[i-1]);
Distance[L-1,i-1] := (Distance[L-1,i-1]+ x1 - Y
- Distance[i-1,i-1] * w2[i-1]) / (w2[i-1] + w3);
end;
end;
end;
w2[L-1] := w3;
if (n3 > k1) then
Continue;
// Print group memberships of all objects, if optioned
if MembersChk.Checked then
begin
lReport.Add('');
for i := 1 to nRows do
begin
if (k5[i-1] = i) then
begin
L := 0;
for j := 1 to nRows do
begin
if (k5[j-1] = i) then
begin
L := L + 1;
L1[L-1] := k4[j-1];
if k3 = 1 then L1[L-1] := j;
end;
end;
if k3 = 1 then
begin
lReport.Add('Group %d (n = %d)', [i, L]);
for j := 1 to L do
lReport.Add(' Object: %s', [rowLabels[L1[j-1]-1]]);
end; // end if
end; // end if
end; // next i
lReport.Add('');
end;
//goto next1;
until n3 = 2;
//end; // end if
FReportFrame.DisplayReport(lReport);
if PlotChk.Checked then
begin
ChartPage.TabVisible := true;
Plot_GroupCount_Error(XAxis, YAxis, GrpCnt);
end else
ChartPage.TabVisible := false;
finally
lReport.Free;
end;
end;
procedure THierarchForm.Plot_GroupCount_Error(const AGrpCount, AError: DblDyneVec;
ADataCount: Integer);
var
ser: TChartSeries;
i: Integer;
begin
FChartFrame.Clear;
FChartFrame.SetTitle('Number of Groups vs. Grouping Error');
FChartFrame.SetXTitle('Number of Groups');
FChartFrame.SetYTitle('Grouping Error');
ser := FChartFrame.PlotXY(ptSymbols, nil, nil, nil, nil, '', DATA_COLORS[0]);
for i := 0 to ADataCount-1 do
ser.AddXY(i, AError[i], Format('%.0f', [AGrpCount[i]]));
FChartFrame.Chart.BottomAxis.Marks.Source := ser.Source;
FChartFrame.Chart.BottomAxis.Marks.Style := smsLabel;
FChartFrame.Chart.Legend.Visible := false;
end;
procedure THierarchForm.PredInClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < VarList.Items.Count do
begin
if VarList.Selected[i] then
begin
PredList.Items.Add(VarList.Items[i]);
VarList.Items.Delete(i);
i := 0;
end else
i := i + 1;
end;
UpdateBtnStates;
end;
procedure THierarchForm.PredListDblClick(Sender: TObject);
var
index: Integer;
begin
index := PredList.ItemIndex;
if index > -1 then
begin
VarList.Items.Add(PredList.Items[index]);
PredList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure THierarchForm.PredOutClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < PredList.Items.Count do
begin
if PredList.Selected[i] then
begin
VarList.Items.Add(PredList.Items[i]);
PredList.Items.Delete(i);
i := 0;
end else
i := i + 1;
end;
UpdateBtnStates;
end;
procedure THierarchForm.Reset;
var
i: integer;
begin
inherited;
if FStatsReportFrame <> nil then
FStatsReportFrame.Clear;
VarList.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
PredList.Clear;
StdChk.Checked := false;
ReplaceChk.Checked := false;
StatsChk.Checked := false;
PlotChk.Checked := false;
MaxGrpsChk.Checked := false;
VarChk.Checked := false;
MaxGrps.Clear;
UpdateBtnStates;
end;
procedure THierarchForm.ShowDescriptiveStats(const AMeans, AVars, AStdDevs: DblDyneVec;
ANumCols, ANumCases: Integer; const AVarLabels: StrDyneVec);
var
lReport: TStrings;
begin
lReport := TStringList.Create;
try
DynVectorPrint(AMeans, ANumCols, 'Variable Means', AVarLabels, ANumCases, lReport);
lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add('');
DynVectorPrint(AVars, ANumCols, 'Variable Variances', AVarLabels, ANumCases, lReport);
lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add('');
DynVectorPrint(AStdDevs, ANumCols, 'Variable Standard Deviations', AVarLabels, ANumCases, lReport);
FStatsReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
procedure THierarchForm.UpdateBtnStates;
begin
inherited;
if FStatsReportFrame <> nil then
FStatsReportFrame.UpdateBtnStates;
PredIn.Enabled := AnySelected(VarList);
PredOut.Enabled := AnySelected(PredList);
end;
function THierarchForm.Validate(out AMsg: String; out AControl: TWincontrol): Boolean;
var
n: Integer;
begin
Result := false;
if PredList.Items.Count = 0 then
begin
AMsg := 'No Predictor Variables selected.';
AControl := VarList;
exit;
end;
if MaxGrpsChk.Checked then
begin
if MaxGrps.Text = '' then
begin
AMsg := 'Maximum number of groups not specified.';
AControl := MaxGrps;
exit;
end;
if not TryStrToInt(MaxGrps.Text, n) or (n < 1) then
begin
AMsg := 'No valid number of groups given.';
AControl := MaxGrps;
exit;
end;
end;
Result := true;
end;
procedure THierarchForm.VarListDblClick(Sender: TObject);
var
index: Integer;
begin
index := VarList.ItemIndex;
if index > -1 then
begin
PredList.Items.Add(VarList.Items[index]);
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure THierarchForm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
end.