Files
lazarus-ccr/applications/lazstats/source/forms/analysis/multivariate/singlelinkunit.pas

670 lines
18 KiB
ObjectPascal
Raw Normal View History

unit SingleLinkUnit;
{$mode objfpc}{$H+}
{$WARN 6058 off : Call to subroutine "$1" marked as inline is not inlined}
interface
uses
Classes, SysUtils, Forms, Controls, Graphics, StdCtrls, Buttons, ExtCtrls, ComCtrls,
MainUnit, Globals, ReportFrameUnit, BasicStatsReportAndChartFormUnit;
type
{ TSingleLinkForm }
TSingleLinkForm = class(TBasicStatsReportAndChartForm)
StandardizeChk: TCheckBox;
ReplaceChk: TCheckBox;
CombinationsChk: TCheckBox;
PlotChk: TCheckBox;
DendogramChk: TCheckBox;
OptionsGroup: TGroupBox;
CombinationsPage: TTabSheet;
DendogramPage: TTabSheet;
VarSelEdit: TEdit;
Label2: TLabel;
VarInBtn: TBitBtn;
VarOutBtn: TBitBtn;
Label1: TLabel;
VarList: TListBox;
procedure StandardizeChkChange(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
procedure VarInBtnClick(Sender: TObject);
procedure VarOutBtnClick(Sender: TObject);
private
procedure PlotGroupsVsError(const AGroupErrors: DblDyneVec;
ANumGroups: Integer);
procedure ShowCombinations(const ASubjectIDs, ANumInGroup: IntDyneVec;
const AScores, AGroupErrors: DblDyneVec; const ADistance: DblDyneMat;
const AGroups, AClusters: IntDyneMat;
ANumScores, ANumCases: Integer; out ANumGroups: Integer);
procedure ShowStatistics(AVarLabel: String; ANumCases: Integer;
AMean, AVariance, AStdDev: Double);
procedure TreePlot(const Clusters: IntDyneMat; const Lst: IntDyneVec;
NoPoints: integer);
private
FCombinationsReportFrame: TReportFrame;
FDendogramReportFrame: TReportFrame;
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
SingleLinkForm: TSingleLinkForm;
implementation
{$R *.lfm}
uses
Math, TAChartUtils, TACustomSeries,
GridProcs, MatrixUnit, ChartFrameUnit;
{ TSingleLinkForm }
constructor TSingleLinkForm.Create(AOwner: TComponent);
begin
inherited;
FCombinationsReportFrame := TReportFrame.Create(self);
FCombinationsReportFrame.Parent := CombinationsPage;
FCombinationsReportFrame.Align := alClient;
FDendogramReportFrame := TReportFrame.Create(self);
FDendogramReportFrame.Parent := DendogramPage;
FDendogramReportFrame.Align := alClient;
ChartPage.PageIndex := PageControl.PageCount-1;
PageControl.ActivePageIndex := 0;
end;
procedure TSingleLinkForm.AdjustConstraints;
begin
inherited;
ParamsPanel.Constraints.MinWidth := Max(
4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left,
OptionsGroup.Width
);
ParamsPanel.Constraints.MinHeight :=
Max(VarOutBtn.Top + VarOutBtn.Height, VarSelEdit.Top + VarSelEdit.Height) +
VarList.BorderSpacing.Bottom + OptionsGroup.Height +
ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height;
end;
procedure TSingleLinkForm.Compute;
var
NoInGrp : IntDyneVec = nil; // no. of subjects in a grouping
i, j, NoGroups: integer;
ColSelected : integer;
NoScores : integer;
varlabel : string;
// outline : array[1..501] of char;
// astring : array[0..5] of char;
Scores : DblDyneVec = nil; // subject scores
Distance : DblDyneMat = nil; // distance between objects
SubjectIDs : IntDyneVec = nil; // subject ids - sorted with Distance
Groups : IntDyneMat = nil; // subjects in each group
GrpErrors : DblDyneVec = nil;
Mean, Variance, StdDev: double;
clusters : IntDyneMat = nil;
Lst : IntDyneVec = nil;
// XAxis: DblDyneVec = nil;
// YAxis: DblDyneVec = nil;
// MaxError : double;
// GrpLabels : StrDyneVec = nil;
begin
NoScores := NoCases;
//Get selected variable
ColSelected := GetVariableIndex(OS3MainFrm.DataGrid, VarSelEdit.Text);
// Allocate memory
SetLength(Distance,NoCases+1,NoCases+1); // wp: why always +1?
SetLength(SubjectIDs,NoCases+1);
SetLength(NoInGrp,NoCases+1);
SetLength(Groups,NoCases+1,NoCases+1);
SetLength(Scores,NoCases+1);
SetLength(GrpErrors,NoCases+1);
SetLength(clusters,NoCases+1,3);
SetLength(Lst,NoCases+1);
for i := 0 to NoCases-1 do
begin
NoInGrp[i] := 1;
SubjectIDs[i] := i+1;
end;
varlabel := VarSelEdit.Text;
// Get data into the distance matrix
Scores := CollectVecValues(OS3MainFrm.DataGrid, colSelected);
VecMeanVarStdDev(Scores, mean, variance, stdDev);
// Sort the scores and ids in distance and subjed ids
for i := 0 to NoCases - 2 do
begin
for j := i+1 to NoCases - 1 do
begin
if (Scores[i] > Scores[j]) then // swap
begin
Exchange(Scores[i], Scores[j]);
Exchange(SubjectIDs[i], SubjectIDs[j]);
end;
end;
end;
for i := 0 to NoCases - 1 do
Lst[i+1] := SubjectIDs[i];
// Standardize the distance scores if elected
if StandardizeChk.Checked then
begin
for i := 0 to NoCases - 1 do
Scores[i] := (Scores[i] - Mean) / StdDev;
if ReplaceChk.Checked then // replace original values in DataGrid with z scores if elected
for i := 0 to NoCases - 1 do
OS3MainFrm.DataGrid.Cells[ColSelected, i+1] := Format('%6.4f', [Scores[i]]);
end;
// Show statistics results
ShowStatistics(varLabel, NoCases, Mean, Variance, StdDev);
// Calculate and show combinations
CombinationsPage.TabVisible := CombinationsChk.Checked;
ShowCombinations(subjectIDs, noInGrp, scores, grpErrors, distance, groups, clusters,
NoScores, NoCases, NoGroups);
// Plot groups vs error
if PlotChk.Checked then
begin
ChartPage.TabVisible := true;
PlotGroupsVsError(GrpErrors, NoGroups);
end else
ChartPage.TabVisible := false;
// Dendogram plot
if DendogramChk.Checked then
begin
DendogramPage.TabVisible := true;
TreePlot(clusters, Lst, NoGroups+1);
end else
DendogramPage.TabVisible := false;
end;
procedure TSingleLinkForm.PlotGroupsVsError(const AGroupErrors: DblDyneVec;
ANumGroups: Integer);
var
xData: DblDyneVec = nil;
yData: DblDyneVec = nil;
labels: StrDyneVec = nil;
i: Integer;
ser: TChartSeries;
begin
SetLength(xData, ANumGroups);
SetLength(yData, ANumGroups);
SetLength(labels, ANumGroups);
for i := 0 to ANumGroups-1 do
begin
xData[i] := ANumGroups - i;
yData[i] := AGroupErrors[i];
labels[i] := IntToStr(i+1);
end;
FChartFrame.Clear;
FChartFrame.SetTitle('Plot of Error vs Group Number');
FChartFrame.SetXTitle('Group number');
FChartFrame.SetYTitle('Size of Error');
ser := FChartFrame.PlotXY(ptSymbols, xData, yData, labels, nil, '', DATA_COLORS[0]);
FChartFrame.Chart.BottomAxis.Marks.Source := ser.Source;
FChartFrame.Chart.BottomAxis.Marks.Style := smsXValue;
FChartFrame.Chart.Legend.Visible := false;
(*
SetLength(XAxis,NoCases);
SetLength(YAxis,NoCases);
SetLength(GrpLabels,NoGroups+1);
for i := 0 to NoGroups - 1 do
begin
XAxis[i] := NoGroups - i;
YAxis[i] := GrpErrors[i];
GrpLabels[i] := IntToStr(i + 1);
end;
ScatPlot(XAxis, YAxis, NoGroups, 'Plot of Error vs No. of Groups',
'No. of Groups', 'Size of Error', 2.0, NoCases, 0.0, MaxError,GrpLabels, lReport);
GrpLabels := nil;
YAxis := nil;
XAxis := nil;
*)
end;
procedure TSingleLinkForm.Reset;
var
i: integer;
begin
inherited;
if FCombinationsReportFrame <> nil then
FCombinationsReportFrame.Clear;
if FDendogramReportFrame <> nil then
FDendogramReportFrame.Clear;
VarSelEdit.Clear;
VarList.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
ReplaceChk.Checked := false;
StandardizeChk.Checked := false;
CombinationsChk.Checked := false;
PlotChk.Checked := false;
UpdateBtnStates;
end;
procedure TSingleLinkForm.ShowCombinations(
const ASubjectIDs, ANumInGroup: IntDyneVec;
const AScores, AGroupErrors: DblDyneVec; const ADistance: DblDyneMat;
const AGroups, AClusters: IntDyneMat;
ANumScores, ANumCases: Integer; out ANumGroups: Integer);
var
lReport: TStrings;
done: Boolean;
startAt, endAt: Integer;
outline: String;
smallest: Double;
average: Double;
i, j, ID1, ID2: Integer;
begin
ANumGroups := 0;
lReport := TStringList.Create;
try
if CombinationsChk.Checked then
begin
lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller');
lReport.Add('');
done := false;
startAt := 0;
endAt := ANumScores;
if (endAt > 20) then endAt := 20;
while (not done) do
begin
outline := 'GROUP ID';
for i := startAt to endAt - 1 do
outline := outline + Format('%4d', [ASubjectIDs[i]]);
lReport.Add(outline);
startAt := endAt;
if (startAt >= ANumScores) then
done := true;
endAt := startAt + 20;
if (endAt > ANumScores) then
endAt := ANumScores;
end;
end;
// Calculate distances and smallest distance
repeat
smallest := abs(AScores[0] - AScores[1]); // Initial values
for i := 0 to ANumScores - 2 do
begin
for j := i+1 to ANumScores - 1 do
begin
ADistance[i, j] := abs(AScores[i] - AScores[j]);
ADistance[j, i] := ADistance[i, j];
if (ADistance[i, j] <= smallest) then
begin
smallest := ADistance[i, j];
ID1 := i;
ID2 := j;
end;
end;
end;
if (ANumGroups < ANumCases-1) then
begin
if CombinationsChk.Checked then
begin
lReport.Add(' Group %d is combined with Group %d', [ASubjectIDs[ID1], ASubjectIDs[ID2]]);
lReport.Add('');
end;
end;
// Eliminate second score and replace first with average
ANumInGroup[ID1] := ANumInGroup[ID1] + 1;
ANumInGroup[ID2] := ANumInGroup[ID2] - 1;
AClusters[ANumGroups+1, 1] := ASubjectIDs[ID1];
AClusters[ANumGroups+1, 2] := ASubjectIDs[ID2];
// Record results for this grouping
// Set flags for those objects grouped
AGroups[ANumGroups, ID1] := 1;
AGroups[ANumGroups, ID2] := 1;
// Eliminate second score and replace first with average
if (ANumGroups < ANumCases-1) then
begin
average := abs(AScores[ID1] + AScores[ID2]) / 2.0;
AScores[ID1] := average;
for i := ID2 to ANumScores - 2 do
begin
AScores[i] := AScores[i+1];
ASubjectIDs[i] := ASubjectIDs[i+1];
end;
ANumScores := ANumScores - 1;
for i := 0 to ANumScores - 1 do
AGroups[ANumGroups, ASubjectIDs[i]] := 1;
if CombinationsChk.Checked then
begin
done := false;
startAt := 0;
endat := ANumScores;
if (endAt > 20) then endAt := 20;
while (not done) do
begin
outline := 'GROUP ID';
for i := startAt to endAt - 1 do
outline := outline + Format('%4d', [ASubjectIDs[i]]);
lReport.Add(outline);
startAt := endAt;
if (startAt >= ANumScores) then done := true;
endAt := startAt + 20;
if (endAt > ANumScores) then endAt := ANumScores;
end;
end;
// Get errors
AGroupErrors[ANumGroups] := AGroupErrors[ANumGroups] + ADistance[ID1, ID2];
ANumGroups := ANumGroups + 1;
end;
until (ANumGroups = ANumCases-1);
// Show errors
if CombinationsChk.Checked then
begin
lReport.Add('');
lReport.Add('GROUPING STEP ERROR');
lReport.Add('------------- --------');
for i := 0 to ANumGroups - 1 do
lReport.Add('%8d %8.3f', [i+1, AGroupErrors[i]]);
FCombinationsReportFrame.DisplayReport(lReport);
end;
finally
lReport.Free;
end;
end;
procedure TSingleLinkForm.ShowStatistics(AVarLabel: String; ANumCases: Integer;
AMean, AVariance, AStdDev: Double);
var
lReport: TStrings;
begin
lReport := TStringList.Create;
try
lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller');
lReport.Add('');
lReport.Add('File: %-s', [OS3MainFrm.FileNameEdit.Text]);
lReport.Add('Variable: %-s', [AVarLabel]);
lReport.Add('');
lReport.Add('Number of cases: %12d', [ANumCases]);
lReport.Add('Mean: %12.3f', [AMean]);
lReport.Add('Variance: %12.3f', [AVariance]);
lReport.Add('Std.Dev.: %12.3f', [AStdDev]);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
procedure TSingleLinkForm.StandardizeChkChange(Sender: TObject);
begin
ReplaceChk.Enabled := StandardizeChk.Checked;
end;
procedure TSingleLinkForm.TreePlot(const Clusters: IntDyneMat;
const Lst: IntDyneVec; NoPoints: integer);
const
STAR: Char = '*';
BLANK: Char = ' ';
var
lReport: TStrings;
outline: array[0..501] of char;
aline: array[0..82] of char;
valstr: string;
tempstr: string;
plotline: string;
col1, col2, colpos1, colpos2: integer;
noparts, startcol, endcol: integer;
Results: StrDyneVec = niL;
ColPos: IntDyneVec = nil;
i, j, k, L, linecount, newcol, howlong, count: integer;
begin
linecount := 1;
SetLength(ColPos, NoPoints+2);
SetLength(Results, NoPoints*2+3);
// store initial column positions of vertical linkages
for i := 1 to NoPoints do ColPos[Lst[i]] := 4 + (i * 5);
// create column heading indented 10 spaces
tempstr := 'UNIT ';
for i := 1 to NoPoints do
begin
valstr := format('%5d',[Lst[i]]);
tempstr := tempstr + valstr;
end;
Results[linecount] := tempstr;
linecount := linecount + 1;
// create beginning of vertical linkages
plotline := 'STEP ';
for i := 1 to NoPoints do plotline := plotline + ' *';
Results[linecount] := plotline;
linecount := linecount + 1;
// start dendoplot
for i := 1 to NoPoints - 1 do
begin
outline := '';
valstr := Format('%5d', [i]); // put step no. first
outline := valstr;
// clear remainder of outline
for j := 5 to (5 + NoPoints * 5) do outline[j] := ' ';
outline[6 + NoPoints * 5] := #0;
col1 := Clusters[i,1];
col2 := Clusters[i,2];
// find column positions for each variable
colpos1 := ColPos[col1];
colpos2 := ColPos[col2];
for k := colpos1 to colpos2 do outline[k] := star;
// change column positions 1/2 way between the matched ones
newcol := colpos1 + ((colpos2 - colpos1) div 2);
for k := 1 to NoPoints do
if ((ColPos[k] = colpos1) or (ColPos[k] = colpos2)) then ColPos[k] := newcol;
for k := 1 to NoPoints do
begin
L := ColPos[k];
if ((L <> colpos1) and (L <> colpos2)) then outline[L] := star;
end;
Results[linecount] := outline;
linecount := linecount + 1;
// add a line of connectors to next grouping
outline := ' ';
for j := 5 to (5 + NoPoints * 5) do outline[j] := blank;
for j := 1 to NoPoints do
begin
colpos1 := ColPos[j];
outline[colpos1] := star;
end;
Results[linecount] := outline;
linecount := linecount + 1;
end;
// output the Results in parts
// determine number of pages needed for whole plot
lReport := TStringList.Create;
try
lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller');
lReport.Add('');
noparts := 0;
howlong := Length(Results[1]);
noparts := round(howlong / 80.0);
if (noparts <= 0) then
noparts := 1;
if (noparts = 1) then // simply print the list
for i := 0 to linecount - 1 do
lReport.Add(Results[i])
else // break lines into strings of 15 units
begin
startcol := 0;
endcol := 80;
for i := 1 to noparts do
begin
lReport.Add('PART %d OUTPUT', [i]);
for j := 0 to 80 do
aline[j] := blank;
for j := 0 to linecount - 1 do
begin
count := 0;
outline := Results[j];
for k := startcol to endcol do
begin
aline[count] := outline[k];
count := count + 1;
end;
aline[count+1] := #0;
lReport.Add(aline);
end;
lReport.Add('');
startcol := endcol + 1;
endcol := endcol + 80;
if (endcol > howlong) then endcol := howlong;
end;
end;
FDendogramReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
procedure TSingleLinkForm.UpdateBtnStates;
begin
inherited;
if FCombinationsReportFrame <> nil then
FCombinationsReportFrame.UpdateBtnStates;
if FDendogramReportFrame <> nil then
FDendogramReportFrame.UpdateBtnStates;
VarInBtn.Enabled := (VarList.ItemIndex > -1) and (VarSelEdit.Text = '');
VarOutBtn.Enabled := (VarSelEdit.Text <> '');
end;
function TSingleLinkForm.Validate(out AMsg: String; out AControl: TWinControl): Boolean;
begin
Result := false;
if VarSelEdit.Text = '' then
begin
AMsg := 'No variable selected for analysis.';
AControl := VarList;
exit;
end;
Result := true;
end;
procedure TSingleLinkForm.VarInBtnClick(Sender: TObject);
var
index: integer;
begin
index := VarList.ItemIndex;
if (index > -1) and (VarSelEdit.Text = '') then
begin
VarSelEdit.Text := VarList.Items.Strings[index];
VarList.Items.Delete(index);
end;
UpdateBtnStates;
end;
procedure TSingleLinkForm.VarOutBtnClick(Sender: TObject);
begin
if VarSelEdit.Text <> '' then
begin
VarList.Items.Add(VarSelEdit.Text);
VarSelEdit.Text := '';
end;
UpdateBtnStates;
end;
procedure TSingleLinkForm.VarListDblClick(Sender: TObject);
var
index: Integer;
begin
index := VarList.ItemIndex;
if index > -1 then
begin
if VarSelEdit.Text = '' then
begin
VarSelEdit.Text := VarList.Items[index];
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
end;
procedure TSingleLinkForm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
end.