unit SingleLinkUnit; {$mode objfpc}{$H+} {$WARN 6058 off : Call to subroutine "$1" marked as inline is not inlined} interface uses Classes, SysUtils, Forms, Controls, Graphics, StdCtrls, Buttons, ExtCtrls, ComCtrls, MainUnit, Globals, ReportFrameUnit, BasicStatsReportAndChartFormUnit; type { TSingleLinkForm } TSingleLinkForm = class(TBasicStatsReportAndChartForm) StandardizeChk: TCheckBox; ReplaceChk: TCheckBox; CombinationsChk: TCheckBox; PlotChk: TCheckBox; DendogramChk: TCheckBox; OptionsGroup: TGroupBox; CombinationsPage: TTabSheet; DendogramPage: TTabSheet; VarSelEdit: TEdit; Label2: TLabel; VarInBtn: TBitBtn; VarOutBtn: TBitBtn; Label1: TLabel; VarList: TListBox; procedure StandardizeChkChange(Sender: TObject); procedure VarListDblClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); procedure VarInBtnClick(Sender: TObject); procedure VarOutBtnClick(Sender: TObject); private procedure PlotGroupsVsError(const AGroupErrors: DblDyneVec; ANumGroups: Integer); procedure ShowCombinations(const ASubjectIDs, ANumInGroup: IntDyneVec; const AScores, AGroupErrors: DblDyneVec; const ADistance: DblDyneMat; const AGroups, AClusters: IntDyneMat; ANumScores, ANumCases: Integer; out ANumGroups: Integer); procedure ShowStatistics(AVarLabel: String; ANumCases: Integer; AMean, AVariance, AStdDev: Double); procedure TreePlot(const Clusters: IntDyneMat; const Lst: IntDyneVec; NoPoints: integer); private FCombinationsReportFrame: TReportFrame; FDendogramReportFrame: TReportFrame; protected procedure AdjustConstraints; override; procedure Compute; override; procedure UpdateBtnStates; override; function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override; public constructor Create(AOwner: TComponent); override; procedure Reset; override; end; var SingleLinkForm: TSingleLinkForm; implementation {$R *.lfm} uses Math, TAChartUtils, TACustomSeries, GridProcs, MatrixUnit, ChartFrameUnit; { TSingleLinkForm } constructor TSingleLinkForm.Create(AOwner: TComponent); begin inherited; FCombinationsReportFrame := TReportFrame.Create(self); FCombinationsReportFrame.Parent := CombinationsPage; FCombinationsReportFrame.Align := alClient; FDendogramReportFrame := TReportFrame.Create(self); FDendogramReportFrame.Parent := DendogramPage; FDendogramReportFrame.Align := alClient; ChartPage.PageIndex := PageControl.PageCount-1; PageControl.ActivePageIndex := 0; end; procedure TSingleLinkForm.AdjustConstraints; begin inherited; ParamsPanel.Constraints.MinWidth := Max( 4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left, OptionsGroup.Width ); ParamsPanel.Constraints.MinHeight := Max(VarOutBtn.Top + VarOutBtn.Height, VarSelEdit.Top + VarSelEdit.Height) + VarList.BorderSpacing.Bottom + OptionsGroup.Height + ButtonBevel.Height + CloseBtn.BorderSpacing.Top + CloseBtn.Height; end; procedure TSingleLinkForm.Compute; var NoInGrp : IntDyneVec = nil; // no. of subjects in a grouping i, j, NoGroups: integer; ColSelected : integer; NoScores : integer; varlabel : string; // outline : array[1..501] of char; // astring : array[0..5] of char; Scores : DblDyneVec = nil; // subject scores Distance : DblDyneMat = nil; // distance between objects SubjectIDs : IntDyneVec = nil; // subject ids - sorted with Distance Groups : IntDyneMat = nil; // subjects in each group GrpErrors : DblDyneVec = nil; Mean, Variance, StdDev: double; clusters : IntDyneMat = nil; Lst : IntDyneVec = nil; // XAxis: DblDyneVec = nil; // YAxis: DblDyneVec = nil; // MaxError : double; // GrpLabels : StrDyneVec = nil; begin NoScores := NoCases; //Get selected variable ColSelected := GetVariableIndex(OS3MainFrm.DataGrid, VarSelEdit.Text); // Allocate memory SetLength(Distance,NoCases+1,NoCases+1); // wp: why always +1? SetLength(SubjectIDs,NoCases+1); SetLength(NoInGrp,NoCases+1); SetLength(Groups,NoCases+1,NoCases+1); SetLength(Scores,NoCases+1); SetLength(GrpErrors,NoCases+1); SetLength(clusters,NoCases+1,3); SetLength(Lst,NoCases+1); for i := 0 to NoCases-1 do begin NoInGrp[i] := 1; SubjectIDs[i] := i+1; end; varlabel := VarSelEdit.Text; // Get data into the distance matrix Scores := CollectVecValues(OS3MainFrm.DataGrid, colSelected); VecMeanVarStdDev(Scores, mean, variance, stdDev); // Sort the scores and ids in distance and subjed ids for i := 0 to NoCases - 2 do begin for j := i+1 to NoCases - 1 do begin if (Scores[i] > Scores[j]) then // swap begin Exchange(Scores[i], Scores[j]); Exchange(SubjectIDs[i], SubjectIDs[j]); end; end; end; for i := 0 to NoCases - 1 do Lst[i+1] := SubjectIDs[i]; // Standardize the distance scores if elected if StandardizeChk.Checked then begin for i := 0 to NoCases - 1 do Scores[i] := (Scores[i] - Mean) / StdDev; if ReplaceChk.Checked then // replace original values in DataGrid with z scores if elected for i := 0 to NoCases - 1 do OS3MainFrm.DataGrid.Cells[ColSelected, i+1] := Format('%6.4f', [Scores[i]]); end; // Show statistics results ShowStatistics(varLabel, NoCases, Mean, Variance, StdDev); // Calculate and show combinations CombinationsPage.TabVisible := CombinationsChk.Checked; ShowCombinations(subjectIDs, noInGrp, scores, grpErrors, distance, groups, clusters, NoScores, NoCases, NoGroups); // Plot groups vs error if PlotChk.Checked then begin ChartPage.TabVisible := true; PlotGroupsVsError(GrpErrors, NoGroups); end else ChartPage.TabVisible := false; // Dendogram plot if DendogramChk.Checked then begin DendogramPage.TabVisible := true; TreePlot(clusters, Lst, NoGroups+1); end else DendogramPage.TabVisible := false; end; procedure TSingleLinkForm.PlotGroupsVsError(const AGroupErrors: DblDyneVec; ANumGroups: Integer); var xData: DblDyneVec = nil; yData: DblDyneVec = nil; labels: StrDyneVec = nil; i: Integer; ser: TChartSeries; begin SetLength(xData, ANumGroups); SetLength(yData, ANumGroups); SetLength(labels, ANumGroups); for i := 0 to ANumGroups-1 do begin xData[i] := ANumGroups - i; yData[i] := AGroupErrors[i]; labels[i] := IntToStr(i+1); end; FChartFrame.Clear; FChartFrame.SetTitle('Plot of Error vs Group Number'); FChartFrame.SetXTitle('Group number'); FChartFrame.SetYTitle('Size of Error'); ser := FChartFrame.PlotXY(ptSymbols, xData, yData, labels, nil, '', DATA_COLORS[0]); FChartFrame.Chart.BottomAxis.Marks.Source := ser.Source; FChartFrame.Chart.BottomAxis.Marks.Style := smsXValue; FChartFrame.Chart.Legend.Visible := false; (* SetLength(XAxis,NoCases); SetLength(YAxis,NoCases); SetLength(GrpLabels,NoGroups+1); for i := 0 to NoGroups - 1 do begin XAxis[i] := NoGroups - i; YAxis[i] := GrpErrors[i]; GrpLabels[i] := IntToStr(i + 1); end; ScatPlot(XAxis, YAxis, NoGroups, 'Plot of Error vs No. of Groups', 'No. of Groups', 'Size of Error', 2.0, NoCases, 0.0, MaxError,GrpLabels, lReport); GrpLabels := nil; YAxis := nil; XAxis := nil; *) end; procedure TSingleLinkForm.Reset; var i: integer; begin inherited; if FCombinationsReportFrame <> nil then FCombinationsReportFrame.Clear; if FDendogramReportFrame <> nil then FDendogramReportFrame.Clear; VarSelEdit.Clear; VarList.Clear; for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); ReplaceChk.Checked := false; StandardizeChk.Checked := false; CombinationsChk.Checked := false; PlotChk.Checked := false; UpdateBtnStates; end; procedure TSingleLinkForm.ShowCombinations( const ASubjectIDs, ANumInGroup: IntDyneVec; const AScores, AGroupErrors: DblDyneVec; const ADistance: DblDyneMat; const AGroups, AClusters: IntDyneMat; ANumScores, ANumCases: Integer; out ANumGroups: Integer); var lReport: TStrings; done: Boolean; startAt, endAt: Integer; outline: String; smallest: Double; average: Double; i, j, ID1, ID2: Integer; begin ANumGroups := 0; lReport := TStringList.Create; try if CombinationsChk.Checked then begin lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller'); lReport.Add(''); done := false; startAt := 0; endAt := ANumScores; if (endAt > 20) then endAt := 20; while (not done) do begin outline := 'GROUP ID'; for i := startAt to endAt - 1 do outline := outline + Format('%4d', [ASubjectIDs[i]]); lReport.Add(outline); startAt := endAt; if (startAt >= ANumScores) then done := true; endAt := startAt + 20; if (endAt > ANumScores) then endAt := ANumScores; end; end; // Calculate distances and smallest distance repeat smallest := abs(AScores[0] - AScores[1]); // Initial values for i := 0 to ANumScores - 2 do begin for j := i+1 to ANumScores - 1 do begin ADistance[i, j] := abs(AScores[i] - AScores[j]); ADistance[j, i] := ADistance[i, j]; if (ADistance[i, j] <= smallest) then begin smallest := ADistance[i, j]; ID1 := i; ID2 := j; end; end; end; if (ANumGroups < ANumCases-1) then begin if CombinationsChk.Checked then begin lReport.Add(' Group %d is combined with Group %d', [ASubjectIDs[ID1], ASubjectIDs[ID2]]); lReport.Add(''); end; end; // Eliminate second score and replace first with average ANumInGroup[ID1] := ANumInGroup[ID1] + 1; ANumInGroup[ID2] := ANumInGroup[ID2] - 1; AClusters[ANumGroups+1, 1] := ASubjectIDs[ID1]; AClusters[ANumGroups+1, 2] := ASubjectIDs[ID2]; // Record results for this grouping // Set flags for those objects grouped AGroups[ANumGroups, ID1] := 1; AGroups[ANumGroups, ID2] := 1; // Eliminate second score and replace first with average if (ANumGroups < ANumCases-1) then begin average := abs(AScores[ID1] + AScores[ID2]) / 2.0; AScores[ID1] := average; for i := ID2 to ANumScores - 2 do begin AScores[i] := AScores[i+1]; ASubjectIDs[i] := ASubjectIDs[i+1]; end; ANumScores := ANumScores - 1; for i := 0 to ANumScores - 1 do AGroups[ANumGroups, ASubjectIDs[i]] := 1; if CombinationsChk.Checked then begin done := false; startAt := 0; endat := ANumScores; if (endAt > 20) then endAt := 20; while (not done) do begin outline := 'GROUP ID'; for i := startAt to endAt - 1 do outline := outline + Format('%4d', [ASubjectIDs[i]]); lReport.Add(outline); startAt := endAt; if (startAt >= ANumScores) then done := true; endAt := startAt + 20; if (endAt > ANumScores) then endAt := ANumScores; end; end; // Get errors AGroupErrors[ANumGroups] := AGroupErrors[ANumGroups] + ADistance[ID1, ID2]; ANumGroups := ANumGroups + 1; end; until (ANumGroups = ANumCases-1); // Show errors if CombinationsChk.Checked then begin lReport.Add(''); lReport.Add('GROUPING STEP ERROR'); lReport.Add('------------- --------'); for i := 0 to ANumGroups - 1 do lReport.Add('%8d %8.3f', [i+1, AGroupErrors[i]]); FCombinationsReportFrame.DisplayReport(lReport); end; finally lReport.Free; end; end; procedure TSingleLinkForm.ShowStatistics(AVarLabel: String; ANumCases: Integer; AMean, AVariance, AStdDev: Double); var lReport: TStrings; begin lReport := TStringList.Create; try lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller'); lReport.Add(''); lReport.Add('File: %-s', [OS3MainFrm.FileNameEdit.Text]); lReport.Add('Variable: %-s', [AVarLabel]); lReport.Add(''); lReport.Add('Number of cases: %12d', [ANumCases]); lReport.Add('Mean: %12.3f', [AMean]); lReport.Add('Variance: %12.3f', [AVariance]); lReport.Add('Std.Dev.: %12.3f', [AStdDev]); FReportFrame.DisplayReport(lReport); finally lReport.Free; end; end; procedure TSingleLinkForm.StandardizeChkChange(Sender: TObject); begin ReplaceChk.Enabled := StandardizeChk.Checked; end; procedure TSingleLinkForm.TreePlot(const Clusters: IntDyneMat; const Lst: IntDyneVec; NoPoints: integer); const STAR: Char = '*'; BLANK: Char = ' '; var lReport: TStrings; outline: array[0..501] of char; aline: array[0..82] of char; valstr: string; tempstr: string; plotline: string; col1, col2, colpos1, colpos2: integer; noparts, startcol, endcol: integer; Results: StrDyneVec = niL; ColPos: IntDyneVec = nil; i, j, k, L, linecount, newcol, howlong, count: integer; begin linecount := 1; SetLength(ColPos, NoPoints+2); SetLength(Results, NoPoints*2+3); // store initial column positions of vertical linkages for i := 1 to NoPoints do ColPos[Lst[i]] := 4 + (i * 5); // create column heading indented 10 spaces tempstr := 'UNIT '; for i := 1 to NoPoints do begin valstr := format('%5d',[Lst[i]]); tempstr := tempstr + valstr; end; Results[linecount] := tempstr; linecount := linecount + 1; // create beginning of vertical linkages plotline := 'STEP '; for i := 1 to NoPoints do plotline := plotline + ' *'; Results[linecount] := plotline; linecount := linecount + 1; // start dendoplot for i := 1 to NoPoints - 1 do begin outline := ''; valstr := Format('%5d', [i]); // put step no. first outline := valstr; // clear remainder of outline for j := 5 to (5 + NoPoints * 5) do outline[j] := ' '; outline[6 + NoPoints * 5] := #0; col1 := Clusters[i,1]; col2 := Clusters[i,2]; // find column positions for each variable colpos1 := ColPos[col1]; colpos2 := ColPos[col2]; for k := colpos1 to colpos2 do outline[k] := star; // change column positions 1/2 way between the matched ones newcol := colpos1 + ((colpos2 - colpos1) div 2); for k := 1 to NoPoints do if ((ColPos[k] = colpos1) or (ColPos[k] = colpos2)) then ColPos[k] := newcol; for k := 1 to NoPoints do begin L := ColPos[k]; if ((L <> colpos1) and (L <> colpos2)) then outline[L] := star; end; Results[linecount] := outline; linecount := linecount + 1; // add a line of connectors to next grouping outline := ' '; for j := 5 to (5 + NoPoints * 5) do outline[j] := blank; for j := 1 to NoPoints do begin colpos1 := ColPos[j]; outline[colpos1] := star; end; Results[linecount] := outline; linecount := linecount + 1; end; // output the Results in parts // determine number of pages needed for whole plot lReport := TStringList.Create; try lReport.Add('SINGLE LINKAGE CLUSTERING by Bill Miller'); lReport.Add(''); noparts := 0; howlong := Length(Results[1]); noparts := round(howlong / 80.0); if (noparts <= 0) then noparts := 1; if (noparts = 1) then // simply print the list for i := 0 to linecount - 1 do lReport.Add(Results[i]) else // break lines into strings of 15 units begin startcol := 0; endcol := 80; for i := 1 to noparts do begin lReport.Add('PART %d OUTPUT', [i]); for j := 0 to 80 do aline[j] := blank; for j := 0 to linecount - 1 do begin count := 0; outline := Results[j]; for k := startcol to endcol do begin aline[count] := outline[k]; count := count + 1; end; aline[count+1] := #0; lReport.Add(aline); end; lReport.Add(''); startcol := endcol + 1; endcol := endcol + 80; if (endcol > howlong) then endcol := howlong; end; end; FDendogramReportFrame.DisplayReport(lReport); finally lReport.Free; end; end; procedure TSingleLinkForm.UpdateBtnStates; begin inherited; if FCombinationsReportFrame <> nil then FCombinationsReportFrame.UpdateBtnStates; if FDendogramReportFrame <> nil then FDendogramReportFrame.UpdateBtnStates; VarInBtn.Enabled := (VarList.ItemIndex > -1) and (VarSelEdit.Text = ''); VarOutBtn.Enabled := (VarSelEdit.Text <> ''); end; function TSingleLinkForm.Validate(out AMsg: String; out AControl: TWinControl): Boolean; begin Result := false; if VarSelEdit.Text = '' then begin AMsg := 'No variable selected for analysis.'; AControl := VarList; exit; end; Result := true; end; procedure TSingleLinkForm.VarInBtnClick(Sender: TObject); var index: integer; begin index := VarList.ItemIndex; if (index > -1) and (VarSelEdit.Text = '') then begin VarSelEdit.Text := VarList.Items.Strings[index]; VarList.Items.Delete(index); end; UpdateBtnStates; end; procedure TSingleLinkForm.VarOutBtnClick(Sender: TObject); begin if VarSelEdit.Text <> '' then begin VarList.Items.Add(VarSelEdit.Text); VarSelEdit.Text := ''; end; UpdateBtnStates; end; procedure TSingleLinkForm.VarListDblClick(Sender: TObject); var index: Integer; begin index := VarList.ItemIndex; if index > -1 then begin if VarSelEdit.Text = '' then begin VarSelEdit.Text := VarList.Items[index]; VarList.Items.Delete(index); UpdateBtnStates; end; end; end; procedure TSingleLinkForm.VarListSelectionChange(Sender: TObject; User: boolean); begin UpdateBtnStates; end; end.