// Sample file for testing: cansas.laz, use all variables. unit HierarchUnit; {$mode objfpc}{$H+} {$WARN 6058 off : Call to subroutine "$1" marked as inline is not inlined} interface uses Classes, SysUtils, Forms, Controls, Graphics, StdCtrls, Buttons, ExtCtrls, ComCtrls, MainUnit, Globals, MatrixLib, ReportFrameUnit, BasicStatsReportAndChartFormUnit; type { THierarchForm } THierarchForm = class(TBasicStatsReportAndChartForm) MaxGrps: TEdit; STDChk: TCheckBox; ReplaceChk: TCheckBox; StatsChk: TCheckBox; PlotChk: TCheckBox; MaxGrpsChk: TCheckBox; MembersChk: TCheckBox; StatsPage: TTabSheet; VarChk: TCheckBox; OptionsGroup: TGroupBox; PredIn: TBitBtn; PredOut: TBitBtn; Label1: TLabel; Label2: TLabel; PredList: TListBox; VarList: TListBox; procedure PredInClick(Sender: TObject); procedure PredListDblClick(Sender: TObject); procedure PredOutClick(Sender: TObject); procedure VarListDblClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); private FStatsReportFrame: TReportFrame; procedure Plot_GroupCount_Error(const AGrpCount, AError: DblDyneVec; ADataCount: Integer); procedure ShowDescriptiveStats(const AMeans, AVars, AStdDevs: DblDyneVec; ANumCols, ANumCases: Integer; const AVarLabels: StrDyneVec); protected procedure AdjustConstraints; override; procedure Compute; override; procedure UpdateBtnStates; override; function Validate(out AMsg: String; out AControl: TWincontrol): Boolean; override; public constructor Create(AOwner: TComponent); override; procedure Reset; override; end; var HierarchForm: THierarchForm; implementation {$R *.lfm} uses TAChartUtils, TACustomSeries, Utils, GridProcs, ChartFrameUnit; { THierarchForm } constructor THierarchForm.Create(AOwner: TComponent); begin inherited; InitToolbar(FReportFrame.ReportToolbar, tpTop); FReportFrame.ClearBorderSpacings; FStatsReportFrame := TReportFrame.Create(self); FStatsReportFrame.Parent := StatsPage; FStatsReportFrame.Align := alClient; StatsPage.PageIndex := 1; PageControl.ActivePageIndex := 0; end; procedure THierarchForm.AdjustConstraints; begin inherited; ParamsPanel.Constraints.MinWidth := 4*CloseBtn.Width + 3*CloseBtn.BorderSpacing.Left; ParamsPanel.Constraints.MinHeight := PredOut.Top + PredOut.Height + VarList.BorderSpacing.Bottom + OptionsGroup.Height + ButtonBevel.Height + CloseBtn.Height; end; procedure THierarchForm.Compute; //label next1; var varLabels: StrDyneVec = nil; rowLabels: StrDyneVec = nil; w2: IntDyneVec = nil; k4: IntDyneVec = nil; k5: IntDyneVec = nil; L1: IntDyneVec = nil; ColSelected: IntDyneVec = nil; W: DblDyneVec = nil; XAxis: DblDyneVec = nil; YAxis: DblDyneVec = nil; means: DblDyneVec = nil; variances: DblDyneVec = nil; stddevs: DblDyneVec = nil; Distance : DblDyneMat = nil; i, j, k, k1, k3, L, w3, n3, n4, n5, M, col, count: integer; GrpCnt, Nrows, Ncols, NoSelected: integer; X, Y, d1, x1, MaxError: double; lReport: TStrings; begin MaxError := 0.0; GrpCnt := 0; NoSelected := PredList.Items.Count; if not VarChk.Checked then begin SetLength(w2,NoCases); SetLength(k4,NoCases); SetLength(k5,NoCases); SetLength(L1,NoCases); SetLength(W,NoSelected); SetLength(XAxis,NoCases); SetLength(YAxis,NoCases); SetLength(means,NoSelected); SetLength(variances,NoSelected); SetLength(stddevs,NoSelected); SetLength(Distance,NoCases,NoCases); SetLength(varlabels,NoSelected); SetLength(rowlabels,NoCases); SetLength(ColSelected,NoSelected); nCols := NoSelected; nRows := NoCases; for i := 0 to nCols-1 do // nCols = NoSelected! begin varLabels[i] := PredList.Items[i]; colSelected[i] := GetVariableIndex(OS3MainFrm.DataGrid, varLabels[i]); end; for i := 0 to NoCases-1 do rowLabels[i] := OS3MainFrm.DataGrid.Cells[0, i+1]; //IntToStr(i); end else begin SetLength(w2,NoSelected); SetLength(k4,NoSelected); SetLength(k5,NoSelected); SetLength(L1,NoSelected); SetLength(W,NoCases); SetLength(XAxis,NoSelected); SetLength(YAxis,NoSelected); SetLength(means,NoCases); SetLength(variances,NoCases); SetLength(stddevs,NoCases); SetLength(Distance,NoSelected,NoCases); SetLength(varlabels,NoCases); SetLength(rowlabels,NoSelected); SetLength(ColSelected,NoSelected); nCols := NoCases; nRows := NoSelected; //Get labels of selected variables for i := 0 to nRows - 1 do // nRows = NoSelected! begin rowLabels[i] := PredList.Items[i]; colSelected[i] := GetVariableIndex(OS3MainFrm.DataGrid, rowLabels[i]); end; for i := 0 to NoCases-1 do varlabels[i] := OS3MainFrm.DataGrid.Cells[0, i+1]; //IntToStr(i); end; if MaxGrpsChk.Checked then k1 := StrToInt(MaxGrps.Text); if MembersChk.Checked then k3 := 1 else k3 := 0; for j := 0 to nCols-1 do begin means[j] := 0.0; variances[j] := 0.0; stddevs[j] := 0.0; end; if not VarChk.Checked then begin // Get labels of rows // for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[0,i]; // Get data into the distance matrix count := 0; for i := 1 to nRows do begin if (not GoodRecord(OS3MainFrm.DataGrid, i, ColSelected)) then continue; count := count + 1; for j := 1 to Ncols do begin col := ColSelected[j-1]; X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col,i])); means[j-1] := means[j-1] + X; variances[j-1] := variances[j-1] + (X * X); Distance[i-1,j-1] := X; end; end; end else begin // cluster variables // Get labels of columns // for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[i,0]; // Get data into the distance matrix count := 0; for i := 0 to nRows-1 do // actually grid column in this case begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; count := count + 1; col := ColSelected[i]; for j := 0 to Ncols-1 do // actually grid rows in this case begin // if (not GoodRecord(j,NoSelected,ColSelected)) then continue; X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col, j+1])); means[j] := means[j] + X; variances[j] := variances[j] + (X * X); Distance[i, j] := X; end; end; end; // Calculate means and standard deviations of variables for j := 0 to nCols-1 do begin variances[j] := (variances[j] - sqr(means[j]) / count) / (count - 1); stddevs[j] := sqrt(variances[j]); means[j] := means[j] / count; end; // Report descriptive statistics if StatsChk.Checked then begin StatsPage.TabVisible := true; ShowDescriptiveStats(means, variances, stddevs, nCols, count, varlabels); end else StatsPage.TabVisible := false; // Ready the output form lReport := TStringList.Create; try lReport.Add('HIERARCHICAL CLUSTER ANALYSIS'); lReport.Add(''); lReport.Add('Number of objects to cluster: %d on %d variables.', [Nrows, Ncols]); lReport.Add(''); // Standardize the distance scores if elected if StdChk.Checked then begin for j := 0 to nCols-1 do for i := 0 to nRows-1 do Distance[i,j] := (Distance[i,j] - means[j]) / stddevs[j]; end; // replace original values in grid with z scores if elected if ReplaceChk.Checked then begin for i := 0 to nRows-1 do begin if not GoodRecord(OS3MainFrm.DataGrid, i+1, ColSelected) then continue; for j := 0 to nCols-1 do begin col := ColSelected[j]; OS3MainFrm.DataGrid.Cells[col, i+1] := Format('%6.4f', [Distance[i, j]]); end; end; end; // Convert data matrix to initial matrix of error potentials for i := 1 to nRows do begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; for j := 0 to nCols-1 do W[j] := Distance[i-1, j]; for j := i to nRows do begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; Distance[i-1,j-1] := 0.0; for k := 1 to Ncols do Distance[i-1,j-1] := Distance[i-1,j-1] + (Distance[j-1,k-1] - W[k-1]) * (Distance[j-1,k-1] - W[k-1]); Distance[i-1,j-1] := Distance[i-1,j-1] / 2.0; end; end; for i := 1 to nRows do for j := i to nRows do Distance[j-1,i-1] := 0.0; // Now, group the cases for maximum groups down if MaxGrpsChk.Checked then k1 := StrToInt(MaxGrps.Text) else k1 := 2; n3 := nRows; // Initialize group membership and group-n vectors for i := 0 to Nrows-1 do begin k4[i] := i+1; k5[i] := i+1; w2[i] := 1; end; // Locate optimal combination, if more than 2 groups remain { next1: n3 := n3 - 1; if (n3 > 1) then begin } repeat; n3 := n3 - 1; x1 := MAX_FLOAT; for i := 1 to nRows do begin if (k5[i-1] = i) then begin for j := i to nRows do begin if ((i <> j) and (k5[j-1] = j)) then begin d1 := Distance[i-1,j-1] - Distance[i-1,i-1] - Distance[j-1,j-1]; if (d1 < x1) then begin x1 := d1; L := i; M := j; end; // end if end; // end if end; // next j end; // end if end; // next i n4 := w2[L-1]; n5 := w2[M-1]; XAxis[GrpCnt] := n3; YAxis[GrpCnt] := x1; GrpCnt := GrpCnt + 1; if (x1 > MaxError) then MaxError := x1; lReport.Add('%2.d groups after combining group %2.d (n = %2.d) and group %2.d (n = %2.d), error: %7.3f', [n3, L, n4, M, n5, x1]); w3 := w2[L-1] + w2[M-1]; x1 := Distance[L-1,M-1] * w3; Y := Distance[L-1,L-1] * w2[L-1] + Distance[M-1,M-1] * w2[M-1]; Distance[L-1,L-1] := Distance[L-1,M-1]; for i := 0 to nRows-1 do if (k5[i] = M) then k5[i] := L; for i := 1 to nRows do begin if ((i <> L) and (k5[i-1] = i)) then begin if (i <= L) then begin Distance[i-1,L-1] := Distance[i-1,L-1] * (w2[i-1] + w2[L-1]) + Distance[i-1,M-1] * (w2[i-1] + w2[M-1]) + x1 - Y - Distance[i-1,i-1] * w2[i-1]; Distance[i-1,L-1] := Distance[i-1,L-1] / (w2[i-1] + w3); end else begin Distance[L-1,i-1] := Distance[L-1,i-1] * (w2[L-1] + w2[i-1]) + (Distance[M-1,i-1] + Distance[i-1,M-1]) * (w2[M-1] + w2[i-1]); Distance[L-1,i-1] := (Distance[L-1,i-1]+ x1 - Y - Distance[i-1,i-1] * w2[i-1]) / (w2[i-1] + w3); end; end; end; w2[L-1] := w3; if (n3 > k1) then Continue; // Print group memberships of all objects, if optioned if MembersChk.Checked then begin lReport.Add(''); for i := 1 to nRows do begin if (k5[i-1] = i) then begin L := 0; for j := 1 to nRows do begin if (k5[j-1] = i) then begin L := L + 1; L1[L-1] := k4[j-1]; if k3 = 1 then L1[L-1] := j; end; end; if k3 = 1 then begin lReport.Add('Group %d (n = %d)', [i, L]); for j := 1 to L do lReport.Add(' Object: %s', [rowLabels[L1[j-1]-1]]); end; // end if end; // end if end; // next i lReport.Add(''); end; //goto next1; until n3 = 2; //end; // end if FReportFrame.DisplayReport(lReport); if PlotChk.Checked then begin ChartPage.TabVisible := true; Plot_GroupCount_Error(XAxis, YAxis, GrpCnt); end else ChartPage.TabVisible := false; finally lReport.Free; end; end; procedure THierarchForm.Plot_GroupCount_Error(const AGrpCount, AError: DblDyneVec; ADataCount: Integer); var ser: TChartSeries; i: Integer; begin FChartFrame.Clear; FChartFrame.SetTitle('Number of Groups vs. Grouping Error'); FChartFrame.SetXTitle('Number of Groups'); FChartFrame.SetYTitle('Grouping Error'); ser := FChartFrame.PlotXY(ptSymbols, nil, nil, nil, nil, '', DATA_COLORS[0]); for i := 0 to ADataCount-1 do ser.AddXY(i, AError[i], Format('%.0f', [AGrpCount[i]])); FChartFrame.Chart.BottomAxis.Marks.Source := ser.Source; FChartFrame.Chart.BottomAxis.Marks.Style := smsLabel; FChartFrame.Chart.Legend.Visible := false; end; procedure THierarchForm.PredInClick(Sender: TObject); var i: integer; begin i := 0; while i < VarList.Items.Count do begin if VarList.Selected[i] then begin PredList.Items.Add(VarList.Items[i]); VarList.Items.Delete(i); i := 0; end else i := i + 1; end; UpdateBtnStates; end; procedure THierarchForm.PredListDblClick(Sender: TObject); var index: Integer; begin index := PredList.ItemIndex; if index > -1 then begin VarList.Items.Add(PredList.Items[index]); PredList.Items.Delete(index); UpdateBtnStates; end; end; procedure THierarchForm.PredOutClick(Sender: TObject); var i: integer; begin i := 0; while i < PredList.Items.Count do begin if PredList.Selected[i] then begin VarList.Items.Add(PredList.Items[i]); PredList.Items.Delete(i); i := 0; end else i := i + 1; end; UpdateBtnStates; end; procedure THierarchForm.Reset; var i: integer; begin inherited; if FStatsReportFrame <> nil then FStatsReportFrame.Clear; VarList.Clear; for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); PredList.Clear; StdChk.Checked := false; ReplaceChk.Checked := false; StatsChk.Checked := false; PlotChk.Checked := false; MaxGrpsChk.Checked := false; VarChk.Checked := false; MaxGrps.Clear; UpdateBtnStates; end; procedure THierarchForm.ShowDescriptiveStats(const AMeans, AVars, AStdDevs: DblDyneVec; ANumCols, ANumCases: Integer; const AVarLabels: StrDyneVec); var lReport: TStrings; begin lReport := TStringList.Create; try DynVectorPrint(AMeans, ANumCols, 'Variable Means', AVarLabels, ANumCases, lReport); lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(''); DynVectorPrint(AVars, ANumCols, 'Variable Variances', AVarLabels, ANumCases, lReport); lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(''); DynVectorPrint(AStdDevs, ANumCols, 'Variable Standard Deviations', AVarLabels, ANumCases, lReport); FStatsReportFrame.DisplayReport(lReport); finally lReport.Free; end; end; procedure THierarchForm.UpdateBtnStates; begin inherited; if FStatsReportFrame <> nil then FStatsReportFrame.UpdateBtnStates; PredIn.Enabled := AnySelected(VarList); PredOut.Enabled := AnySelected(PredList); end; function THierarchForm.Validate(out AMsg: String; out AControl: TWincontrol): Boolean; var n: Integer; begin Result := false; if PredList.Items.Count = 0 then begin AMsg := 'No Predictor Variables selected.'; AControl := VarList; exit; end; if MaxGrpsChk.Checked then begin if MaxGrps.Text = '' then begin AMsg := 'Maximum number of groups not specified.'; AControl := MaxGrps; exit; end; if not TryStrToInt(MaxGrps.Text, n) or (n < 1) then begin AMsg := 'No valid number of groups given.'; AControl := MaxGrps; exit; end; end; Result := true; end; procedure THierarchForm.VarListDblClick(Sender: TObject); var index: Integer; begin index := VarList.ItemIndex; if index > -1 then begin PredList.Items.Add(VarList.Items[index]); VarList.Items.Delete(index); UpdateBtnStates; end; end; procedure THierarchForm.VarListSelectionChange(Sender: TObject; User: boolean); begin UpdateBtnStates; end; end.