// Sample file for testing: cansas.laz, use all variiables. unit HierarchUnit; {$mode objfpc}{$H+} interface uses Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs, StdCtrls, Buttons, ExtCtrls, MainUnit, OutputUnit, Globals, MatrixLib, GraphLib, DataProcs; type { THierarchFrm } THierarchFrm = class(TForm) Bevel1: TBevel; ResetBtn: TButton; ComputeBtn: TButton; CloseBtn: TButton; MaxGrps: TEdit; STDChk: TCheckBox; ReplaceChk: TCheckBox; StatsChk: TCheckBox; PlotChk: TCheckBox; MaxGrpsChk: TCheckBox; MembersChk: TCheckBox; VarChk: TCheckBox; GroupBox1: TGroupBox; PredIn: TBitBtn; PredOut: TBitBtn; Label1: TLabel; Label2: TLabel; PredList: TListBox; VarList: TListBox; procedure ComputeBtnClick(Sender: TObject); procedure FormActivate(Sender: TObject); procedure FormCreate(Sender: TObject); procedure FormShow(Sender: TObject); procedure PredInClick(Sender: TObject); procedure PredOutClick(Sender: TObject); procedure ResetBtnClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; User: boolean); private { private declarations } FAutoSized: Boolean; procedure UpdateBtnStates; public { public declarations } end; var HierarchFrm: THierarchFrm; implementation uses Math, Utils; { THierarchFrm } procedure THierarchFrm.ResetBtnClick(Sender: TObject); var i: integer; begin VarList.Clear; PredList.Clear; StdChk.Checked := false; ReplaceChk.Checked := false; StatsChk.Checked := false; PlotChk.Checked := false; MaxGrpsChk.Checked := false; VarChk.Checked := false; MaxGrps.Text := ''; for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); UpdateBtnStates; end; procedure THierarchFrm.FormActivate(Sender: TObject); var w: Integer; begin if FAutoSized then exit; w := MaxValue([ResetBtn.Width, ComputeBtn.Width, CloseBtn.Width]); ResetBtn.Constraints.MinWidth := w; ComputeBtn.Constraints.MinWidth := w; CloseBtn.Constraints.MinWidth := w; VarList.Constraints.MinWidth := PredList.Width; Constraints.MinWidth := Width; Constraints.MinHeight := Height; FAutoSized := true; end; procedure THierarchFrm.FormCreate(Sender: TObject); begin Assert(OS3MainFrm <> nil); if GraphFrm = nil then Application.CreateForm(TGraphFrm, GraphFrm); end; procedure THierarchFrm.FormShow(Sender: TObject); begin ResetBtnClick(self); end; procedure THierarchFrm.ComputeBtnClick(Sender: TObject); label next1; var varlabels, rowlabels : StrDyneVec; cellstring : string; i, j, k, k1, k3, L, w3, n3, n4, n5, M, col, count: integer; GrpCnt, Nrows, Ncols, NoSelected: integer; w2, k4, k5, L1 : IntDyneVec; ColSelected : IntDyneVec; X, Y, d1, x1, MaxError : double; W, XAxis, YAxis, means, variances, stddevs : DblDyneVec; Distance : DblDyneMat; lReport: TStrings; begin if MaxGrpsChk.Checked then begin if MaxGrps.Text = '' then begin MessageDlg('Maximum number of groups not specified.', mtError, [mbOK], 0); exit; end; if not TryStrToInt(MaxGrps.Text, k1) or (k1 < 1) then begin Messagedlg('No valid number of groups given.', mtError, [mbOK], 0); exit; end; end; MaxError := 0.0; GrpCnt := 0; NoSelected := PredList.Items.Count; if not VarChk.Checked then begin SetLength(w2,NoCases); SetLength(k4,NoCases); SetLength(k5,NoCases); SetLength(L1,NoCases); SetLength(W,NoSelected); SetLength(XAxis,NoCases); SetLength(YAxis,NoCases); SetLength(means,NoSelected); SetLength(variances,NoSelected); SetLength(stddevs,NoSelected); SetLength(Distance,NoCases,NoCases); SetLength(varlabels,NoSelected); SetLength(rowlabels,NoCases); SetLength(ColSelected,NoSelected); Ncols := NoSelected; Nrows := NoCases; for i := 0 to Ncols - 1 do begin cellstring := PredList.Items.Strings[i]; for j := 1 to NoVariables do begin if (cellstring = OS3MainFrm.DataGrid.Cells[j,0]) then begin varlabels[i] := cellstring; ColSelected[i] := j; end; end; end; for i := 0 to NoCases-1 do rowlabels[i] := IntToStr(i); end else begin SetLength(w2,NoSelected); SetLength(k4,NoSelected); SetLength(k5,NoSelected); SetLength(L1,NoSelected); SetLength(W,NoCases); SetLength(XAxis,NoSelected); SetLength(YAxis,NoSelected); SetLength(means,NoCases); SetLength(variances,NoCases); SetLength(stddevs,NoCases); SetLength(Distance,NoSelected,NoCases); SetLength(varlabels,NoCases); SetLength(rowlabels,NoSelected); SetLength(ColSelected,NoSelected); Ncols := NoCases; Nrows := NoSelected; //Get labels of selected variables for i := 0 to Nrows - 1 do begin cellstring := PredList.Items.Strings[i]; for j := 1 to NoVariables do begin if (cellstring = OS3MainFrm.DataGrid.Cells[j,0]) then begin ColSelected[i] := j; rowlabels[i] := cellstring; end; end; end; for i := 0 to NoCases-1 do varlabels[i] := IntToStr(i); end; if MembersChk.Checked then k3 := 1 else k3 := 0; for j := 0 to Ncols-1 do begin means[j] := 0.0; variances[j] := 0.0; stddevs[j] := 0.0; end; if VarChk.Checked = false then begin // Get labels of rows // for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[0,i]; // Get data into the distance matrix count := 0; for i := 1 to Nrows do begin if (not GoodRecord(i,NoSelected,ColSelected)) then continue; count := count + 1; for j := 1 to Ncols do begin col := ColSelected[j-1]; X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col,i])); means[j-1] := means[j-1] + X; variances[j-1] := variances[j-1] + (X * X); Distance[i-1,j-1] := X; end; end; end else begin // cluster variables // Get labels of columns // for i := 1 to Nrows do rowlabels[i-1] := MainFrm.Grid.Cells[i,0]; // Get data into the distance matrix count := 0; for i := 1 to Nrows do // actually grid column in this case begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; count := count + 1; for j := 1 to Ncols do // actually grid rows in this case begin // if (not GoodRecord(j,NoSelected,ColSelected)) then continue; col := ColSelected[i-1]; X := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[col,j])); means[j-1] := means[j-1] + X; variances[j-1] := variances[j-1] + (X * X); Distance[i-1,j-1] := X; end; end; end; // Calculate means and standard deviations of variables for j := 0 to Ncols-1 do begin variances[j] := variances[j] - (means[j] * means[j] / count); variances[j] := variances[j] / (count - 1); stddevs[j] := sqrt(variances[j]); means[j] := means[j] / count; end; // Ready the output form lReport := TStringList.Create; try lReport.Add('HIERARCHICAL CLUSTER ANALYSIS'); lReport.Add(''); lReport.Add('Number of objects to cluster: %d on %d variables.', [Nrows, Ncols]); lReport.Add(''); if StatsChk.Checked then begin DynVectorPrint(means, Ncols, 'Variable Means', varlabels, count, lReport); DynVectorPrint(variances, Ncols, 'Variable Variances', varlabels, count, lReport); DynVectorPrint(stddevs, Ncols, 'Variable Standard Deviations', varlabels, count, lReport); lReport.Add(DIVIDER); lReport.Add(''); end; // Standardize the distance scores if elected if StdChk.Checked then begin for j := 0 to Ncols-1 do for i := 0 to Nrows-1 do Distance[i,j] := (Distance[i,j] - means[j]) / stddevs[j]; end; // replace original values in grid with z scores if elected if ReplaceChk.Checked then begin for i := 1 to Nrows do begin if not GoodRecord(i,NoSelected,ColSelected) then continue; for j := 1 to Ncols do begin col := ColSelected[j-1]; OS3MainFrm.DataGrid.Cells[col,i] := Format('%6.4f', [Distance[i-1,j-1]]); end; end; end; // Convert data matrix to initial matrix of error potentials for i := 1 to Nrows do begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; for j := 1 to Ncols do W[j-1] := Distance[i-1,j-1]; for j := i to Nrows do begin // if (not GoodRecord(i,NoSelected,ColSelected)) then continue; Distance[i-1,j-1] := 0.0; for k := 1 to Ncols do Distance[i-1,j-1] := Distance[i-1,j-1] + (Distance[j-1,k-1] - W[k-1]) * (Distance[j-1,k-1] - W[k-1]); Distance[i-1,j-1] := Distance[i-1,j-1] / 2.0; end; end; for i := 1 to Nrows do for j := i to Nrows do Distance[j-1,i-1] := 0.0; // Now, group the cases for maximum groups down if MaxGrpsChk.Checked then begin k1 := StrToInt(MaxGrps.Text); n3 := Nrows; end else begin k1 := 2; n3 := Nrows; end; // Initialize group membership and group-n vectors for i := 0 to Nrows-1 do begin k4[i] := i+1; k5[i] := i+1; w2[i] := 1; end; // Locate optimal combination, if more than 2 groups remain next1: n3 := n3 - 1; if (n3 > 1) then begin x1 := 100000000000.0; for i := 1 to Nrows do begin if (k5[i-1] = i) then begin for j := i to Nrows do begin if ((i <> j) and (k5[j-1] = j)) then begin d1 := Distance[i-1,j-1] - Distance[i-1,i-1] - Distance[j-1,j-1]; if (d1 < x1) then begin x1 := d1; L := i; M := j; end; // end if end; // end if end; // next j end; // end if end; // next i n4 := w2[L-1]; n5 := w2[M-1]; GrpCnt := GrpCnt + 1; XAxis[GrpCnt-1] := n3; YAxis[GrpCnt-1] := x1; if (x1 > MaxError) then MaxError := x1; lReport.Add('%2.d groups after combining group %2.d (n = %2.d) and group %2.d (n = %2.d), error: %7.3f', [n3, L, n4, M, n5, x1]); w3 := w2[L-1] + w2[M-1]; x1 := Distance[L-1,M-1] * w3; Y := Distance[L-1,L-1] * w2[L-1] + Distance[M-1,M-1] * w2[M-1]; Distance[L-1,L-1] := Distance[L-1,M-1]; for i := 1 to Nrows do if (k5[i-1] = M) then k5[i-1] := L; for i := 1 to Nrows do begin if ((i <> L) and (k5[i-1] = i)) then begin if (i <= L) then begin Distance[i-1,L-1] := Distance[i-1,L-1] * (w2[i-1] + w2[L-1]) + Distance[i-1,M-1] * (w2[i-1] + w2[M-1]) + x1 - Y - Distance[i-1,i-1] * w2[i-1]; Distance[i-1,L-1] := Distance[i-1,L-1] / (w2[i-1] + w3); end else begin Distance[L-1,i-1] := Distance[L-1,i-1] * (w2[L-1] + w2[i-1]) + (Distance[M-1,i-1] + Distance[i-1,M-1]) * (w2[M-1] + w2[i-1]); Distance[L-1,i-1] := (Distance[L-1,i-1]+ x1 - Y - Distance[i-1,i-1] * w2[i-1]) / (w2[i-1] + w3); end; end; end; w2[L-1] := w3; if (n3 > k1) then goto next1; // print group memberships of all objects, if optioned for i := 1 to Nrows do begin if (k5[i-1] = i) then begin L := 0; for j := 1 to Nrows do begin if (k5[j-1] = i) then begin L := L + 1; L1[L-1] := k4[j-1]; if k3 = 1 then L1[L-1] := j; end; end; if k3 = 1 then begin lReport.Add('Group %d (n = %d)', [i, L]); for j := 1 to L do lReport.Add(' Object: %s', [rowlabels[L1[j-1]-1]]); end; // end if end; // end if end; // next i goto next1; end; // end if DisplayReport(lReport); if PlotChk.Checked then begin SetLength(GraphFrm.Ypoints,1,GrpCnt); SetLength(GraphFrm.Xpoints,1,GrpCnt); for i := 1 to GrpCnt do begin GraphFrm.Ypoints[0,i-1] := YAxis[i-1]; GraphFrm.Xpoints[0,i-1] := XAxis[i-1]; end; GraphFrm.nosets := 1; GraphFrm.nbars := GrpCnt; GraphFrm.Heading := 'NO. GROUPS VERSUS GROUPING ERROR'; GraphFrm.XTitle := 'NO. GROUPS'; GraphFrm.YTitle := 'ERROR'; // GraphFrm.Ypoints[1] := YAxis; // GraphFrm.Xpoints[1] := XAxis; GraphFrm.AutoScaled := true; GraphFrm.PtLabels := false; GraphFrm.GraphType := 7; // 2d points GraphFrm.BackColor := clCream; GraphFrm.ShowBackWall := true; GraphFrm.ShowModal; end; finally lReport.Free; ColSelected := nil; rowlabels := nil; varlabels := nil; Distance := nil; stddevs := nil; variances := nil; means := nil; YAxis := nil; XAxis := nil; W := nil; L1 := nil; k5 := nil; k4 := nil; w2 := nil; GraphFrm.Xpoints := nil; GraphFrm.Ypoints := nil; end; end; procedure THierarchFrm.PredInClick(Sender: TObject); var i: integer; begin i := 0; while i < VarList.Items.Count do begin if VarList.Selected[i] then begin PredList.Items.Add(VarList.Items[i]); VarList.Items.Delete(i); i := 0; end else i := i + 1; end; UpdateBtnStates; end; procedure THierarchFrm.PredOutClick(Sender: TObject); var i: integer; begin i := 0; while i < PredList.Items.Count do begin if PredList.Selected[i] then begin VarList.Items.Add(PredList.Items[i]); PredList.Items.Delete(i); i := 0; end else i := i + 1; end; UpdateBtnStates; end; procedure THierarchFrm.UpdateBtnStates; begin PredIn.Enabled := AnySelected(VarList); PredOut.Enabled := AnySelected(PredList); end; procedure THierarchFrm.VarListSelectionChange(Sender: TObject; User: boolean); begin UpdateBtnStates; end; initialization {$I hierarchunit.lrs} end.