diff --git a/applications/lazstats/docs/HelpNDoc/LazStats.hnd b/applications/lazstats/docs/HelpNDoc/LazStats.hnd index e7298fe46..9c749bbe5 100644 Binary files a/applications/lazstats/docs/HelpNDoc/LazStats.hnd and b/applications/lazstats/docs/HelpNDoc/LazStats.hnd differ diff --git a/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.lfm b/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.lfm index b163ca5b5..133a89b43 100644 --- a/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.lfm +++ b/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.lfm @@ -2,21 +2,21 @@ object KMeansFrm: TKMeansFrm Left = 664 Height = 349 Top = 318 - Width = 407 + Width = 422 AutoSize = True Caption = 'k Means Clustering ' ClientHeight = 349 - ClientWidth = 407 + ClientWidth = 422 OnActivate = FormActivate OnCreate = FormCreate OnShow = FormShow Position = poMainFormCenter LCLVersion = '2.1.0.0' object ResetBtn: TButton - AnchorSideRight.Control = CancelBtn + AnchorSideRight.Control = ComputeBtn AnchorSideBottom.Control = Owner AnchorSideBottom.Side = asrBottom - Left = 106 + Left = 201 Height = 25 Top = 316 Width = 54 @@ -30,29 +30,11 @@ object KMeansFrm: TKMeansFrm OnClick = ResetBtnClick TabOrder = 3 end - object CancelBtn: TButton - AnchorSideRight.Control = ComputeBtn - AnchorSideBottom.Control = Owner - AnchorSideBottom.Side = asrBottom - Left = 172 - Height = 25 - Top = 316 - Width = 62 - Anchors = [akRight, akBottom] - AutoSize = True - BorderSpacing.Left = 12 - BorderSpacing.Top = 8 - BorderSpacing.Right = 12 - BorderSpacing.Bottom = 8 - Caption = 'Cancel' - ModalResult = 2 - TabOrder = 4 - end object ComputeBtn: TButton - AnchorSideRight.Control = ReturnBtn + AnchorSideRight.Control = CloseBtn AnchorSideBottom.Control = Owner AnchorSideBottom.Side = asrBottom - Left = 246 + Left = 267 Height = 25 Top = 316 Width = 76 @@ -64,33 +46,33 @@ object KMeansFrm: TKMeansFrm BorderSpacing.Bottom = 8 Caption = 'Compute' OnClick = ComputeBtnClick - TabOrder = 5 + TabOrder = 4 end - object ReturnBtn: TButton + object CloseBtn: TButton AnchorSideRight.Control = Owner AnchorSideRight.Side = asrBottom AnchorSideBottom.Control = Owner AnchorSideBottom.Side = asrBottom - Left = 334 + Left = 355 Height = 25 Top = 316 - Width = 61 + Width = 55 Anchors = [akRight, akBottom] AutoSize = True BorderSpacing.Left = 12 BorderSpacing.Top = 8 BorderSpacing.Right = 12 BorderSpacing.Bottom = 8 - Caption = 'Return' - ModalResult = 1 - TabOrder = 6 + Caption = 'Close' + ModalResult = 11 + TabOrder = 5 end object HelpBtn: TButton Tag = 129 AnchorSideRight.Control = ResetBtn AnchorSideBottom.Control = Owner AnchorSideBottom.Side = asrBottom - Left = 43 + Left = 138 Height = 25 Top = 316 Width = 51 @@ -108,11 +90,11 @@ object KMeansFrm: TKMeansFrm AnchorSideLeft.Control = Owner AnchorSideRight.Control = Owner AnchorSideRight.Side = asrBottom - AnchorSideBottom.Control = ReturnBtn + AnchorSideBottom.Control = CloseBtn Left = 0 Height = 8 Top = 300 - Width = 407 + Width = 422 Anchors = [akLeft, akRight, akBottom] Shape = bsBottomLine end @@ -121,16 +103,17 @@ object KMeansFrm: TKMeansFrm AnchorSideTop.Control = Owner AnchorSideRight.Control = Owner AnchorSideRight.Side = asrBottom + AnchorSideBottom.Control = Bevel1 Left = 0 Height = 101 - Top = 0 - Width = 407 - Anchors = [akTop, akLeft, akRight] + Top = 199 + Width = 422 + Anchors = [akLeft, akRight, akBottom] AutoSize = True BevelOuter = bvNone ClientHeight = 101 - ClientWidth = 407 - TabOrder = 0 + ClientWidth = 422 + TabOrder = 1 object Label1: TLabel AnchorSideLeft.Control = Panel1 AnchorSideTop.Control = NoClustersEdit @@ -235,22 +218,21 @@ object KMeansFrm: TKMeansFrm end object Panel2: TPanel AnchorSideLeft.Control = Owner - AnchorSideTop.Control = Panel1 - AnchorSideTop.Side = asrBottom + AnchorSideTop.Control = Owner AnchorSideRight.Control = Owner AnchorSideRight.Side = asrBottom - AnchorSideBottom.Control = Bevel1 + AnchorSideBottom.Control = Panel1 Left = 0 Height = 191 - Top = 109 - Width = 407 + Top = 8 + Width = 422 Anchors = [akTop, akLeft, akRight, akBottom] AutoSize = True BorderSpacing.Top = 8 BevelOuter = bvNone ClientHeight = 191 - ClientWidth = 407 - TabOrder = 1 + ClientWidth = 422 + TabOrder = 0 object Label3: TLabel AnchorSideLeft.Control = Panel2 AnchorSideTop.Control = Panel2 @@ -272,20 +254,21 @@ object KMeansFrm: TKMeansFrm Left = 8 Height = 173 Top = 18 - Width = 164 + Width = 172 Anchors = [akTop, akLeft, akRight, akBottom] BorderSpacing.Left = 8 BorderSpacing.Top = 3 BorderSpacing.Right = 8 ItemHeight = 0 MultiSelect = True + OnSelectionChange = VarListSelectionChange TabOrder = 0 end object VarInBtn: TBitBtn AnchorSideLeft.Control = Panel2 AnchorSideLeft.Side = asrCenter AnchorSideTop.Control = VarList - Left = 189 + Left = 197 Height = 28 Top = 18 Width = 28 @@ -334,7 +317,7 @@ object KMeansFrm: TKMeansFrm AnchorSideLeft.Side = asrCenter AnchorSideTop.Control = VarInBtn AnchorSideTop.Side = asrBottom - Left = 189 + Left = 197 Height = 28 Top = 50 Width = 28 @@ -384,7 +367,7 @@ object KMeansFrm: TKMeansFrm AnchorSideLeft.Side = asrCenter AnchorSideTop.Control = VarOutBtn AnchorSideTop.Side = asrBottom - Left = 180 + Left = 188 Height = 25 Top = 102 Width = 46 @@ -395,7 +378,7 @@ object KMeansFrm: TKMeansFrm Spacing = 0 TabOrder = 3 end - object ListBox1: TListBox + object SelList: TListBox AnchorSideLeft.Control = AllBtn AnchorSideLeft.Side = asrBottom AnchorSideTop.Control = Label4 @@ -404,21 +387,23 @@ object KMeansFrm: TKMeansFrm AnchorSideRight.Side = asrBottom AnchorSideBottom.Control = Panel2 AnchorSideBottom.Side = asrBottom - Left = 234 + Left = 242 Height = 173 Top = 18 - Width = 165 + Width = 172 Anchors = [akTop, akLeft, akRight, akBottom] BorderSpacing.Left = 8 BorderSpacing.Top = 3 BorderSpacing.Right = 8 ItemHeight = 0 + MultiSelect = True + OnSelectionChange = VarListSelectionChange TabOrder = 4 end object Label4: TLabel - AnchorSideLeft.Control = ListBox1 + AnchorSideLeft.Control = SelList AnchorSideTop.Control = Panel2 - Left = 234 + Left = 242 Height = 15 Top = 0 Width = 93 diff --git a/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.pas b/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.pas index 1eba95662..e382e3290 100644 --- a/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.pas +++ b/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.pas @@ -1,3 +1,6 @@ +// File for testing: cansas.laz, all variables +// In the help file example No of Desired Clusters is 4 + unit KMeansUnit; {$mode objfpc}{$H+} @@ -23,9 +26,8 @@ type VarOutBtn: TBitBtn; AllBtn: TBitBtn; ResetBtn: TButton; - CancelBtn: TButton; ComputeBtn: TButton; - ReturnBtn: TButton; + CloseBtn: TButton; StdChkBox: TCheckBox; RepChkBox: TCheckBox; GroupBox1: TGroupBox; @@ -33,7 +35,7 @@ type Label2: TLabel; Label3: TLabel; Label4: TLabel; - ListBox1: TListBox; + SelList: TListBox; VarList: TListBox; NoClustersEdit: TEdit; Label1: TLabel; @@ -45,6 +47,7 @@ type procedure HelpBtnClick(Sender: TObject); procedure ResetBtnClick(Sender: TObject); procedure VarInBtnClick(Sender: TObject); + procedure VarListSelectionChange(Sender: TObject; User: boolean); procedure VarOutBtnClick(Sender: TObject); private { private declarations } @@ -70,6 +73,7 @@ type VAR AN2 : DblDyneVec; VAR NCP : IntDyneVec; VAR D : DblDyneVec; VAR ITRAN : IntDyneVec; INDX : integer); + procedure UpdateBtnStates; public { public declarations } @@ -81,68 +85,65 @@ var implementation uses - Math; + Math, Utils; { TKMeansFrm } procedure TKMeansFrm.ResetBtnClick(Sender: TObject); -VAR cellstring : string; - i : integer; +var + i: integer; begin - VarList.Clear; - ListBox1.Clear; - for i := 1 to NoVariables do - begin - cellstring := OS3MainFrm.DataGrid.Cells[i,0]; - VarList.Items.Add(cellstring); - end; - RepChkBox.Checked := false; - StdChkBox.Checked := true; - VarOutBtn.Enabled := false; - DescChkBox.Checked := false; - NoClustersEdit.Text := ''; - ItersEdit.Text := '100'; + VarList.Clear; + SelList.Clear; + for i := 1 to NoVariables do + VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); + RepChkBox.Checked := false; + StdChkBox.Checked := true; + DescChkBox.Checked := false; + NoClustersEdit.Text := ''; + ItersEdit.Text := '100'; + UpdateBtnStates; end; procedure TKMeansFrm.VarInBtnClick(Sender: TObject); -VAR - index, count, i : integer; - cellstring : string; +var + i: integer; begin - count := 0; - index := ListBox1.Items.Count; - for i := 0 to index - 1 do - begin - if (VarList.Selected[i]) then - begin - cellstring := VarList.Items.strings[i]; - ListBox1.Items.Add(cellstring); - count := count + 1; - end; - end; + i := 0; + while i < VarList.Items.Count do + begin + if VarList.Selected[i] then + begin + SelList.Items.Add(VarList.Items[i]); + VarList.Items.Delete(i); + i := 0; + end else + i := i + 1; + end; + UpdateBtnStates; +end; - while (count > 0) do - begin - for i := 0 to VarList.Items.Count - 1 do - begin - if (VarList.Selected[i]) then - begin - VarList.Items.Delete(i); - count := count - 1; - end; - end; - end; - VarOutBtn.Enabled := true; +procedure TKMeansFrm.VarListSelectionChange(Sender: TObject; User: boolean); +begin + UpdateBtnStates; end; procedure TKMeansFrm.VarOutBtnClick(Sender: TObject); -VAR index : integer; - cellstring : string; +var + i: integer; begin - index := ListBox1.ItemIndex; - cellstring := ListBox1.Items.Strings[index]; - VarList.Items.Add(cellstring); - ListBox1.Items.Delete(index); + i := 0; + while i < SelList.Items.Count do + begin + if SelList.Selected[i] then + begin + VarList.Items.Add(SelList.Items[i]); + SelList.Items.Delete(i); + i := 0; + end else + i := i + 1; + end; + UpdateBtnStates; end; procedure TKMeansFrm.FormActivate(Sender: TObject); @@ -152,12 +153,11 @@ begin if FAutoSized then exit; - w := MaxValue([HelpBtn.Width, ResetBtn.Width, CancelBtn.Width, ComputeBtn.Width, ReturnBtn.Width]); + w := MaxValue([HelpBtn.Width, ResetBtn.Width, ComputeBtn.Width, CloseBtn.Width]); HelpBtn.Constraints.MinWidth := w; ResetBtn.Constraints.MinWidth := w; - CancelBtn.Constraints.MinWidth := w; ComputeBtn.Constraints.MinWidth := w; - ReturnBtn.Constraints.MinWidth := w; + CloseBtn.Constraints.MinWidth := w; Constraints.MinWidth := Width; Constraints.MinHeight := Height; @@ -168,8 +168,6 @@ end; procedure TKMeansFrm.FormCreate(Sender: TObject); begin Assert(OS3MainFrm <> nil); - if OutputFrm = nil then - Application.CreateForm(TOutputFrm, OutputFrm); end; procedure TKMeansFrm.FormShow(Sender: TObject); @@ -185,257 +183,250 @@ begin end; procedure TKMeansFrm.AllBtnClick(Sender: TObject); -VAR - index, noitems : integer; - cellstring : string; +var + index: integer; + cellstring: string; begin - noitems := VarList.Items.Count; - for index := 0 to noitems - 1 do - begin - cellstring := VarList.Items.Strings[index]; - ListBox1.Items.Add(cellstring); - end; - VarList.Clear; - VarOutBtn.Enabled := true; + for index := 0 to VarList.Items.Count - 1 do + begin + cellstring := VarList.Items[index]; + SelList.Items.Add(cellstring); + end; + VarList.Clear; + UpdateBtnStates; end; procedure TKMeansFrm.ComputeBtnClick(Sender: TObject); VAR i, j, L, Ncols, N, M, K,IFAULT, ITER, col : integer; - center, itemp : integer; + center: integer; IC1, IC2, NC, NCP, ITRAN, LIVE, ColSelected : IntDyneVec; A, C : DblDyneMat; - D, AN1, AN2, WSS, DT : DblDyneVec; + D, AN1, AN2, WSS: DblDyneVec; cellstring: string; outline : string; - strval : string; varlabels, rowlabels : StrDyneVec; Mean, stddev : double; -label cleanup; - + lReport: TStrings; begin - Ncols := ListBox1.Items.Count; - if (Ncols <= 0) then - begin - ShowMessage('ERROR! No variables selected to cluster.'); - exit; - end; + Ncols := SelList.Items.Count; + if (Ncols <= 0) then + begin + MessageDlg('No variables selected to cluster.', mtError, [mbOK], 0); + exit; + end; - N := Ncols; - M := NoCases; - K := StrToInt(NoClustersEdit.Text); - IFAULT := 0; - ITER := StrToInt(ItersEdit.Text); + if NoClustersEdit.Text = '' then + begin + NoClustersEdit.SetFocus; + MessageDlg('You must enter the desired number of clusters.', mtError, [mbOK], 0); + exit; + end; + if not TryStrToInt(NoClustersEdit.Text, K) or (K <= 0) then + begin + NoClustersEdit.SetFocus; + MessageDlg('You must enter the desired number of clusters as a positive value.', mtError, [mbOK], 0); + exit; + end; - SetLength(varlabels,Ncols); - SetLength(rowlabels,NoCases); - SetLength(ColSelected,Ncols); - SetLength(A,M+1,N+1); - SetLength(C,K+1,N+1); - SetLength(D,M+1); - SetLength(AN1,K+1); - SetLength(AN2,K+1); - SetLength(WSS,K+1); - SetLength(DT,3); - SetLength(IC1,M+1); - SetLength(IC2,M+1); - SetLength(NC,K+1); - SetLength(NCP,K+1); - SetLength(ITRAN,K+1); - SetLength(LIVE,K+1); + if ItersEdit.Text = '' then + begin + ItersEdit.SetFocus; + MessageDlg('This field cannot be empty.', mtError, [mbOK], 0); + exit; + end; + if not TryStrToInt(ItersEdit.Text, ITER) or (ITER <= 0) then + begin + ItersEdit.SetFocus; + MessageDlg('Invalid input.', mtError, [mbOK], 0); + exit; + end; - if (K <= 0) then - begin - ShowMessage('ERROR! You must enter the desired number of clusters.'); - goto cleanup; - end; + N := Ncols; + M := NoCases; + IFAULT := 0; - // initialize arrays - for i := 1 to K do - begin - AN1[i] := 0.0; - AN2[i] := 0.0; - WSS[i] := 0.0; - NC[i] := 0; - NCP[i] := 0; - ITRAN[i] := 0; - LIVE[i] := 0; - for j := 1 to N do C[i,j] := 0.0; - end; - for i := 1 to M do - begin - IC1[i] := 0; - IC2[i] := 0; - D[i] := 0.0; - end; + SetLength(varlabels,Ncols); + SetLength(rowlabels,NoCases); + SetLength(ColSelected,Ncols); + SetLength(A,M+1,N+1); + SetLength(C,K+1,N+1); + SetLength(D,M+1); + SetLength(AN1,K+1); + SetLength(AN2,K+1); + SetLength(WSS,K+1); + SetLength(IC1,M+1); + SetLength(IC2,M+1); + SetLength(NC,K+1); + SetLength(NCP,K+1); + SetLength(ITRAN,K+1); + SetLength(LIVE,K+1); - //Get labels and columns of selected variables - for i := 0 to Ncols - 1 do - begin - cellstring := ListBox1.Items.Strings[i]; - for j := 0 to NoVariables - 1 do + // initialize arrays + for i := 1 to K do + begin + AN1[i] := 0.0; + AN2[i] := 0.0; + WSS[i] := 0.0; + NC[i] := 0; + NCP[i] := 0; + ITRAN[i] := 0; + LIVE[i] := 0; + for j := 1 to N do C[i,j] := 0.0; + end; + for i := 1 to M do + begin + IC1[i] := 0; + IC2[i] := 0; + D[i] := 0.0; + end; + + //Get labels and columns of selected variables + for i := 0 to Ncols - 1 do + begin + cellstring := SelList.Items.Strings[i]; + for j := 0 to NoVariables - 1 do + begin + if (cellstring = OS3MainFrm.DataGrid.Cells[j+1,0]) then + begin + varlabels[i] := cellstring; + ColSelected[i] := j+1; + end; + end; + end; + + // Get labels of rows + for i := 0 to NoCases - 1 do + rowlabels[i] := OS3MainFrm.DataGrid.Cells[0,i+1]; + + // read the data + for i := 1 to M do + begin + if not GoodRecord(i, N, ColSelected) then continue; + for j := 1 to N do + begin + col := ColSelected[j-1]; + A[i,j] := StrToFloat(OS3MainFrm.DataGrid.Cells[col,i]); + end; + end; + + lReport := TStringList.Create; + try + lReport.Add('K-MEANS CLUSTERING'); + lReport.Add('Adapted from AS 136 APPL. STATIST. (1979) VOL.28, NO.1'); + lReport.Add(''); + lReport.Add('File: %s', [OS3MainFrm.FileNameEdit.Text]); + lReport.Add('No. Cases: %d, No. Variables: %d, No. Clusters: %d',[M, N, K]); + lReport.Add(''); + + // transform to z scores if needed + if StdChkBox.Checked then + begin + for j := 1 to N do + begin + Mean := 0.0; + stddev := 0.0; + for i := 1 to M do begin - if (cellstring = OS3MainFrm.DataGrid.Cells[j+1,0]) then - begin - varlabels[i] := cellstring; - ColSelected[i] := j+1; - end; + Mean := Mean + A[i,j]; + stddev := stddev + sqr(A[i,j]); end; - end; - - // Get labels of rows - for i := 0 to NoCases - 1 do rowlabels[i] := OS3MainFrm.DataGrid.Cells[0,i+1]; - - // read the data - for i := 1 to M do - begin - if (NOT GoodRecord(i,N,ColSelected)) then continue; - for j := 1 to N do - begin - col := ColSelected[j-1]; - A[i,j] := StrToFloat(OS3MainFrm.DataGrid.Cells[col,i]); - end; - end; - - OutputFrm.RichEdit.Clear; - OutputFrm.RichEdit.Lines.Add('K-Means Clustering. Adapted from AS 136 APPL. STATIST. (1979) VOL.28, NO.1'); - OutputFrm.RichEdit.Lines.Add(''); - outline := format('File := %s',[OS3MainFrm.FileNameEdit.Text]); - OutputFrm.RichEdit.Lines.Add(outline); - outline := format('No. Cases := %d, No. Variables := %d, No. Clusters := %d',[M,N,K]); - OutputFrm.RichEdit.Lines.Add(outline); - OutputFrm.RichEdit.Lines.Add(''); - - // transform to z scores if needed - if (StdChkBox.Checked = true) then - begin - for j := 1 to N do + stddev := stddev - Mean * Mean / M; + stddev := stddev / (M - 1); + Mean := Mean / M; + if DescChkBox.Checked then + lReport.Add('Mean: %8.3f, Std.Dev.: %8.3f for %s', [Mean, stddev, varlabels[j-1]]); + for i := 1 to M do begin - Mean := 0.0; - stddev := 0.0; - for i := 1 to M do - begin - Mean := Mean + A[i,j]; - stddev := stddev + (A[i,j] * A[i,j]); - end; - stddev := stddev - Mean * Mean / M; - stddev := stddev / (M - 1); - Mean := Mean / M; - if (DescChkBox.Checked) then - begin - outline := format('Mean := %8.3f, Std.Dev. := %8.3f for %s',[Mean,stddev,varlabels[j-1]]); - OutputFrm.RichEdit.Lines.Add(outline); - end; - for i := 1 to M do - begin - A[i,j] := (A[i,j] - Mean) / stddev; - if (RepChkBox.Checked = true) then - begin - col := ColSelected[j-1]; - outline := format('%8.5f',[A[i,j]]); - OS3MainFrm.DataGrid.Cells[col,i] := outline; - end; - end; + A[i,j] := (A[i,j] - Mean) / stddev; + if RepChkBox.Checked then + begin + col := ColSelected[j-1]; + OS3MainFrm.DataGrid.Cells[col,i] := Format('%8.5f', [A[i,j]]); + end; end; - end; + end; + end; - // Now enter initial points - for L := 1 to K do - begin - center := 1 + (L-1) * (M div K); // initial cluster center - for j := 1 to N do C[L,j] := A[center,j]; - end; + // Now enter initial points + for L := 1 to K do + begin + center := 1 + (L-1) * (M div K); // initial cluster center // wp: why not ((L-1)*M) div K + for j := 1 to N do + C[L, j] := A[center, j]; + end; - // do analysis - KMNS(A,M,N,C,K,IC1,IC2,NC,AN1,AN2,NCP,D,ITRAN,LIVE,ITER,WSS,IFAULT); + // do analysis + KMNS(A,M,N,C,K,IC1,IC2,NC,AN1,AN2,NCP,D,ITRAN,LIVE,ITER,WSS,IFAULT); - // show results + // show results - // sort subjects by cluster - for i := 1 to M do IC2[i] := i; // store ids in here - for i := 1 to M - 1 do - begin - for j := i+1 to M do - begin - if (IC1[i] > IC1[j]) then // swap these clusters and ids - begin - itemp := IC1[i]; - IC1[i] := IC1[j]; - IC1[j] := itemp; - itemp := IC2[i]; - IC2[i] := IC2[j]; - IC2[j] := itemp; - end; - end; - end; + // sort subjects by cluster + for i := 1 to M do + IC2[i] := i; // store ids in here + for i := 1 to M - 1 do + begin + for j := i+1 to M do + begin + if (IC1[i] > IC1[j]) then // swap these clusters and ids + begin + Exchange(IC1[i], IC1[j]); + Exchange(IC2[i], IC2[j]); + end; + end; + end; - OutputFrm.RichEdit.Lines.Add(''); - OutputFrm.RichEdit.Lines.Add('NUMBER OF SUBJECTS IN EACH CLUSTER'); - for i := 1 to K do - begin - outline := format('Cluster := %d with %d cases.',[i,NC[i]]); - OutputFrm.RichEdit.Lines.Add(outline); - end; + lReport.Add(''); + lReport.Add('NUMBER OF SUBJECTS IN EACH CLUSTER'); + for i := 1 to K do + lReport.Add('Cluster %d with %d cases.', [i, NC[i]]); - OutputFrm.RichEdit.Lines.Add(''); - OutputFrm.RichEdit.Lines.Add('PLACEMENT OF SUBJECTS IN CLUSTERS'); - OutputFrm.RichEdit.Lines.Add('CLUSTER SUBJECT'); - for i := 1 to M do - begin - outline := format(' %3d %3d',[IC1[i],IC2[i]]); - OutputFrm.RichEdit.Lines.Add(outline); - end; + lReport.Add(''); + lReport.Add('PLACEMENT OF SUBJECTS IN CLUSTERS'); + lReport.Add('CLUSTER SUBJECT'); + for i := 1 to M do + lReport.Add(' %3d %3d', [IC1[i], IC2[i]]); - OutputFrm.RichEdit.Lines.Add(''); - OutputFrm.RichEdit.Lines.Add('AVERAGE VARIABLE VALUES BY CLUSTER'); - outline := ' VARIABLES'; - OutputFrm.RichEdit.Lines.Add(outline); - outline := 'CLUSTER'; - for j := 1 to N do - begin - strval := format(' %3d ',[j]); - outline := outline + strval; - end; - OutputFrm.RichEdit.Lines.Add(outline); - OutputFrm.RichEdit.Lines.Add(' '); - for i := 1 to K do - begin - outline := format(' %3d ',[i]); - for j := 1 to N do - begin - strval := format('%5.2f ',[C[i,j]]); - outline := outline + strval; - end; - OutputFrm.RichEdit.Lines.Add(outline); - end; - OutputFrm.RichEdit.Lines.Add(''); - OutputFrm.RichEdit.Lines.Add('WITHIN CLUSTER SUMS OF SQUARES'); - for i := 1 to K do - begin - outline := format('Cluster %d := %6.3f',[i,WSS[i]]); - OutputFrm.RichEdit.Lines.Add(outline); - end; + lReport.Add(''); + lReport.Add('AVERAGE VARIABLE VALUES BY CLUSTER'); + lReport.Add(' VARIABLES'); + outline := 'CLUSTER'; + for j := 1 to N do + outline := outline + Format(' %3d ',[j]); + lReport.Add(outline); + lReport.Add(' '); + for i := 1 to K do + begin + outline := format(' %3d ',[i]); + for j := 1 to N do + outline := outline + Format('%5.2f ', [C[i,j]]); + lReport.Add(outline); + end; + lReport.Add(''); + lReport.Add('WITHIN CLUSTER SUMS OF SQUARES'); + for i := 1 to K do + lReport.Add('Cluster %d: %6.3f', [i, WSS[i]]); - OutputFrm.ShowModal; + DisplayReport(lReport); - // cleanup -cleanup: - LIVE := nil; - ITRAN := nil; - NCP := nil; - NC := nil; - IC2 := nil; - IC1 := nil; - DT := nil; - WSS := nil; - AN2 := nil; - AN1 := nil; - D := nil; - C := nil; - A := nil; - ColSelected := nil; - rowlabels := nil; - varlabels := nil; + finally + lReport.Free; + LIVE := nil; + ITRAN := nil; + NCP := nil; + NC := nil; + IC2 := nil; + IC1 := nil; + WSS := nil; + AN2 := nil; + AN1 := nil; + D := nil; + C := nil; + A := nil; + ColSelected := nil; + rowlabels := nil; + varlabels := nil; + end; end; procedure TKMeansFrm.KMNS(VAR A : DblDyneMat; M, N : integer; @@ -445,188 +436,194 @@ procedure TKMeansFrm.KMNS(VAR A : DblDyneMat; M, N : integer; VAR NCP : IntDyneVec; VAR D : DblDyneVec; VAR ITRAN : IntDyneVec; VAR LIVE : IntDyneVec; ITER : integer; VAR WSS : DblDyneVec; IFAULT : integer); +const + BIG = 1.0E30; + ZERO = 0.0; + ONE = 1.0; VAR - DT : array[0..2] of double; - BIG : double; - ZERO : double; - ONE : double; - DA, DB, DC, TEMP, AA : double; - L, II, INDX, I, J, IL, IJ : integer; -label cont50, cont40, cont150; + DT: array[0..2] of double; + DA, DB, DC, TEMP, AA: double; + L, II, INDX, I, J, IL, IJ: integer; + +label + cont50, cont40, cont150; begin - // SUBROUTINE KMNS(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, - // * ITRAN, LIVE, ITER, WSS, IFAULT) - // - // ALGORITHM AS 136 APPL. STATIST. (1979) VOL.28, NO.1 - // Divide M points in N-dimensional space into K clusters so that - // the within cluster sum of squares is minimized. - // - // INTEGER IC1(M), IC2(M), NC(K), NCP(K), ITRAN(K), LIVE(K) - // REAL A(M,N), D(M), C(K,N), AN1(K), AN2(K), WSS(K), DT(2) - // REAL ZERO, ONE - // - // Define BIG to be a very large positive number - // - // DATA BIG /1.E30/, ZERO /0.0/, ONE /1.0/ - // - BIG := 1.0e30; - ZERO := 0.0; - ONE := 1.0; - IFAULT := 3; - if ((K <= 1) or (K >= M)) then - begin - ShowMessage('The no. of clusters must be less than the no. of variables.'); - exit; - end; + // SUBROUTINE KMNS(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, + // * ITRAN, LIVE, ITER, WSS, IFAULT) + // + // ALGORITHM AS 136 APPL. STATIST. (1979) VOL.28, NO.1 + // Divide M points in N-dimensional space into K clusters so that + // the within cluster sum of squares is minimized. + // + // INTEGER IC1(M), IC2(M), NC(K), NCP(K), ITRAN(K), LIVE(K) + // REAL A(M,N), D(M), C(K,N), AN1(K), AN2(K), WSS(K), DT(2) + // REAL ZERO, ONE + // + // Define BIG to be a very large positive number + // + // DATA BIG /1.E30/, ZERO /0.0/, ONE /1.0/ + // + IFAULT := 3; + if (K <= 1) or (K >= M) then + begin + MessageDlg('The no. of clusters must be less than the no. of variables.', mtError, [mbOK], 0); + exit; + end; - // For each point I, find its two closest centres, IC1(I) and - // IC2(I). Assign it to IC1(I). - // - for I := 1 to M do - begin - IC1[I] := 1; - IC2[I] := 2; - for IL := 1 to 2 do - begin - DT[IL] := ZERO; - for J := 1 to N do - begin - DA := A[I,J] - C[IL,J]; - DT[IL] := DT[IL] + (DA * DA); //(squared difference for this comparison) - end; // 10 CONTINUE - end; // 10 CONTINUE - if (DT[1] > DT[2]) then // THEN swap - begin - IC1[I] := 2; - IC2[I] := 1; - TEMP := DT[1]; - DT[1] := DT[2]; - DT[2] := TEMP; - end; // END IF - for L := 3 to K do // (remaining clusters) - begin - DB := ZERO; - for J := 1 to N do // (variables) - begin - DC := A[I,J] - C[L,J]; - DB := DB + DC * DC; - if (DB >= DT[2]) then goto cont50; - end; - if (DB < DT[1]) then goto cont40; - DT[2] := DB; - IC2[I] := L; - goto cont50; -cont40: DT[2] := DT[1]; - IC2[I] := IC1[I]; - DT[1] := DB; - IC1[I] := L; -cont50: end; - end; // 50 CONTINUE (next case) - - // Update cluster centres to be the average of points contained - // within them. - // - for L := 1 to K do // (clusters) - begin - NC[L] := 0; - for J := 1 to N do C[L,J] := ZERO; //(initialize clusters) - end; - for I := 1 to M do // (subjects) - begin - L := IC1[I]; // which cluster the Ith case is in - NC[L] := NC[L] + 1; // no. in the cluster L - for J := 1 to N do C[L,J] := C[L,J] + A[I,J]; // sum of var. values in the cluster L - end; - - // Check to see if there is any empty cluster at this stage - // - for L := 1 to K do - begin - if (NC[L] = 0) then - begin - IFAULT := 1; - exit; - end; - AA := NC[L]; - for J := 1 to N do C[L,J] := C[L,J] / AA; // average the values in the cluster - - // Initialize AN1, AN2, ITRAN & NCP - // AN1(L) := NC(L) / (NC(L) - 1) - // AN2(L) := NC(L) / (NC(L) + 1) - // ITRAN(L) := 1 if cluster L is updated in the quick-transfer stage, - // := 0 otherwise - // In the optimal-transfer stage, NCP(L) stores the step at which - // cluster L is last updated. - // In the quick-transfer stage, NCP(L) stores the step at which - // cluster L is last updated plus M. - // - AN2[L] := AA / (AA + ONE); - AN1[L] := BIG; - if (AA > ONE) then AN1[L] := AA / (AA - ONE); - ITRAN[L] := 1; - NCP[L] := -1; - end; - INDX := 0; - for IJ := 1 to ITER do - begin - // - // In this stage, there is only one pass through the data. Each - // point is re-allocated, if necessary, to the cluster that will - // induce the maximum reduction in within-cluster sum of squares. - // - OPTRA(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, LIVE, INDX); - // - // Stop if no transfer took place in the last M optimal transfer - // steps. - // - if (INDX = M) then goto cont150; - // - // Each point is tested in turn to see if it should be re-allocated - // to the cluster to which it is most likely to be transferred, - // IC2(I), from its present cluster, IC1(I). Loop through the - // data until no further change is to take place. - // - QTRAN(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, INDX); - // - // If there are only two clusters, there is no need to re-enter the - // optimal transfer stage. - // - if (K = 2) then goto cont150; - // - // NCP has to be set to 0 before entering OPTRA. - // - for L := 1 to K do NCP[L] := 0; - end; - // - // Since the specified number of iterations has been exceeded, set - // IFAULT := 2. This may indicate unforeseen looping. - // - IFAULT := 2; - // - // Compute within-cluster sum of squares for each cluster. - // -cont150: - for L := 1 to K do - begin - WSS[L] := ZERO; - for J := 1 to N do C[L,J] := ZERO; - end; - for I := 1 to M do - begin - II := IC1[I]; - for J := 1 to N do C[II,J] := C[II,J] + A[I,J]; - end; + // For each point I, find its two closest centres, IC1(I) and + // IC2(I). Assign it to IC1(I). + // + for I := 1 to M do + begin + IC1[I] := 1; + IC2[I] := 2; + for IL := 1 to 2 do + begin + DT[IL] := ZERO; for J := 1 to N do begin - for L := 1 to K do C[L,J] := C[L,J] / (NC[L]); - for I := 1 to M do - begin - II := IC1[I]; - DA := A[I,J] - C[II,J]; - WSS[II] := WSS[II] + DA * DA; - end; - end; // 190 CONTINUE + DA := A[I,J] - C[IL,J]; + DT[IL] := DT[IL] + (DA * DA); //(squared difference for this comparison) + end; // 10 CONTINUE + end; // 10 CONTINUE + + if (DT[1] > DT[2]) then // THEN swap + begin + IC1[I] := 2; + IC2[I] := 1; + TEMP := DT[1]; + DT[1] := DT[2]; + DT[2] := TEMP; + end; // END IF + + for L := 3 to K do // (remaining clusters) + begin + DB := ZERO; + for J := 1 to N do // (variables) + begin + DC := A[I,J] - C[L,J]; + DB := DB + DC * DC; + if (DB >= DT[2]) then goto cont50; + end; + if (DB < DT[1]) then goto cont40; + DT[2] := DB; + IC2[I] := L; + goto cont50; + +cont40: + DT[2] := DT[1]; + IC2[I] := IC1[I]; + DT[1] := DB; + IC1[I] := L; + +cont50: + end; + end; // 50 CONTINUE (next case) + + // Update cluster centres to be the average of points contained + // within them. + // + for L := 1 to K do // (clusters) + begin + NC[L] := 0; + for J := 1 to N do C[L,J] := ZERO; //(initialize clusters) + end; + for I := 1 to M do // (subjects) + begin + L := IC1[I]; // which cluster the Ith case is in + NC[L] := NC[L] + 1; // no. in the cluster L + for J := 1 to N do C[L,J] := C[L,J] + A[I,J]; // sum of var. values in the cluster L + end; + + // Check to see if there is any empty cluster at this stage + // + for L := 1 to K do + begin + if (NC[L] = 0) then + begin + IFAULT := 1; + exit; + end; + AA := NC[L]; + for J := 1 to N do C[L,J] := C[L,J] / AA; // average the values in the cluster + + // Initialize AN1, AN2, ITRAN & NCP + // AN1(L) := NC(L) / (NC(L) - 1) + // AN2(L) := NC(L) / (NC(L) + 1) + // ITRAN(L) := 1 if cluster L is updated in the quick-transfer stage, + // := 0 otherwise + // In the optimal-transfer stage, NCP(L) stores the step at which + // cluster L is last updated. + // In the quick-transfer stage, NCP(L) stores the step at which + // cluster L is last updated plus M. + // + AN2[L] := AA / (AA + ONE); + AN1[L] := BIG; + if (AA > ONE) then AN1[L] := AA / (AA - ONE); + ITRAN[L] := 1; + NCP[L] := -1; + end; + + INDX := 0; + for IJ := 1 to ITER do + begin + // + // In this stage, there is only one pass through the data. Each + // point is re-allocated, if necessary, to the cluster that will + // induce the maximum reduction in within-cluster sum of squares. + // + OPTRA(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, LIVE, INDX); + // + // Stop if no transfer took place in the last M optimal transfer steps. + // + if (INDX = M) then goto cont150; + // + // Each point is tested in turn to see if it should be re-allocated + // to the cluster to which it is most likely to be transferred, + // IC2(I), from its present cluster, IC1(I). Loop through the + // data until no further change is to take place. + // + QTRAN(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, INDX); + // + // If there are only two clusters, there is no need to re-enter the + // optimal transfer stage. + // + if (K = 2) then goto cont150; + // + // NCP has to be set to 0 before entering OPTRA. + // + for L := 1 to K do NCP[L] := 0; + end; + // + // Since the specified number of iterations has been exceeded, set + // IFAULT := 2. This may indicate unforeseen looping. + // + IFAULT := 2; + // + // Compute within-cluster sum of squares for each cluster. + // +cont150: + for L := 1 to K do + begin + WSS[L] := ZERO; + for J := 1 to N do C[L,J] := ZERO; + end; + for I := 1 to M do + begin + II := IC1[I]; + for J := 1 to N do C[II,J] := C[II,J] + A[I,J]; + end; + for J := 1 to N do + begin + for L := 1 to K do C[L,J] := C[L,J] / (NC[L]); + for I := 1 to M do + begin + II := IC1[I]; + DA := A[I,J] - C[II,J]; + WSS[II] := WSS[II] + DA * DA; + end; + end; // 190 CONTINUE end; @@ -898,6 +895,13 @@ cont60: goto cont10; end; +procedure TKMeansFrm.UpdateBtnStates; +begin + VarInBtn.Enabled := AnySelected(VarList); + VarOutBtn.Enabled := AnySelected(SelList); + AllBtn.Enabled := VarList.Items.Count > 0; +end; + initialization {$I kmeansunit.lrs}