lazarus-ccr/applications/lazstats/source/forms/analysis/multivariate/kmeansunit.pas

unit KMeansUnit;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs,
  StdCtrls, Buttons, ExtCtrls,
  MainUnit, Globals, DataProcs, OutputUnit, ContextHelpUnit;

type

  { TKMeansFrm }

  TKMeansFrm = class(TForm)
    Bevel1: TBevel;
    DescChkBox: TCheckBox;
    HelpBtn: TButton;
    Panel1: TPanel;
    Panel2: TPanel;
    VarInBtn: TBitBtn;
    VarOutBtn: TBitBtn;
    AllBtn: TBitBtn;
    ResetBtn: TButton;
    CancelBtn: TButton;
    ComputeBtn: TButton;
    ReturnBtn: TButton;
    StdChkBox: TCheckBox;
    RepChkBox: TCheckBox;
    GroupBox1: TGroupBox;
    ItersEdit: TEdit;
    Label2: TLabel;
    Label3: TLabel;
    Label4: TLabel;
    ListBox1: TListBox;
    VarList: TListBox;
    NoClustersEdit: TEdit;
    Label1: TLabel;
    procedure AllBtnClick(Sender: TObject);
    procedure ComputeBtnClick(Sender: TObject);
    procedure FormActivate(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure FormShow(Sender: TObject);
    procedure HelpBtnClick(Sender: TObject);
    procedure ResetBtnClick(Sender: TObject);
    procedure VarInBtnClick(Sender: TObject);
    procedure VarOutBtnClick(Sender: TObject);
  private
    { private declarations }
    FAutoSized: Boolean;
    procedure KMNS(VAR A : DblDyneMat; M, N : integer;
                   VAR C : DblDyneMat; K : integer; VAR IC1 : IntDyneVec;
                   VAR IC2 : IntDyneVec; VAR NC : IntDyneVec;
                   VAR AN1 : DblDyneVec; VAR AN2 : DblDyneVec;
                   VAR NCP : IntDyneVec; VAR D : DblDyneVec;
                   VAR ITRAN : IntDyneVec; VAR LIVE : IntDyneVec;
                   ITER : integer; VAR WSS : DblDyneVec; IFAULT : integer);
    procedure OPTRA(VAR A : DblDyneMat; M, N : integer;
                    VAR C : DblDyneMat; K : integer;
                    VAR IC1 : IntDyneVec; VAR IC2 : IntDyneVec;
                    VAR NC : IntDyneVec; VAR AN1 : DblDyneVec;
                    VAR AN2 : DblDyneVec; VAR NCP : IntDyneVec;
                    VAR D : DblDyneVec; VAR ITRAN : IntDyneVec;
                    VAR LIVE : IntDyneVec; INDX : integer);
   procedure QTRAN(VAR A : DblDyneMat; M, N : integer;
                           VAR C : DblDyneMat; K : integer;
                           VAR IC1 : IntDyneVec; VAR IC2 : IntDyneVec;
                           VAR NC : IntDyneVec; VAR AN1 : DblDyneVec;
                           VAR AN2 : DblDyneVec; VAR NCP : IntDyneVec;
                           VAR D : DblDyneVec; VAR ITRAN : IntDyneVec;
                           INDX : integer);

  public
    { public declarations }
  end; 

var
  KMeansFrm: TKMeansFrm;

implementation

uses
  Math;

{ TKMeansFrm }

procedure TKMeansFrm.ResetBtnClick(Sender: TObject);
VAR cellstring : string;
    i : integer;
begin
     VarList.Clear;
     ListBox1.Clear;
     for i := 1 to NoVariables do
     begin
         cellstring := OS3MainFrm.DataGrid.Cells[i,0];
         VarList.Items.Add(cellstring);
     end;
     RepChkBox.Checked := false;
     StdChkBox.Checked := true;
     VarOutBtn.Enabled := false;
     DescChkBox.Checked := false;
     NoClustersEdit.Text := '';
     ItersEdit.Text := '100';
end;

procedure TKMeansFrm.VarInBtnClick(Sender: TObject);
VAR
   index, count, i : integer;
   cellstring : string;
begin
     count := 0;
     index := ListBox1.Items.Count;
     for i := 0 to index - 1 do
     begin
         if (VarList.Selected[i]) then
         begin
            cellstring := VarList.Items.strings[i];
            ListBox1.Items.Add(cellstring);
            count := count + 1;
         end;
     end;

     while (count > 0) do
     begin
           for i := 0 to VarList.Items.Count - 1 do
           begin
               if (VarList.Selected[i]) then
               begin
                  VarList.Items.Delete(i);
                  count := count - 1;
               end;
           end;
     end;
     VarOutBtn.Enabled := true;
end;

procedure TKMeansFrm.VarOutBtnClick(Sender: TObject);
VAR index : integer;
    cellstring : string;
begin
     index := ListBox1.ItemIndex;
     cellstring := ListBox1.Items.Strings[index];
     VarList.Items.Add(cellstring);
     ListBox1.Items.Delete(index);
end;

procedure TKMeansFrm.FormActivate(Sender: TObject);
var
  w: Integer;
begin
  if FAutoSized then
    exit;

  w := MaxValue([HelpBtn.Width, ResetBtn.Width, CancelBtn.Width, ComputeBtn.Width, ReturnBtn.Width]);
  HelpBtn.Constraints.MinWidth := w;
  ResetBtn.Constraints.MinWidth := w;
  CancelBtn.Constraints.MinWidth := w;
  ComputeBtn.Constraints.MinWidth := w;
  ReturnBtn.Constraints.MinWidth := w;

  Constraints.MinWidth := Width;
  Constraints.MinHeight := Height;

  FAutoSized := true;
end;

procedure TKMeansFrm.FormCreate(Sender: TObject);
begin
  Assert(OS3MainFrm <> nil);
  if OutputFrm = nil then
    Application.CreateForm(TOutputFrm, OutputFrm);
end;

procedure TKMeansFrm.FormShow(Sender: TObject);
begin
  ResetBtnClick(self);
end;

procedure TKMeansFrm.HelpBtnClick(Sender: TObject);
begin
  if ContextHelpForm = nil then
    Application.CreateForm(TContextHelpForm, ContextHelpForm);
  ContextHelpForm.HelpMessage((Sender as TButton).Tag);
end;

procedure TKMeansFrm.AllBtnClick(Sender: TObject);
VAR
   index, noitems : integer;
   cellstring : string;
begin
     noitems := VarList.Items.Count;
     for index := 0 to noitems - 1 do
     begin
          cellstring := VarList.Items.Strings[index];
          ListBox1.Items.Add(cellstring);
     end;
     VarList.Clear;
     VarOutBtn.Enabled := true;
end;

procedure TKMeansFrm.ComputeBtnClick(Sender: TObject);
VAR
   i, j, L, Ncols, N, M, K,IFAULT, ITER, col : integer;
   center, itemp : integer;
   IC1, IC2, NC, NCP, ITRAN, LIVE, ColSelected : IntDyneVec;
   A, C : DblDyneMat;
   D, AN1, AN2, WSS, DT : DblDyneVec;
   cellstring: string;
   outline : string;
   strval : string;
   varlabels, rowlabels : StrDyneVec;
   Mean, stddev : double;
label cleanup;

begin
     Ncols := ListBox1.Items.Count;
     if (Ncols <= 0) then
     begin
        ShowMessage('ERROR!  No variables selected to cluster.');
        exit;
     end;

     N := Ncols;
     M := NoCases;
     K := StrToInt(NoClustersEdit.Text);
     IFAULT := 0;
     ITER := StrToInt(ItersEdit.Text);

     SetLength(varlabels,Ncols);
     SetLength(rowlabels,NoCases);
     SetLength(ColSelected,Ncols);
     SetLength(A,M+1,N+1);
     SetLength(C,K+1,N+1);
     SetLength(D,M+1);
     SetLength(AN1,K+1);
     SetLength(AN2,K+1);
     SetLength(WSS,K+1);
     SetLength(DT,3);
     SetLength(IC1,M+1);
     SetLength(IC2,M+1);
     SetLength(NC,K+1);
     SetLength(NCP,K+1);
     SetLength(ITRAN,K+1);
     SetLength(LIVE,K+1);

     if (K <= 0) then
     begin
        ShowMessage('ERROR! You must enter the desired number of clusters.');
        goto cleanup;
     end;

     // initialize arrays
     for i := 1 to K do
     begin
         AN1[i] := 0.0;
         AN2[i] := 0.0;
         WSS[i] := 0.0;
         NC[i] := 0;
         NCP[i] := 0;
         ITRAN[i] := 0;
         LIVE[i] := 0;
         for j := 1 to N do C[i,j] := 0.0;
     end;
     for i := 1 to M do
     begin
         IC1[i] := 0;
         IC2[i] := 0;
         D[i] := 0.0;
     end;

     //Get labels and columns of selected variables
     for i := 0 to Ncols - 1 do
     begin
        cellstring := ListBox1.Items.Strings[i];
        for j := 0 to NoVariables - 1 do
        begin
            if (cellstring = OS3MainFrm.DataGrid.Cells[j+1,0]) then
            begin
                varlabels[i] := cellstring;
                ColSelected[i] := j+1;
            end;
        end;
     end;

     // Get labels of rows
     for i := 0 to NoCases - 1 do rowlabels[i] := OS3MainFrm.DataGrid.Cells[0,i+1];

     // read the data
     for i := 1 to M do
     begin
         if (NOT GoodRecord(i,N,ColSelected)) then continue;
         for j := 1 to N do
         begin
             col := ColSelected[j-1];
             A[i,j] := StrToFloat(OS3MainFrm.DataGrid.Cells[col,i]);
         end;
     end;

     OutputFrm.RichEdit.Clear;
     OutputFrm.RichEdit.Lines.Add('K-Means Clustering.  Adapted from AS 136  APPL. STATIST. (1979) VOL.28, NO.1');
     OutputFrm.RichEdit.Lines.Add('');
     outline := format('File := %s',[OS3MainFrm.FileNameEdit.Text]);
     OutputFrm.RichEdit.Lines.Add(outline);
     outline := format('No. Cases := %d, No. Variables := %d, No. Clusters := %d',[M,N,K]);
     OutputFrm.RichEdit.Lines.Add(outline);
     OutputFrm.RichEdit.Lines.Add('');

     // transform to z scores if needed
     if (StdChkBox.Checked = true) then
     begin
        for j := 1 to N do
        begin
            Mean := 0.0;
            stddev := 0.0;
            for i := 1 to M do
            begin
                Mean := Mean + A[i,j];
                stddev := stddev + (A[i,j] * A[i,j]);
            end;
            stddev := stddev - Mean * Mean / M;
            stddev := stddev / (M - 1);
            Mean := Mean / M;
            if (DescChkBox.Checked) then
            begin
               outline := format('Mean := %8.3f, Std.Dev. := %8.3f for %s',[Mean,stddev,varlabels[j-1]]);
               OutputFrm.RichEdit.Lines.Add(outline);
            end;
            for i := 1 to M do
            begin
                A[i,j] := (A[i,j] - Mean) / stddev;
                if (RepChkBox.Checked = true) then
                begin
                   col := ColSelected[j-1];
                   outline := format('%8.5f',[A[i,j]]);
                   OS3MainFrm.DataGrid.Cells[col,i] := outline;
                end;
            end;
        end;
     end;

     // Now enter initial points
     for L := 1 to K do
     begin
         center := 1 + (L-1) * (M div K); // initial cluster center
         for j := 1 to N do C[L,j] := A[center,j];
     end;

     // do analysis
     KMNS(A,M,N,C,K,IC1,IC2,NC,AN1,AN2,NCP,D,ITRAN,LIVE,ITER,WSS,IFAULT);

     // show results

     // sort subjects by cluster
     for i := 1 to M do IC2[i] := i; // store ids in here
     for i := 1 to M - 1 do
     begin
         for j := i+1 to M do
         begin
             if (IC1[i] > IC1[j]) then // swap these clusters and ids
             begin
                itemp := IC1[i];
                IC1[i] := IC1[j];
                IC1[j] := itemp;
                itemp := IC2[i];
                IC2[i] := IC2[j];
                IC2[j] := itemp;
             end;
         end;
     end;

     OutputFrm.RichEdit.Lines.Add('');
     OutputFrm.RichEdit.Lines.Add('NUMBER OF SUBJECTS IN EACH CLUSTER');
     for i := 1 to K do
     begin
         outline := format('Cluster := %d with %d cases.',[i,NC[i]]);
         OutputFrm.RichEdit.Lines.Add(outline);
     end;

     OutputFrm.RichEdit.Lines.Add('');
     OutputFrm.RichEdit.Lines.Add('PLACEMENT OF SUBJECTS IN CLUSTERS');
     OutputFrm.RichEdit.Lines.Add('CLUSTER SUBJECT');
     for i := 1 to M do
     begin
         outline := format('   %3d    %3d',[IC1[i],IC2[i]]);
         OutputFrm.RichEdit.Lines.Add(outline);
     end;

     OutputFrm.RichEdit.Lines.Add('');
     OutputFrm.RichEdit.Lines.Add('AVERAGE VARIABLE VALUES BY CLUSTER');
     outline := '               VARIABLES';
     OutputFrm.RichEdit.Lines.Add(outline);
     outline := 'CLUSTER';
     for j := 1 to N do
     begin
         strval := format('  %3d ',[j]);
         outline := outline + strval;
     end;
     OutputFrm.RichEdit.Lines.Add(outline);
     OutputFrm.RichEdit.Lines.Add('       ');
     for i := 1 to K do
     begin
         outline := format('   %3d ',[i]);
         for j := 1 to N do
         begin
             strval := format('%5.2f ',[C[i,j]]);
             outline := outline + strval;
         end;
         OutputFrm.RichEdit.Lines.Add(outline);
     end;
     OutputFrm.RichEdit.Lines.Add('');
     OutputFrm.RichEdit.Lines.Add('WITHIN CLUSTER SUMS OF SQUARES');
     for i := 1 to K do
     begin
         outline := format('Cluster %d := %6.3f',[i,WSS[i]]);
         OutputFrm.RichEdit.Lines.Add(outline);
     end;

     OutputFrm.ShowModal;

     // cleanup
cleanup:
     LIVE := nil;
     ITRAN := nil;
     NCP := nil;
     NC := nil;
     IC2 := nil;
     IC1 := nil;
     DT := nil;
     WSS := nil;
     AN2 := nil;
     AN1 := nil;
     D := nil;
     C := nil;
     A := nil;
     ColSelected := nil;
     rowlabels := nil;
     varlabels := nil;
end;

procedure TKMeansFrm.KMNS(VAR A : DblDyneMat; M, N : integer;
                           VAR C : DblDyneMat; K : integer; VAR IC1 : IntDyneVec;
                           VAR IC2 : IntDyneVec; VAR NC : IntDyneVec;
                           VAR AN1 : DblDyneVec; VAR AN2 : DblDyneVec;
                           VAR NCP : IntDyneVec; VAR D : DblDyneVec;
                           VAR ITRAN : IntDyneVec; VAR LIVE : IntDyneVec;
                           ITER : integer; VAR WSS : DblDyneVec; IFAULT : integer);
VAR
      DT : array[0..2] of double;
      BIG : double;
      ZERO : double;
      ONE : double;
      DA, DB, DC, TEMP, AA : double;
      L, II, INDX, I, J, IL, IJ : integer;
label cont50, cont40, cont150;

begin
      //      SUBROUTINE KMNS(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D,
      //     *    ITRAN, LIVE, ITER, WSS, IFAULT)
      //
      //     ALGORITHM AS 136  APPL. STATIST. (1979) VOL.28, NO.1
      //     Divide M points in N-dimensional space into K clusters so that
      //     the within cluster sum of squares is minimized.
      //
      //      INTEGER IC1(M), IC2(M), NC(K), NCP(K), ITRAN(K), LIVE(K)
      //      REAL    A(M,N), D(M), C(K,N), AN1(K), AN2(K), WSS(K), DT(2)
      //      REAL    ZERO, ONE
      //
      //     Define BIG to be a very large positive number
      //
      //      DATA BIG /1.E30/, ZERO /0.0/, ONE /1.0/
      //
      BIG := 1.0e30;
      ZERO := 0.0;
      ONE := 1.0;
      IFAULT := 3;
      if ((K <= 1) or (K >= M)) then
      begin
         ShowMessage('The no. of clusters must be less than the no. of variables.');
         exit;
      end;

      //     For each point I, find its two closest centres, IC1(I) and
      //     IC2(I).     Assign it to IC1(I).
      //
      for I := 1 to M do
      begin
	      IC1[I] := 1;
	      IC2[I] := 2;
	      for IL := 1 to 2 do
              begin
	          DT[IL] := ZERO;
	          for J := 1 to N do
                  begin
	              DA := A[I,J] - C[IL,J];
	              DT[IL] := DT[IL] + (DA * DA); //(squared difference for this comparison)
                  end; // 10   CONTINUE
              end; // 10 CONTINUE
	      if (DT[1] > DT[2]) then // THEN swap
              begin
	          IC1[I] := 2;
	          IC2[I] := 1;
	          TEMP := DT[1];
	          DT[1] := DT[2];
	          DT[2] := TEMP;
              end; // END IF
	      for L := 3 to K do // (remaining clusters)
              begin
	          DB := ZERO;
	          for J := 1 to N do // (variables)
                  begin
	              DC := A[I,J] - C[L,J];
	              DB := DB + DC * DC;
	              if (DB >= DT[2]) then  goto cont50;
                  end;
	          if (DB < DT[1]) then goto cont40;
	          DT[2] := DB;
	          IC2[I] := L;
	          goto cont50;
cont40:           DT[2] := DT[1];
	          IC2[I] := IC1[I];
	          DT[1] := DB;
	          IC1[I] := L;
cont50:       end;
      end; // 50 CONTINUE (next case)

      //     Update cluster centres to be the average of points contained
      //     within them.
      //
      for L := 1 to K do  // (clusters)
      begin
	      NC[L] := 0;
	      for J := 1 to N do C[L,J] := ZERO;  //(initialize clusters)
      end;
      for I := 1 to M do // (subjects)
      begin
	      L := IC1[I];  // which cluster the Ith case is in
	      NC[L] := NC[L] + 1; // no. in the cluster L
	      for J := 1 to N do  C[L,J] := C[L,J] + A[I,J]; // sum of var. values in the cluster L
      end;

      //     Check to see if there is any empty cluster at this stage
      //
      for L := 1 to K do
      begin
          if (NC[L] = 0) then
          begin
	          IFAULT := 1;
                  exit;
          end;
	  AA := NC[L];
	  for J := 1 to N do C[L,J] := C[L,J] / AA; // average the values in the cluster

          //     Initialize AN1, AN2, ITRAN & NCP
          //     AN1(L) := NC(L) / (NC(L) - 1)
          //     AN2(L) := NC(L) / (NC(L) + 1)
          //     ITRAN(L) := 1 if cluster L is updated in the quick-transfer stage,
          //              := 0 otherwise
          //     In the optimal-transfer stage, NCP(L) stores the step at which
          //     cluster L is last updated.
          //     In the quick-transfer stage, NCP(L) stores the step at which
          //     cluster L is last updated plus M.
          //
	  AN2[L] := AA / (AA + ONE);
	  AN1[L] := BIG;
	  if (AA > ONE) then AN1[L] := AA / (AA - ONE);
	  ITRAN[L] := 1;
	  NCP[L] := -1;
      end;
      INDX := 0;
      for IJ := 1 to ITER do
      begin
          //
          //     In this stage, there is only one pass through the data.   Each
          //     point is re-allocated, if necessary, to the cluster that will
          //     induce the maximum reduction in within-cluster sum of squares.
          //
	  OPTRA(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, LIVE, INDX);
          //
          //     Stop if no transfer took place in the last M optimal transfer
          //     steps.
          //
	  if (INDX = M) then goto cont150;
          //
          //     Each point is tested in turn to see if it should be re-allocated
          //     to the cluster to which it is most likely to be transferred,
          //     IC2(I), from its present cluster, IC1(I).   Loop through the
          //     data until no further change is to take place.
          //
	  QTRAN(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D, ITRAN, INDX);
          //
          //     If there are only two clusters, there is no need to re-enter the
          //     optimal transfer stage.
          //
	  if (K = 2) then goto cont150;
          //
          //     NCP has to be set to 0 before entering OPTRA.
          //
	  for L := 1 to K do NCP[L] := 0;
      end;
      //
      //     Since the specified number of iterations has been exceeded, set
      //     IFAULT := 2.   This may indicate unforeseen looping.
      //
      IFAULT := 2;
      //
      //     Compute within-cluster sum of squares for each cluster.
      //
cont150:
      for L := 1 to K do
      begin
	      WSS[L] := ZERO;
	      for J := 1 to N do C[L,J] := ZERO;
      end;
      for I := 1 to M do
      begin
	      II := IC1[I];
	      for J := 1 to N do C[II,J] := C[II,J] + A[I,J];
      end;
      for J := 1 to N do
      begin
	      for L := 1 to K do C[L,J] := C[L,J] / (NC[L]);
	      for I := 1 to M do
              begin
	          II := IC1[I];
	          DA := A[I,J] - C[II,J];
	          WSS[II] := WSS[II] + DA * DA;
              end;
      end; // 190 CONTINUE
end;


procedure TKMeansFrm.OPTRA(VAR A : DblDyneMat; M, N : integer;
                           VAR C : DblDyneMat; K : integer;
                           VAR IC1 : IntDyneVec; VAR IC2 : IntDyneVec;
                           VAR NC : IntDyneVec; VAR AN1 : DblDyneVec;
                           VAR AN2 : DblDyneVec; VAR NCP : IntDyneVec;
                           VAR D : DblDyneVec; VAR ITRAN : IntDyneVec;
                           VAR LIVE : IntDyneVec; INDX : integer);
VAR
      ZERO, ONE, BIG,DE, DF, DD, DC, DB, DA, R2, RR, AL1, AL2, ALT, ALW : double;
      I, J, L, L1, L2, LL : integer;
label cont30, cont60, cont70, cont90;

begin
      //      SUBROUTINE OPTRA(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D,
      //     *      ITRAN, LIVE, INDX)
      //
      //     ALGORITHM AS 136.1  APPL. STATIST. (1979) VOL.28, NO.1
      //
      //     This is the optimal transfer stage.
      //
      //     Each point is re-allocated, if necessary, to the cluster that
      //     will induce a maximum reduction in the within-cluster sum of
      //     squares.
      //
      //  INTEGER IC1(M), IC2(M), NC(K), NCP(K), ITRAN(K), LIVE(K)
      //  REAL    A(M,N), D(M), C(K,N), AN1(K), AN2(K), ZERO, ONE
      //
      //     Define BIG to be a very large positive number.
      //
      //  DATA BIG /1.0E30/, ZERO /0.0/, ONE/1.0/
      //
      //     If cluster L is updated in the last quick-transfer stage, it
      //     belongs to the live set throughout this stage.   Otherwise, at
      //     each step, it is not in the live set if it has not been updated
      //     in the last M optimal transfer steps.
      //

      ZERO := 0.0;
      ONE := 1.0;
      BIG := 1.0e30;

      for L := 1 to K do
      begin
	      if (ITRAN[L] = 1) then  LIVE[L] := M + 1;
      end; // 10 CONTINUE

      for I := 1 to M do
      begin
	      INDX := INDX + 1;
	      L1 := IC1[I];
	      L2 := IC2[I];
	      LL := L2;
          //
          //     If point I is the only member of cluster L1, no transfer.
          //
	  if (NC[L1] = 1) then goto cont90; // GO TO 90
          //
          //     If L1 has not yet been updated in this stage, no need to
          //     re-compute D(I).
          //
	  if (NCP[L1] = 0) then goto cont30; // GO TO 30
	  DE := ZERO;
	  for J := 1 to N do
          begin
	          DF := A[I,J] - C[L1,J];
	          DE := DE + DF * DF;
          end;
	  D[I] := DE * AN1[L1];
          //
          //     Find the cluster with minimum R2.
          //
cont30:
          DA := ZERO;
	  for J := 1 to N do
          begin
	          DB := A[I,J] - C[L2,J];
	          DA := DA + DB * DB;
          end;
	  R2 := DA * AN2[L2];
	  for L := 1 to K do
          begin
              //
              //     If I >:= LIVE(L1), then L1 is not in the live set.   If this is
              //     true, we only need to consider clusters that are in the live set
              //     for possible transfer of point I.   Otherwise, we need to consider
              //     all possible clusters.
              //
	      if ((I >= LIVE[L1]) and (I >= LIVE[L]) or (L = L1) or (L = LL)) then goto cont60;
	      RR := R2 / AN2[L];
	      DC := ZERO;
	      for J := 1 to N do
              begin
	              DD := A[I,J] - C[L,J];
	              DC := DC + DD * DD;
	              if (DC >= RR) then goto cont60;
              end;
	      R2 := DC * AN2[L];
	      L2 := L;
cont60:
          end; // 60     CONTINUE
	  if (R2 < D[I]) then goto cont70;
          //
          //     If no transfer is necessary, L2 is the new IC2(I).
          //
	  IC2[I] := L2;
	  goto cont90; // GO TO 90
          //
          //     Update cluster centres, LIVE, NCP, AN1 & AN2 for clusters L1 and
          //     L2, and update IC1(I) & IC2(I).
          //
cont70:
          INDX := 0;
	  LIVE[L1] := M + I;
	  LIVE[L2] := M + I;
	  NCP[L1] := I;
	  NCP[L2] := I;
	  AL1 := NC[L1];
	  ALW := AL1 - ONE;
	  AL2 := NC[L2];
	  ALT := AL2 + ONE;
	  for J := 1 to N do
          begin
	          C[L1,J] := (C[L1,J] * AL1 - A[I,J]) / ALW;
	          C[L2,J] := (C[L2,J] * AL2 + A[I,J]) / ALT;
          end;
	  NC[L1] := NC[L1] - 1;
	  NC[L2] := NC[L2] + 1;
	  AN2[L1] := ALW / AL1;
	  AN1[L1] := BIG;
	  if (ALW > ONE) then AN1[L1] := ALW / (ALW - ONE);
	  AN1[L2] := ALT / AL2;
	  AN2[L2] := ALT / (ALT + ONE);
	  IC1[I] := L2;
	  IC2[I] := L1;
cont90:
          // 90   CONTINUE
	  if (INDX = M) then exit;
      end; // 100 CONTINUE
      for L := 1 to K do
      begin
              //
              //     ITRAN(L) := 0 before entering QTRAN.   Also, LIVE(L) has to be
              //     decreased by M before re-entering OPTRA.
              //
	      ITRAN[L] := 0;
	      LIVE[L] := LIVE[L] - M;
      end; // 110 CONTINUE
end;

procedure TKMeansFrm.QTRAN(VAR A : DblDyneMat; M, N : integer;
                           VAR C : DblDyneMat; K : integer;
                           VAR IC1 : IntDyneVec; VAR IC2 : IntDyneVec;
                           VAR NC : IntDyneVec; VAR AN1 : DblDyneVec;
                           VAR AN2 : DblDyneVec; VAR NCP : IntDyneVec;
                           VAR D : DblDyneVec; VAR ITRAN : IntDyneVec;
                           INDX : integer);
VAR
      BIG, ZERO, ONE, DA, DB, DE, DD, R2, AL1, ALW, AL2, ALT : double;
      I, J, ICOUN, ISTEP, L1, L2 : integer;
label cont10, cont30, cont60;

begin
      //      SUBROUTINE QTRAN(A, M, N, C, K, IC1, IC2, NC, AN1, AN2, NCP, D,
      //     *    ITRAN, INDX)
      //
      //     ALGORITHM AS 136.2  APPL. STATIST. (1979) VOL.28, NO.1
      //
      //     This is the quick transfer stage.
      //     IC1(I) is the cluster which point I belongs to.
      //     IC2(I) is the cluster which point I is most likely to be
      //         transferred to.
      //     For each point I, IC1(I) & IC2(I) are switched, if necessary, to
      //     reduce within-cluster sum of squares.  The cluster centres are
      //     updated after each step.
      //
      // INTEGER IC1(M), IC2(M), NC(K), NCP(K), ITRAN(K)
      // REAL    A(M,N), D(M), C(K,N), AN1(K), AN2(K), ZERO, ONE
      //
      //     Define BIG to be a very large positive number
      //
      // DATA BIG /1.0E30/, ZERO /0.0/, ONE /1.0/
      //
      //     In the optimal transfer stage, NCP(L) indicates the step at which
      //     cluster L is last updated.   In the quick transfer stage, NCP(L)
      //     is equal to the step at which cluster L is last updated plus M.
      //
      BIG := 1.0e30;
      ZERO := 0.0;
      ONE := 1.0;
      ICOUN := 0;
      ISTEP := 0;
cont10:
      for I := 1 to M do
      begin
	  ICOUN := ICOUN + 1;
	  ISTEP := ISTEP + 1;
	  L1 := IC1[I];
	  L2 := IC2[I];
          //
          //     If point I is the only member of cluster L1, no transfer.
          //
	  if (NC[L1] = 1) then goto cont60;
          //
          //     If ISTEP > NCP(L1), no need to re-compute distance from point I to
          //     cluster L1.   Note that if cluster L1 is last updated exactly M
          //     steps ago, we still need to compute the distance from point I to
          //     cluster L1.
          //
	  if (ISTEP > NCP[L1]) then goto cont30;
	  DA := ZERO;
	  for J := 1 to N do
          begin
	          DB := A[I,J] - C[L1,J];
	          DA := DA + DB * DB;
          end;
	  D[I] := DA * AN1[L1];
          //
          //     If ISTEP >:= both NCP(L1) & NCP(L2) there will be no transfer of
          //     point I at this step.
          //
cont30:
          if ((ISTEP >= NCP[L1]) and (ISTEP >= NCP[L2])) then goto cont60;
	  R2 := D[I] / AN2[L2];
	  DD := ZERO;
	  for J := 1 to N do
          begin
	          DE := A[I,J] - C[L2,J];
	          DD := DD + DE * DE;
	          if (DD >= R2) then goto cont60;
          end; // 40   CONTINUE
          //
          //     Update cluster centres, NCP, NC, ITRAN, AN1 & AN2 for clusters
          //     L1 & L2.   Also update IC1(I) & IC2(I).   Note that if any
          //     updating occurs in this stage, INDX is set back to 0.
          //
	  ICOUN := 0;
	  INDX := 0;
	  ITRAN[L1] := 1;
	  ITRAN[L2] := 1;
	  NCP[L1] := ISTEP + M;
	  NCP[L2] := ISTEP + M;
	  AL1 := NC[L1];
	  ALW := AL1 - ONE;
	  AL2 := NC[L2];
	  ALT := AL2 + ONE;
	  for J := 1 to N do
          begin
	          C[L1,J] := (C[L1,J] * AL1 - A[I,J]) / ALW;
	          C[L2,J] := (C[L2,J] * AL2 + A[I,J]) / ALT;
          end; // 50   CONTINUE
	  NC[L1] := NC[L1] - 1;
	  NC[L2] := NC[L2] + 1;
	  AN2[L1] := ALW / AL1;
	  AN1[L1] := BIG;
	  if (ALW > ONE) then AN1[L1] := ALW / (ALW - ONE);
	  AN1[L2] := ALT / AL2;
	  AN2[L2] := ALT / (ALT + ONE);
	  IC1[I] := L2;
	  IC2[I] := L1;
          //
          //     If no re-allocation took place in the last M steps, return.
          //
cont60:
          if (ICOUN = M) then exit;
      end; // 70 CONTINUE
      goto cont10;
end;

initialization
  {$I kmeansunit.lrs}

end.