// File for testing: cansas_rotated.laz // NOTE: Run Correlation > Product-Moment with option Save Matrix to Grid // before executing the Average Link Clustering command in order to // have a symmetrical matrix. unit AvgLinkUnit; {$mode objfpc}{$H+} interface uses Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs, StdCtrls, ExtCtrls, MainUnit, Globals, OutputUnit, ContextHelpUnit; type { TAvgLinkFrm } TAvgLinkFrm = class(TForm) Bevel1: TBevel; ComputeBtn: TButton; HelpBtn: TButton; CloseBtn: TButton; MatrixTypeGroup: TRadioGroup; procedure ComputeBtnClick(Sender: TObject); procedure FormActivate(Sender: TObject); procedure FormCreate(Sender: TObject); procedure FormShow(Sender: TObject); procedure HelpBtnClick(Sender: TObject); private { private declarations } procedure PreTree(NN, CRIT: integer; LST: IntDyneVec; KLUS: IntDyneMat; AReport: TStrings); procedure TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec; NoPoints: integer; AReport: TStrings); public { public declarations } end; var AvgLinkFrm: TAvgLinkFrm; implementation uses Math; { TAvgLinkFrm } procedure TAvgLinkFrm.FormActivate(Sender: TObject); var w: Integer; begin w := MaxValue([HelpBtn.Width, ComputeBtn.Width, CloseBtn.Width]); HelpBtn.Constraints.MinWidth := w; ComputeBtn.Constraints.MinWidth := w; CloseBtn.Constraints.MinWidth := w; end; procedure TAvgLinkFrm.FormCreate(Sender: TObject); begin Assert(OS3MainFrm <> nil); end; procedure TAvgLinkFrm.FormShow(Sender: TObject); begin MatrixTypeGroup.ItemIndex := 0; end; procedure TAvgLinkFrm.HelpBtnClick(Sender: TObject); begin if ContextHelpForm = nil then Application.CreateForm(TContextHelpForm, ContextHelpForm); ContextHelpForm.HelpMessage((Sender as TButton).tag); end; procedure TAvgLinkFrm.ComputeBtnClick(Sender: TObject); const SIM_DIS: array[0..1] of String = ('Similarity', 'Dissimilarity'); VAR X : DblDyneMat; // similarity or dissimilarity matrix KLUS : IntDyneMat; LST : IntDyneVec; RX, SAV, SAV2, RRRMIN : double; NIN, NVAR : IntDyneVec; I, J, K, L, M, MN, N, CRIT, ITR, LIMIT : integer; // ROWS : StrDyneVec; nvalues : integer; lReport: TStrings; label label300, label60, label70; begin // Reference: Anderberg, M. R. (1973). Cluster analysis for // applications. New York: Academic press. // // Almost any text on cluster analysis should have a good // description of the average-linkage hierarchical clustering // algorithm. The algorithm begins with an initial similarity // or dissimilarity matrix between pairs of objects. The // algorithm proceeds in an iterative way. At each iteration // the two most similar (we assume similarities for explanation) // objects are combined into one group. At each successive // iteration, the two most similar objects or groups of objects are // merged. Similarity between groups is defined as the average // similarity between objects in one group with objects in the other. // // INPUT: A correlation matrix (or some other similarity or // dissimilarity matrix) in a file named MATRIX.DAT // This must contain all the elements of a full // (n x n), symmetrical matrix. Any format is // allowable, as long as numbers are separated by // blanks. // // OUTPUT: Output consists of a cluster history and a tree // diagram (dendogram). The cluster history // indicates, for each iteration, the objects // or clusters merged, and the average pairwise // similarity or dissimilarity in the resulting // cluster. // // Author: John Uebersax if (NoVariables <= 0) then begin MessageDlg('You must first load a matrix into the grid.', mtError, [mbOK], 0); exit; end; nvalues := NoVariables; SetLength(X,nvalues+1,nvalues+1); SetLength(KLUS,nvalues+1,3); SetLength(LST,nvalues+1); SetLength(NIN,nvalues+1); SetLength(NVAR,nvalues+1); lReport := TStringList.Create; try lReport.Add('AVERAGE LINK CLUSTER ANALYSIS'); lReport.Add('Adopted from ClusBas by John S. Uebersax'); lReport.Add(''); // This section does the cluster analysis, taking data from the Main Form. // Parameters controlling the analysis are obtained from the dialog form. M := nvalues; CRIT := MatrixTypeGroup.ItemIndex; // 0 := Similarity, 1 := dissimilarity // get matrix of data from OS3MainFrm for i := 1 to NoVariables do begin for j := 1 to NoVariables do X[i,j] := StrToFloat(OS3MainFrm.DataGrid.Cells[i,j]); end; LIMIT := M - 1; for i := 1 to M do begin NVAR[i] := i; NIN[i] := 1; end; // cluster analysis ITR := 0; label300: ITR := ITR + 1; // // determine groups to be merged this iteration // if (CRIT = 1) then // (BSCAN) dissimilarity matrix begin // This section looks for the minimum dissimilarity. It finds // element (K, L), where K and L are the most dissimilar objects // or groups. // N := 1; RRRMIN := 1000000.0; MN := M - 1; for i := 1 to MN do begin N := N + 1; for j := N to M do begin if (RRRMIN < 0.0) then continue; K := i; L := j; RRRMIN := X[i,j]; end; end; RX := RRRMIN; end else // SCAN procedure begin // This section looks for the maximum similarity. It finds // element (K, L), where K and L are the most similar objects or // groups. // N := 1; RX := -10000.0; for i := 1 to M do begin N := N + 1; for j := N to M do begin if (RX - X[i,j] > 0.0) then continue; K := i; L := j; RX := X[i,j]; end; end; end; // ARRANGE // // This section updates the similarity or dissimilarity matrix. // If two objects/groups K and L are merged, it calculates the // similarity or dissimilarity of the new group with all other objects // or groups. It does this by averaging the elements in row K of // X() with those in row L, and similarly for columns K and L. // The new elements are put in row K and column L (K < L). Row K // and column L are deleted. Columns and rows greater than L are // shifted up one column or row to fill in the gap. The resulting // matrix X() thus has one less column and row then at the beginning // of the subroutine. MN := M - 1; SAV := X[K,L]; SAV2 := X[K,K]; // Calculate similarity or dissimilarity of group formed by merging I // and J to all other groups by averaging the similarities or // dissimilarities of I and J with other groups for I := 1 to M do begin X[I,K] := (X[I,K] * NIN[K] + X[I,L] * NIN[L]) / (NIN[K] + NIN[L]); X[K,I] := X[I,K]; end; X[K,K] := SAV2 * NIN[K] * (NIN[K] - 1) + X[L,L] * NIN[L] * (NIN[L] - 1); X[K,K] := X[K,K] + SAV * 2 * NIN[K] * NIN[L]; X[K,K] := X[K,K] / ((NIN[K] + NIN[L]) * (NIN[K] + NIN[L] - 1)); if (L = M) then goto label60; for I := 1 to M do begin // Shift columns after J up one place for J := L to MN do X[I,J] := X[I,J+1]; end; for I := L to MN do begin // Shift rows after J up one place for J := 1 to M do X[I,J] := X[I+1,J]; end; NIN[K] := NIN[K] + NIN[L]; for I := L to MN do NIN[I] := NIN[I+1]; goto label70; label60: // Update number of objects in each cluster NIN[K] := NIN[K] + NIN[L]; label70: // end of ARRANGE procedure // continuation of CLUSV1 procedure // OUTPUT lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d %s: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, SIM_DIS[CRIT], RX]); { if (CRIT = 0) then lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d SIM: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX]) else lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d DIS: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX]); } KLUS[ITR,1] := NVAR[K]; // save in KLUS rather than write out to file as in KLUS[ITR,2] := NVAR[L]; // original program if not(L = M) then begin MN := M - 1; for i := L to MN do NVAR[i] := NVAR[i+1]; end; M := M - 1; if (ITR < LIMIT) then goto label300; lReport.Add(''); // End of CLUSV1 procedure // do pre-tree processing PreTree(nvalues, CRIT, LST, KLUS, lReport); lReport.Add(''); lReport.Add(DIVIDER); lReport.Add(''); // do TREE procedure TreePlot(KLUS, LST, nvalues, lReport); DisplayReport(lReport); finally lReport.Free; NVAR := nil; NIN := nil; LST := nil; KLUS := nil; X := nil; end; end; procedure TAvgLinkFrm.TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec; NoPoints: integer; AReport: TStrings); VAR outline : array[0..501] of char; aline : array[0..82] of char; valstr : string; tempstr : string; plotline : string; star : char; blank : char; col1, col2, colpos1, colpos2 : integer; noparts, startcol, endcol : integer; Results : StrDyneVec; ColPos : IntDyneVec; i, j, k, L, linecount, newcol, howlong, count: integer; begin linecount := 1; star := '*'; blank := ' '; SetLength(ColPos,NoPoints+2); SetLength(Results,NoPoints*2+3); //AReport.Add(''); // store initial column positions of vertical linkages for i := 1 to NoPoints do ColPos[Lst[i]] := 4 + (i * 5); // create column heading indented 10 spaces tempstr := 'UNIT '; for i := 1 to NoPoints do begin valstr := format('%5d',[Lst[i]]); tempstr := tempstr + valstr; end; Results[linecount] := tempstr; linecount := linecount + 1; // create beginning of vertical linkages plotline := 'STEP '; for i := 1 to NoPoints do plotline := plotline + ' *'; Results[linecount] := plotline; linecount := linecount + 1; // start dendoplot for i := 1 to NoPoints - 1 do begin outline := ''; valstr := Format('%5d',[i]); // put step no. first outline := valstr; // clear remainder of outline for j := 5 to (5 + NoPoints * 5) do outline[j] := ' '; outline[6 + NoPoints * 5] := #0; col1 := Clusters[i,1]; col2 := Clusters[i,2]; // find column positions for each variable colpos1 := ColPos[col1]; colpos2 := ColPos[col2]; for k := colpos1 to colpos2 do outline[k] := star; // change column positions 1/2 way between the matched ones newcol := colpos1 + ((colpos2 - colpos1) div 2); for k := 1 to NoPoints do if ((ColPos[k] = colpos1) or (ColPos[k] = colpos2)) then ColPos[k] := newcol; for k := 1 to NoPoints do begin L := ColPos[k]; if ((L <> colpos1) and (L <> colpos2)) then outline[L] := star; end; Results[linecount] := outline; linecount := linecount + 1; // add a line of connectors to next grouping outline := ' '; for j := 5 to (5 + NoPoints * 5) do outline[j] := blank; for j := 1 to NoPoints do begin colpos1 := ColPos[j]; outline[colpos1] := star; end; Results[linecount] := outline; linecount := linecount + 1; end; // output the Results in parts // determine number of pages needed for whole plot noparts := 0; howlong := Length(Results[1]); noparts := round(howlong / 80.0); if (noparts <= 0) then noparts := 1; if (noparts = 1) then // simply print the list for i := 0 to linecount - 1 do AReport.Add(Results[i]) else // break lines into strings of 15 units begin startcol := 0; endcol := 80; for i := 1 to noparts do begin AReport.Add('PART %d OUTPUT', [i]); for j := 0 to 80 do aline[j] := blank; for j := 0 to linecount - 1 do begin count := 0; outline := Results[j]; for k := startcol to endcol do begin aline[count] := outline[k]; count := count + 1; end; aline[count+1] := #0; AReport.Add(aline); end; AReport.Add(''); startcol := endcol + 1; endcol := endcol + 80; if (endcol > howlong) then endcol := howlong; end; end; Results := nil; ColPos := nil; end; procedure TAvgLinkFrm.PreTree(NN, CRIT: integer; LST: IntDyneVec; KLUS: IntDyneMat; AReport: TStrings); VAR I, II, J, NI, NJ, L, M, N, Ina, INEND, NHOLD, NLINES, INDX, ICOL, JCOL: integer; KSH, JEND, MSH: integer; JHOLD, NIN1: IntDyneVec; outline: string; label label2015, label2020, label2030, label2040, label2055, label2060; begin // PRETRE procedure SetLength(JHOLD,NN+1); SetLength(NIN1,NN+1); // int NN := nvalues; N := NN - 1; AReport.Add('No. of objects: %3d', [NN]); if (CRIT = 0) then AReport.Add('Matrix defined similarities among objects.') else AReport.Add('Matrix defined dissimilarities among objects.'); for I := 1 to NN do begin LST[I] := I; NIN1[I] := 1; end; for II := 1 to N do begin // name tabs I := KLUS[II][1]; J := KLUS[II][2]; NI := NIN1[I]; NJ := NIN1[J]; L := 1; label2015: if (LST[L] = I) then goto label2020; L := L + 1; if (L <= NN) then goto label2015; label2020: ICOL := L; Ina := ICOL + NI; INEND := Ina + NJ - 1; L := L + 1; label2030: if (LST[L] = J) then goto label2040; L := L + 1; if (L <= NN) then goto label2030; label2040: JCOL := L; JEND := JCOL + NJ - 1; NHOLD := 1; // remove J vector and store in HOLD for M := JCOL to JEND do begin JHOLD[NHOLD] := LST[M]; NHOLD := NHOLD + 1; end; // shift MSH := JEND; label2055: if (MSH = INEND) then goto label2060; KSH := MSH - NJ; LST[MSH] := LST[KSH]; MSH := MSH - 1; goto label2055; // insert hold vector label2060: NHOLD := 1; for M := Ina to INEND do begin LST[M] := JHOLD[NHOLD]; NHOLD := NHOLD + 1; end; NIN1[I] := NI + NJ; end; NLINES := (NN div 20) + 1; INDX := 0; for I := 1 to NLINES do begin outline := ' '; for J := 1 to 20 do begin INDX := INDX + 1; if (INDX <= NN) then // wp: This outline is not printed anywhere !!! outline := outline + Format(' %3d', [LST[INDX]]); end; end; AReport.Add(outline); // wp: added, without it outline would not be used anywhere NIN1 := nil; JHOLD := nil; // End of PRETRE procedure end; initialization {$I avglinkunit.lrs} end.