Files
lazarus-ccr/applications/lazstats/source/forms/analysis/multivariate/avglinkunit.pas

528 lines
15 KiB
ObjectPascal
Raw Normal View History

// File for testing: cansas_rotated.laz
// NOTE: Run Correlation > Product-Moment with option Save Matrix to Grid
// before executing the Average Link Clustering command in order to
// have a symmetrical matrix.
unit AvgLinkUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Forms, Controls, Graphics, StdCtrls, ExtCtrls,
MainUnit, Globals, BasicStatsReportFormUnit;
type
{ TAvgLinkForm }
TAvgLinkForm = class(TBasicStatsReportForm)
Bevel1: TBevel;
MatrixTypeGroup: TRadioGroup;
private
{ private declarations }
procedure PreTree(NN, CRIT: integer; LST: IntDyneVec; KLUS: IntDyneMat; AReport: TStrings);
procedure TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec; NoPoints: integer; AReport: TStrings);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
function Validate(out AMsg: String; out AControl: TWinControl): Boolean; override;
public
{ public declarations }
end;
var
AvgLinkForm: TAvgLinkForm;
implementation
{$R *.lfm}
uses
Math;
{ TAvgLinkForm }
procedure TAvgLinkForm.AdjustConstraints;
begin
inherited;
ParamsPanel.Constraints.MinWidth := Max(
3*CloseBtn.Width + 2*CloseBtn.BorderSpacing.Left,
MatrixTypeGroup.Width
);
ParamsPanel.Constraints.MinHeight :=
MatrixTypeGroup.Height + ButtonBevel.Height + CloseBtn.BorderSpacing.Top +
CloseBtn.Height;
end;
{ Reference: Anderberg, M. R. (1973). Cluster analysis for applications.
New York: Academic press.
Almost any text on cluster analysis should have a good description of the
average-linkage hierarchical clustering algorithm.
The algorithm begins with an initial similarity or dissimilarity matrix
between pairs of objects.
The algorithm proceeds in an iterative way. At each iteration the two
most similar (we assume similarities for explanation) objects are combined
into one group.
At each successive iteration, the two most similar objects or groups of
objects are merged. Similarity between groups is defined as the average
similarity between objects in one group with objects in the other.
INPUT: A correlation matrix (or some other similarity or
dissimilarity matrix) in a file named MATRIX.DAT
This must contain all the elements of a full
(n x n), symmetrical matrix. Any format is
allowable, as long as numbers are separated by
blanks.
OUTPUT: Output consists of a cluster history and a tree
diagram (dendogram). The cluster history
indicates, for each iteration, the objects
or clusters merged, and the average pairwise
similarity or dissimilarity in the resulting
cluster.
Author: John Uebersax
}
procedure TAvgLinkForm.Compute;
const
SIM_DIS: array[0..1] of String = ('Similarity', 'Dissimilarity');
var
X: DblDyneMat = nil; // similarity or dissimilarity matrix
KLUS: IntDyneMat = nil;
LST: IntDyneVec = nil;
RX, SAV, SAV2, RRRMIN: double;
NIN: IntDyneVec = nil;
NVAR: IntDyneVec = nil;
I, J, K, L, M, MN, N, CRIT, ITR, LIMIT: integer;
nValues: integer;
lReport: TStrings;
label
label300, label60, label70;
begin
nValues := NoVariables;
SetLength(X, nValues+1, nvalues+1);
SetLength(KLUS, nValues+1, 3);
SetLength(LST, nValues+1);
SetLength(NIN, nValues+1);
SetLength(NVAR, nValues+1);
lReport := TStringList.Create;
try
lReport.Add('AVERAGE LINK CLUSTER ANALYSIS');
lReport.Add('Adopted from ClusBas by John S. Uebersax');
lReport.Add('');
// This section does the cluster analysis, taking data from the Main Form.
// Parameters controlling the analysis are obtained from the dialog form.
M := nvalues;
CRIT := MatrixTypeGroup.ItemIndex; // 0 := Similarity, 1 := dissimilarity
// get matrix of data from OS3MainFrm
for i := 1 to NoVariables do
begin
for j := 1 to NoVariables do
X[i,j] := StrToFloat(OS3MainFrm.DataGrid.Cells[i,j]);
end;
LIMIT := M - 1;
for i := 1 to M do
begin
NVAR[i] := i;
NIN[i] := 1;
end;
// cluster analysis
ITR := 0;
label300:
ITR := ITR + 1;
//
// determine groups to be merged this iteration
//
if (CRIT = 1) then // (BSCAN) dissimilarity matrix
begin
// This section looks for the minimum dissimilarity. It finds
// element (K, L), where K and L are the most dissimilar objects
// or groups.
//
N := 1;
RRRMIN := 1000000.0;
MN := M - 1;
for i := 1 to MN do
begin
N := N + 1;
for j := N to M do
begin
if (RRRMIN < 0.0) then continue;
K := i;
L := j;
RRRMIN := X[i,j];
end;
end;
RX := RRRMIN;
end else // SCAN procedure
begin
// This section looks for the maximum similarity. It finds
// element (K, L), where K and L are the most similar objects or
// groups.
//
N := 1;
RX := -10000.0;
for i := 1 to M do
begin
N := N + 1;
for j := N to M do
begin
if (RX - X[i,j] > 0.0) then continue;
K := i;
L := j;
RX := X[i,j];
end;
end;
end;
// ARRANGE
//
// This section updates the similarity or dissimilarity matrix.
// If two objects/groups K and L are merged, it calculates the
// similarity or dissimilarity of the new group with all other objects
// or groups. It does this by averaging the elements in row K of
// X() with those in row L, and similarly for columns K and L.
// The new elements are put in row K and column L (K < L). Row K
// and column L are deleted. Columns and rows greater than L are
// shifted up one column or row to fill in the gap. The resulting
// matrix X() thus has one less column and row then at the beginning
// of the subroutine.
MN := M - 1;
SAV := X[K,L];
SAV2 := X[K,K];
// Calculate similarity or dissimilarity of group formed by merging I
// and J to all other groups by averaging the similarities or
// dissimilarities of I and J with other groups
for I := 1 to M do
begin
X[I,K] := (X[I,K] * NIN[K] + X[I,L] * NIN[L]) / (NIN[K] + NIN[L]);
X[K,I] := X[I,K];
end;
X[K,K] := SAV2 * NIN[K] * (NIN[K] - 1) + X[L,L] * NIN[L] * (NIN[L] - 1);
X[K,K] := X[K,K] + SAV * 2 * NIN[K] * NIN[L];
X[K,K] := X[K,K] / ((NIN[K] + NIN[L]) * (NIN[K] + NIN[L] - 1));
if (L = M) then goto label60;
for I := 1 to M do
begin
// Shift columns after J up one place
for J := L to MN do X[I,J] := X[I,J+1];
end;
for I := L to MN do
begin
// Shift rows after J up one place
for J := 1 to M do X[I,J] := X[I+1,J];
end;
NIN[K] := NIN[K] + NIN[L];
for I := L to MN do NIN[I] := NIN[I+1];
goto label70;
label60:
// Update number of objects in each cluster
NIN[K] := NIN[K] + NIN[L];
label70: // end of ARRANGE procedure
// continuation of CLUSV1 procedure
// OUTPUT
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d %s: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, SIM_DIS[CRIT], RX]);
{
if (CRIT = 0) then
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d SIM: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX])
else
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d DIS: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX]);
}
KLUS[ITR,1] := NVAR[K]; // save in KLUS rather than write out to file as in
KLUS[ITR,2] := NVAR[L]; // original program
if (L <> M) then
begin
MN := M - 1;
for i := L to MN do NVAR[i] := NVAR[i+1];
end;
M := M - 1;
if (ITR < LIMIT) then goto label300;
lReport.Add('');
// End of CLUSV1 procedure
// do pre-tree processing
PreTree(nvalues, CRIT, LST, KLUS, lReport);
lReport.Add('');
lReport.Add(DIVIDER_AUTO);
lReport.Add('');
// do TREE procedure
TreePlot(KLUS, LST, nvalues, lReport);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
end;
procedure TAvgLinkForm.TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec;
NoPoints: integer; AReport: TStrings);
VAR
outline : array[0..501] of char;
aline : array[0..82] of char;
tempstr : string;
plotline : string;
star : char;
blank : char;
col1, col2, colpos1, colpos2 : integer;
noparts, startcol, endcol : integer;
Results : StrDyneVec = nil;
ColPos : IntDyneVec = nil;
i, j, k, L, linecount, newcol, howlong, count: integer;
begin
linecount := 1;
star := '*';
blank := ' ';
SetLength(ColPos, NoPoints + 2);
SetLength(Results, NoPoints*2 + 3);
// store initial column positions of vertical linkages
for i := 1 to NoPoints do ColPos[Lst[i]] := 4 + (i * 5);
// create column heading indented 10 spaces
tempstr := 'UNIT ';
for i := 1 to NoPoints do
tempstr := tempstr + Format('%5d', [Lst[i]]);
Results[linecount] := tempstr;
linecount := linecount + 1;
// create beginning of vertical linkages
plotline := 'STEP ';
for i := 1 to NoPoints do plotline := plotline + ' *';
Results[linecount] := plotline;
linecount := linecount + 1;
// start dendoplot
for i := 1 to NoPoints - 1 do
begin
// put step no. first
outline := Format('%5d', [i]);
// clear remainder of outline
for j := 5 to (5 + NoPoints * 5) do outline[j] := ' ';
outline[6 + NoPoints * 5] := #0;
col1 := Clusters[i,1];
col2 := Clusters[i,2];
// find column positions for each variable
colpos1 := ColPos[col1];
colpos2 := ColPos[col2];
for k := colpos1 to colpos2 do
outline[k] := star;
// change column positions 1/2 way between the matched ones
newcol := colpos1 + ((colpos2 - colpos1) div 2);
for k := 1 to NoPoints do
if ((ColPos[k] = colpos1) or (ColPos[k] = colpos2)) then
ColPos[k] := newcol;
for k := 1 to NoPoints do
begin
L := ColPos[k];
if ((L <> colpos1) and (L <> colpos2)) then
outline[L] := star;
end;
Results[linecount] := outline;
linecount := linecount + 1;
// add a line of connectors to next grouping
outline := ' ';
for j := 5 to (5 + NoPoints * 5) do
outline[j] := blank;
for j := 1 to NoPoints do
begin
colpos1 := ColPos[j];
outline[colpos1] := star;
end;
Results[linecount] := outline;
linecount := linecount + 1;
end;
// output the Results in parts
// determine number of pages needed for whole plot
howlong := Length(Results[1]);
noparts := round(howlong / 80.0);
if (noparts <= 0) then
noparts := 1;
if (noparts = 1) then // simply print the list
for i := 0 to linecount - 1 do
AReport.Add(Results[i])
else // break lines into strings of 15 units
begin
startcol := 0;
endcol := 80;
for i := 1 to noparts do
begin
AReport.Add('PART %d OUTPUT', [i]);
for j := 0 to 80 do
aline[j] := blank;
for j := 0 to linecount - 1 do
begin
count := 0;
outline := Results[j];
for k := startcol to endcol do
begin
aline[count] := outline[k];
count := count + 1;
end;
aline[count+1] := #0;
AReport.Add(aline);
end;
AReport.Add('');
startcol := endcol + 1;
endcol := endcol + 80;
if (endcol > howlong) then endcol := howlong;
end;
end;
end;
procedure TAvgLinkForm.PreTree(NN, CRIT: integer; LST: IntDyneVec;
KLUS: IntDyneMat; AReport: TStrings);
VAR
I, II, J, NI, NJ, L, M, N, Ina, INEND, NHOLD, NLINES, INDX, ICOL, JCOL: integer;
KSH, JEND, MSH: integer;
JHOLD: IntDyneVec = nil;
NIN1: IntDyneVec = nil;
outline: string;
label
label2015, label2020, label2030, label2040, label2055, label2060;
begin
// PRETRE procedure
SetLength(JHOLD, NN+1);
SetLength(NIN1, NN+1);
// int NN := nvalues;
N := NN - 1;
AReport.Add('No. of objects: %3d', [NN]);
if (CRIT = 0) then
AReport.Add('Matrix defined similarities among objects.')
else
AReport.Add('Matrix defined dissimilarities among objects.');
for I := 1 to NN do
begin
LST[I] := I;
NIN1[I] := 1;
end;
for II := 1 to N do
begin
// name tabs
I := KLUS[II][1];
J := KLUS[II][2];
NI := NIN1[I];
NJ := NIN1[J];
L := 1;
label2015:
if (LST[L] = I) then goto label2020;
L := L + 1;
if (L <= NN) then goto label2015;
label2020:
ICOL := L;
Ina := ICOL + NI;
INEND := Ina + NJ - 1;
L := L + 1;
label2030:
if (LST[L] = J) then goto label2040;
L := L + 1;
if (L <= NN) then goto label2030;
label2040:
JCOL := L;
JEND := JCOL + NJ - 1;
NHOLD := 1;
// remove J vector and store in HOLD
for M := JCOL to JEND do
begin
JHOLD[NHOLD] := LST[M];
NHOLD := NHOLD + 1;
end;
// shift
MSH := JEND;
label2055:
if (MSH = INEND) then goto label2060;
KSH := MSH - NJ;
LST[MSH] := LST[KSH];
MSH := MSH - 1;
goto label2055;
// insert hold vector
label2060:
NHOLD := 1;
for M := Ina to INEND do
begin
LST[M] := JHOLD[NHOLD];
NHOLD := NHOLD + 1;
end;
NIN1[I] := NI + NJ;
end;
NLINES := (NN div 20) + 1;
INDX := 0;
for I := 1 to NLINES do
begin
outline := ' ';
for J := 1 to 20 do
begin
INDX := INDX + 1;
if (INDX <= NN) then // wp: This outline is not printed anywhere !!!
outline := outline + Format(' %3d', [LST[INDX]]);
end;
end;
AReport.Add(outline); // wp: added, without it outline would not be used anywhere
NIN1 := nil;
JHOLD := nil;
// End of PRETRE procedure
end;
function TAvgLinkForm.Validate(out AMsg: String; out AControl: TWinControl): boolean;
begin
Result := false;
if (NoVariables <= 0) then
begin
AControl := MatrixTypeGroup;
AMsg := 'You must first load a matrix into the grid.';
exit;
end;
Result := true;
end;
end.