LazStats: Refactor AvgLinkUnit. Add data file cansas_rotated.laz.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7370 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-04-11 10:56:59 +00:00
parent 93146b63ea
commit 3f97dee397
4 changed files with 433 additions and 160 deletions

View File

@ -0,0 +1,289 @@
6
20
CASE 1
VARIABLE 1
5
F
3
99999
L
CASE 2
VARIABLE 2
5
F
3
99999
L
CASE 3
VARIABLE 3
6
F
4
99999
L
CASE 4
VARIABLE 4
5
F
3
99999
L
CASE 5
VARIABLE 5
5
F
3
99999
L
CASE 6
VARIABLE 6
5
F
3
99999
L
CASE 7
VARIABLE 7
5
F
3
99999
L
CASE 8
VARIABLE 8
5
F
3
99999
L
CASE 9
VARIABLE 9
5
F
3
99999
L
CASE 10
VARIABLE 10
6
F
4
99999
L
CASE 11
VARIABLE 11
5
F
3
99999
L
CASE 12
VARIABLE 12
6
F
4
99999
L
CASE 13
VARIABLE 13
6
F
4
99999
L
CASE 14
VARIABLE 14
5
F
3
99999
L
CASE 15
VARIABLE 15
5
F
3
99999
L
CASE 16
VARIABLE 16
6
F
4
99999
L
CASE 17
VARIABLE 17
5
F
3
99999
L
CASE 18
VARIABLE 18
5
F
3
99999
L
CASE 19
VARIABLE 19
5
F
3
99999
L
CASE 20
VARIABLE 20
5
F
3
99999
L
Case 0
CASE 1
CASE 2
CASE 3
CASE 4
CASE 5
CASE 6
CASE 7
CASE 8
CASE 9
CASE 10
CASE 11
CASE 12
CASE 13
CASE 14
CASE 15
CASE 16
CASE 17
CASE 18
CASE 19
CASE 20
Case 1
191.00
189.00
193.00
162.00
189.00
182.00
211.00
167.00
176.00
154.00
169.00
166.00
154.00
247.00
193.00
202.00
176.00
157.00
156.00
138.00
Case 2
36.00
37.00
38.00
35.00
35.00
36.00
38.00
34.00
31.00
33.00
34.00
33.00
34.00
46.00
36.00
37.00
37.00
32.00
33.00
33.00
Case 3
50.00
52.00
58.00
62.00
46.00
56.00
56.00
60.00
74.00
56.00
50.00
52.00
64.00
50.00
46.00
62.00
54.00
52.00
54.00
68.00
Case 4
5.00
2.00
12.00
12.00
13.00
4.00
8.00
6.00
15.00
17.00
17.00
13.00
14.00
1.00
6.00
12.00
4.00
11.00
15.00
2.00
Case 5
162.00
110.00
101.00
105.00
155.00
101.00
101.00
125.00
200.00
251.00
120.00
210.00
215.00
50.00
70.00
210.00
60.00
230.00
225.00
110.00
Case 6
60.00
60.00
101.00
37.00
58.00
42.00
38.00
40.00
40.00
250.00
38.00
115.00
105.00
50.00
31.00
120.00
25.00
80.00
73.00
43.00

View File

@ -1,19 +1,19 @@
object AvgLinkFrm: TAvgLinkFrm
Left = 589
Height = 136
Height = 132
Top = 409
Width = 382
Width = 383
AutoSize = True
BorderStyle = bsDialog
Caption = 'Average Linkage Hierarchical Clustering'
ClientHeight = 136
ClientWidth = 382
ClientHeight = 132
ClientWidth = 383
OnActivate = FormActivate
OnCreate = FormCreate
OnShow = FormShow
Position = poMainFormCenter
LCLVersion = '2.1.0.0'
object RadioGroup1: TRadioGroup
object MatrixTypeGroup: TRadioGroup
AnchorSideLeft.Control = Owner
AnchorSideLeft.Side = asrCenter
AnchorSideTop.Control = Owner
@ -43,31 +43,13 @@ object AvgLinkFrm: TAvgLinkFrm
)
TabOrder = 0
end
object CancelBtn: TButton
AnchorSideTop.Control = Bevel1
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = ComputeBtn
AnchorSideBottom.Side = asrBottom
Left = 147
Height = 25
Top = 96
Width = 62
Anchors = [akTop, akRight]
AutoSize = True
BorderSpacing.Top = 8
BorderSpacing.Right = 12
BorderSpacing.Bottom = 8
Caption = 'Cancel'
ModalResult = 2
TabOrder = 2
end
object ComputeBtn: TButton
AnchorSideLeft.Side = asrBottom
AnchorSideTop.Control = Bevel1
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = ReturnBtn
AnchorSideRight.Control = CloseBtn
AnchorSideBottom.Side = asrBottom
Left = 221
Left = 236
Height = 25
Top = 96
Width = 76
@ -75,48 +57,48 @@ object AvgLinkFrm: TAvgLinkFrm
AutoSize = True
BorderSpacing.Left = 8
BorderSpacing.Top = 8
BorderSpacing.Right = 12
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
Caption = 'Compute'
OnClick = ComputeBtnClick
TabOrder = 3
TabOrder = 2
end
object ReturnBtn: TButton
object CloseBtn: TButton
AnchorSideLeft.Side = asrBottom
AnchorSideTop.Control = Bevel1
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = Owner
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Side = asrBottom
Left = 309
Left = 320
Height = 25
Top = 96
Width = 61
Width = 55
Anchors = [akTop, akRight]
AutoSize = True
BorderSpacing.Left = 8
BorderSpacing.Top = 8
BorderSpacing.Right = 12
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
Caption = 'Return'
ModalResult = 1
TabOrder = 4
Caption = 'Close'
ModalResult = 11
TabOrder = 3
end
object HelpBtn: TButton
Tag = 105
AnchorSideTop.Control = Bevel1
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = CancelBtn
AnchorSideRight.Control = ComputeBtn
AnchorSideBottom.Side = asrBottom
Left = 84
Left = 177
Height = 25
Top = 96
Width = 51
Anchors = [akTop, akRight]
AutoSize = True
BorderSpacing.Left = 12
BorderSpacing.Left = 8
BorderSpacing.Top = 8
BorderSpacing.Right = 12
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
Caption = 'Help'
OnClick = HelpBtnClick
@ -124,15 +106,15 @@ object AvgLinkFrm: TAvgLinkFrm
end
object Bevel1: TBevel
AnchorSideLeft.Control = Owner
AnchorSideTop.Control = RadioGroup1
AnchorSideTop.Control = MatrixTypeGroup
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = Owner
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = ReturnBtn
AnchorSideBottom.Control = CloseBtn
Left = 0
Height = 8
Top = 80
Width = 382
Width = 383
Anchors = [akTop, akLeft, akRight]
Shape = bsBottomLine
end

View File

@ -1,3 +1,9 @@
// File for testing: cansas_rotated.laz
// NOTE: Run Correlation > Product-Moment with option Save Matrix to Grid
// before executing the Average Link Clustering command in order to
// have a symmetrical matrix.
unit AvgLinkUnit;
{$mode objfpc}{$H+}
@ -15,21 +21,19 @@ type
TAvgLinkFrm = class(TForm)
Bevel1: TBevel;
CancelBtn: TButton;
ComputeBtn: TButton;
HelpBtn: TButton;
ReturnBtn: TButton;
RadioGroup1: TRadioGroup;
CloseBtn: TButton;
MatrixTypeGroup: TRadioGroup;
procedure ComputeBtnClick(Sender: TObject);
procedure FormActivate(Sender: TObject);
procedure FormCreate(Sender: TObject);
procedure FormShow(Sender: TObject);
procedure HelpBtnClick(Sender: TObject);
procedure TreePlot(Clusters : IntDyneMat; Lst : IntDyneVec; NoPoints : integer);
procedure PreTree(NN, CRIT : integer; LST : IntDyneVec; KLUS : IntDyneMat);
private
{ private declarations }
procedure PreTree(NN, CRIT: integer; LST: IntDyneVec; KLUS: IntDyneMat; AReport: TStrings);
procedure TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec; NoPoints: integer; AReport: TStrings);
public
{ public declarations }
end;
@ -48,23 +52,20 @@ procedure TAvgLinkFrm.FormActivate(Sender: TObject);
var
w: Integer;
begin
w := MaxValue([HelpBtn.Width, CancelBtn.Width, ComputeBtn.Width, ReturnBtn.Width]);
w := MaxValue([HelpBtn.Width, ComputeBtn.Width, CloseBtn.Width]);
HelpBtn.Constraints.MinWidth := w;
CancelBtn.Constraints.MinWidth := w;
ComputeBtn.Constraints.MinWidth := w;
ReturnBtn.Constraints.MinWidth := w;
CloseBtn.Constraints.MinWidth := w;
end;
procedure TAvgLinkFrm.FormCreate(Sender: TObject);
begin
Assert(OS3MainFrm <> nil);
if OutputFrm = nil then
Application.CreateForm(TOutputFrm, OutputFrm);
end;
procedure TAvgLinkFrm.FormShow(Sender: TObject);
begin
RadioGroup1.ItemIndex := 0;
MatrixTypeGroup.ItemIndex := 0;
end;
procedure TAvgLinkFrm.HelpBtnClick(Sender: TObject);
@ -75,73 +76,76 @@ begin
end;
procedure TAvgLinkFrm.ComputeBtnClick(Sender: TObject);
const
SIM_DIS: array[0..1] of String = ('Similarity', 'Dissimilarity');
VAR
X : DblDyneMat; // similarity or dissimilarity matrix
KLUS : IntDyneMat;
LST : IntDyneVec;
RX, SAV, SAV2, RRRMIN : double;
NIN, NVAR : IntDyneVec;
I, J, K, L, M, MN, N, CRIT, ITR, LIMIT : integer;
// ROWS : StrDyneVec;
DIS, Title : string;
outline : string;
nvalues : integer;
label label300, label60, label70;
X : DblDyneMat; // similarity or dissimilarity matrix
KLUS : IntDyneMat;
LST : IntDyneVec;
RX, SAV, SAV2, RRRMIN : double;
NIN, NVAR : IntDyneVec;
I, J, K, L, M, MN, N, CRIT, ITR, LIMIT : integer;
// ROWS : StrDyneVec;
nvalues : integer;
lReport: TStrings;
label
label300, label60, label70;
begin
// Reference: Anderberg, M. R. (1973). Cluster analysis for
// applications. New York: Academic press.
//
// Almost any text on cluster analysis should have a good
// description of the average-linkage hierarchical clustering
// algorithm. The algorithm begins with an initial similarity
// or dissimilarity matrix between pairs of objects. The
// algorithm proceeds in an iterative way. At each iteration
// the two most similar (we assume similarities for explanation)
// objects are combined into one group. At each successive
// iteration, the two most similar objects or groups of objects are
// merged. Similarity between groups is defined as the average
// similarity between objects in one group with objects in the other.
//
// INPUT: A correlation matrix (or some other similarity or
// dissimilarity matrix) in a file named MATRIX.DAT
// This must contain all the elements of a full
// (n x n), symmetrical matrix. Any format is
// allowable, as long as numbers are separated by
// blanks.
//
// OUTPUT: Output consists of a cluster history and a tree
// diagram (dendogram). The cluster history
// indicates, for each iteration, the objects
// or clusters merged, and the average pairwise
// similarity or dissimilarity in the resulting
// cluster.
//
// Author: John Uebersax
// Reference: Anderberg, M. R. (1973). Cluster analysis for
// applications. New York: Academic press.
//
// Almost any text on cluster analysis should have a good
// description of the average-linkage hierarchical clustering
// algorithm. The algorithm begins with an initial similarity
// or dissimilarity matrix between pairs of objects. The
// algorithm proceeds in an iterative way. At each iteration
// the two most similar (we assume similarities for explanation)
// objects are combined into one group. At each successive
// iteration, the two most similar objects or groups of objects are
// merged. Similarity between groups is defined as the average
// similarity between objects in one group with objects in the other.
//
// INPUT: A correlation matrix (or some other similarity or
// dissimilarity matrix) in a file named MATRIX.DAT
// This must contain all the elements of a full
// (n x n), symmetrical matrix. Any format is
// allowable, as long as numbers are separated by
// blanks.
//
// OUTPUT: Output consists of a cluster history and a tree
// diagram (dendogram). The cluster history
// indicates, for each iteration, the objects
// or clusters merged, and the average pairwise
// similarity or dissimilarity in the resulting
// cluster.
//
// Author: John Uebersax
nvalues := NoVariables;
if (NoVariables <= 0) then
begin
ShowMessage('ERROR! You must first load a matrix into the grid.');
exit;
end;
if (NoVariables <= 0) then
begin
MessageDlg('You must first load a matrix into the grid.', mtError, [mbOK], 0);
exit;
end;
SetLength(X,nvalues+1,nvalues+1);
SetLength(KLUS,nvalues+1,3);
SetLength(LST,nvalues+1);
SetLength(NIN,nvalues+1);
SetLength(NVAR,nvalues+1);
nvalues := NoVariables;
SetLength(X,nvalues+1,nvalues+1);
SetLength(KLUS,nvalues+1,3);
SetLength(LST,nvalues+1);
SetLength(NIN,nvalues+1);
SetLength(NVAR,nvalues+1);
Title := 'Average Linkage Cluster Analysis. Adopted from ClusBas by John S. Uebersax';
lReport := TStringList.Create;
try
lReport.Add('AVERAGE LINK CLUSTER ANALYSIS');
lReport.Add('Adopted from ClusBas by John S. Uebersax');
lReport.Add('');
// This section does the cluster analysis, taking data from the Main Form.
// Parameters controlling the analysis are obtained from the dialog form.
DIS := 'DIS';
OutputFrm.RichEdit.Clear;
OutputFrm.RichEdit.Lines.Add(Title);
OutputFrm.RichEdit.Lines.Add('');
M := nvalues;
CRIT := RadioGroup1.ItemIndex; // 0 := Similarity, 1 := dissimilarity
CRIT := MatrixTypeGroup.ItemIndex; // 0 := Similarity, 1 := dissimilarity
// get matrix of data from OS3MainFrm
for i := 1 to NoVariables do
@ -255,17 +259,14 @@ label70: // end of ARRANGE procedure
// continuation of CLUSV1 procedure
// OUTPUT
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d %s: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, SIM_DIS[CRIT], RX]);
{
if (CRIT = 0) then
begin
outline := format('Group %3d is joined by group %3d. N is %3d ITER := %3d SIM := %10.3f',
[NVAR[K], NVAR[L],NIN[K],ITR,RX]);
OutputFrm.RichEdit.Lines.Add(outline);
end else
begin
outline := format('Group %3d is joined by group %3d. N is %3d ITER := %3d DIS := %10.3f',
[NVAR[K], NVAR[L],NIN[K],ITR,RX]);
OutputFrm.RichEdit.Lines.Add(outline);
end;
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d SIM: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX])
else
lReport.Add('Group %3d is joined by group %3d. N is %3d ITER: %3d DIS: %10.3f', [NVAR[K], NVAR[L], NIN[K], ITR, RX]);
}
KLUS[ITR,1] := NVAR[K]; // save in KLUS rather than write out to file as in
KLUS[ITR,2] := NVAR[L]; // original program
if not(L = M) then
@ -275,26 +276,32 @@ label70: // end of ARRANGE procedure
end;
M := M - 1;
if (ITR < LIMIT) then goto label300;
OutputFrm.RichEdit.Lines.Add('');
// OutputFrm.ShowModal;
lReport.Add('');
// End of CLUSV1 procedure
// do pre-tree processing
PreTree(nvalues, CRIT, LST, KLUS);
OutputFrm.ShowModal;
// do TREE procedure
TreePlot(KLUS,LST,nvalues);
OutputFrm.ShowModal;
PreTree(nvalues, CRIT, LST, KLUS, lReport);
lReport.Add('');
lReport.Add(DIVIDER);
lReport.Add('');
// cleanup
// do TREE procedure
TreePlot(KLUS, LST, nvalues, lReport);
DisplayReport(lReport);
finally
lReport.Free;
NVAR := nil;
NIN := nil;
LST := nil;
KLUS := nil;
X := nil;
end;
end;
procedure TAvgLinkFrm.TreePlot(Clusters : IntDyneMat; Lst : IntDyneVec; NoPoints : integer);
procedure TAvgLinkFrm.TreePlot(Clusters: IntDyneMat; Lst: IntDyneVec;
NoPoints: integer; AReport: TStrings);
VAR
outline : array[0..501] of char;
aline : array[0..82] of char;
@ -308,15 +315,14 @@ VAR
Results : StrDyneVec;
ColPos : IntDyneVec;
i, j, k, L, linecount, newcol, howlong, count: integer;
done : boolean;
begin
linecount := 1;
star := '*';
blank := ' ';
SetLength(ColPos,NoPoints+2);
SetLength(Results,NoPoints*2+3);
OutputFrm.RichEdit.Lines.Add('');
done := false;
//AReport.Add('');
// store initial column positions of vertical linkages
for i := 1 to NoPoints do ColPos[Lst[i]] := 4 + (i * 5);
@ -340,7 +346,7 @@ begin
for i := 1 to NoPoints - 1 do
begin
outline := '';
valstr := format('%5d',[i]); // put step no. first
valstr := Format('%5d',[i]); // put step no. first
outline := valstr;
// clear remainder of outline
for j := 5 to (5 + NoPoints * 5) do outline[j] := ' ';
@ -384,21 +390,17 @@ begin
if (noparts <= 0) then noparts := 1;
if (noparts = 1) then // simply print the list
begin
for i := 0 to linecount - 1 do
begin
OutputFrm.RichEdit.Lines.Add(Results[i]);
end;
end
AReport.Add(Results[i])
else // break lines into strings of 15 units
begin
startcol := 0;
endcol := 80;
for i := 1 to noparts do
begin
outline := format('PART %d OUTPUT',[i]);
OutputFrm.RichEdit.Lines.Add(outline);
for j := 0 to 80 do aline[j] := blank;
AReport.Add('PART %d OUTPUT', [i]);
for j := 0 to 80 do
aline[j] := blank;
for j := 0 to linecount - 1 do
begin
@ -410,9 +412,9 @@ begin
count := count + 1;
end;
aline[count+1] := #0;
OutputFrm.RichEdit.Lines.Add(aline);
AReport.Add(aline);
end;
OutputFrm.RichEdit.Lines.Add('');
AReport.Add('');
startcol := endcol + 1;
endcol := endcol + 80;
if (endcol > howlong) then endcol := howlong;
@ -422,25 +424,26 @@ begin
ColPos := nil;
end;
procedure TAvgLinkFrm.PreTree(NN, CRIT : integer; LST : IntDyneVec; KLUS : IntDyneMat);
procedure TAvgLinkFrm.PreTree(NN, CRIT: integer; LST: IntDyneVec;
KLUS: IntDyneMat; AReport: TStrings);
VAR
I, II, J, NI, NJ, L, M, N, Ina, INEND, NHOLD, NLINES, INDX, ICOL, JCOL : integer;
KSH, JEND, MSH : integer;
JHOLD, NIN1 : IntDyneVec;
outline, outvalue : string;
label label2015, label2020, label2030, label2040, label2055, label2060;
I, II, J, NI, NJ, L, M, N, Ina, INEND, NHOLD, NLINES, INDX, ICOL, JCOL: integer;
KSH, JEND, MSH: integer;
JHOLD, NIN1: IntDyneVec;
outline: string;
label
label2015, label2020, label2030, label2040, label2055, label2060;
begin
// PRETRE procedure
SetLength(JHOLD,NN+1);
SetLength(NIN1,NN+1);
// int NN := nvalues;
N := NN - 1;
outline := format('No. of objects := %3d',[NN]);
OutputFrm.RichEdit.Lines.Add(outline);
if (CRIT = 0) then outline := 'Matrix defined similarities among objects.'
else outline := 'Matrix defined dissimilarities among objects.';
OutputFrm.RichEdit.Lines.Add(outline);
AReport.Add('No. of objects: %3d', [NN]);
if (CRIT = 0) then
AReport.Add('Matrix defined similarities among objects.')
else
AReport.Add('Matrix defined dissimilarities among objects.');
for I := 1 to NN do
begin
@ -509,13 +512,12 @@ label2060:
for J := 1 to 20 do
begin
INDX := INDX + 1;
if (INDX <= NN) then
begin
outvalue := format(' %3d',[LST[INDX]]);
outline := outline + outvalue;
end;
if (INDX <= NN) then // wp: This outline is not printed anywhere !!!
outline := outline + Format(' %3d', [LST[INDX]]);
end;
end;
AReport.Add(outline); // wp: added, without it outline would not be used anywhere
NIN1 := nil;
JHOLD := nil;
// End of PRETRE procedure