LazStats: Refactor KWAnovaUnit.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7825 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-29 00:12:24 +00:00
parent 3f1dd4c7cb
commit 5475fff630

View File

@ -42,6 +42,9 @@ type
private
MWUReportFrame: TReportFrame;
function Process_KruskalWallis(const ColNoSelected: IntDyneVec;
ANumGroups, AMinGroup: Integer): Boolean;
procedure Process_MannWhitney(const ColNoSelected: IntDyneVec;
ANumGroups: Integer);
@ -107,21 +110,10 @@ end;
procedure TKWAnovaForm.Compute;
var
i, j, k, m, ind_var, dep_var, min_grp, max_grp, group, total_n : integer;
NoTies, NoTieGroups, nogroups: integer;
i, ind_var, dep_var, min_grp, max_grp, group, total_n : integer;
nogroups: integer;
ColNoSelected : IntdyneVec = nil;
group_count : IntDyneVec = nil;
Ranks: DblDyneMat = nil;
X : DblDyneMat = nil;
RankSums: DblDyneVec = nil;
score, t, SumT, Avg, Probchi, H, CorrectedH, value : double;
Correction, TieSum: double;
lReport: TStrings;
begin
// Allocate array memory
SetLength(Ranks, NoCases, 2);
SetLength(X, NoCases, 2);
// Get column numbers of the independent and dependent variables
ind_var := GetVariableIndex(OS3MainFrm.DataGrid, GrpEdit.Text);
dep_var := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text);
@ -131,8 +123,8 @@ begin
// Get minimum and maximum group codes
total_n := 0;
min_grp := 10000; //atoi(MainForm.Grid.Cells[ind_var,1].c_str);
max_grp := -10000;
min_grp := MaxInt;
max_grp := -MaxInt;
for i := 1 to NoCases do
begin
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
@ -143,143 +135,17 @@ begin
end;
nogroups := max_grp - min_grp + 1;
NoTieGroups := 0;
SumT := 0.0;
// Execute Kruskal-Wallis ANOVA
if not Process_KruskalWallis(ColNoSelected, noGroups, min_grp) then
exit;
// Initialize arrays
SetLength(RankSums,nogroups);
SetLength(group_count,nogroups);
for i := 0 to nogroups-1 do
// Excute Mann-Whitney U Tests
if MWUChk.Checked then
begin
group_count[i] := 0;
RankSums[i] := 0.0;
end;
// Setup for printer output
lReport := TStringList.Create;
try
lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE');
lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences');
lReport.Add('');
// Get data
for i := 1 to NoCases do
begin
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[dep_var, i]));
group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ind_var, i])));
group := group - min_grp + 1;
if (group > nogroups) then
begin
ErrorMsg('Group codes must be sequential like 1 and 2!');
exit;
end;
group_count[group-1] := group_count[group-1] + 1;
X[i-1, 0] := score;
X[i-1, 1] := group;
end;
//Sort all scores in ascending order
for i := 1 to total_n - 1 do
begin
for j := i + 1 to total_n do
begin
if (X[i-1,0] > X[j-1,0]) then
begin
Exchange(X[i-1, 0], X[j-1, 0]);
Exchange(X[i-1, 1], X[j-1, 1]);
end;
end;
end;
// Store ranks
for i := 0 to total_n-1 do
begin
Ranks[i,0] := i+1;
Ranks[i,1] := X[i,1];
end;
//Check for ties in ranks - replace with average rank and calculate
//T for each tie and sum of the T's
i := 1;
while i < total_n do
begin
j := i + 1;
TieSum := 0;
NoTies := 0;
while (j < total_n) do
begin
if (X[j-1,0] > X[i-1,0]) then
break;
if (X[j-1,0] = X[i-1,0]) then // match
begin
TieSum := TieSum + round(Ranks[j-1,0]);
NoTies := NoTies + 1;
end;
j := j + 1;
end;
if (NoTies > 0) then //At least one tie found
begin
TieSum := TieSum + Ranks[i-1,0];
NoTies := NoTies + 1;
Avg := TieSum / NoTies;
for j := i to i + NoTies - 1 do Ranks[j-1,0] := Avg;
t := Power(NoTies,3) - NoTies;
SumT := SumT + t;
NoTieGroups := NoTieGroups + 1;
i := i + (NoTies - 1);
end;
i := i + 1;
end; // next i
// Calculate sum of ranks in each group
for i := 0 to total_n-1 do
begin
group := round(Ranks[i, 1]);
RankSums[group-1] := RankSums[group-1] + Ranks[i, 0];
end;
// Calculate statistics
H := 0.0;
for j := 0 to nogroups-1 do
H := H + (RankSums[j] * RankSums[j] / (group_count[j]));
H := H * (12.0 / ( total_n * (total_n + 1)) );
H := H - (3.0 * (total_n + 1));
Correction := 1.0 - ( SumT / (Power(total_n,3) - total_n) );
CorrectedH := H / Correction;
k := max_grp - min_grp;
probChi := 1.0 - ChiSquaredProb(H, k);
// Report results
lReport.Add(' Score Rank Group');
lReport.Add('');
for i := 0 to total_n-1 do
lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]);
lReport.Add('');
lReport.Add('Sum of Ranks in each Group');
lReport.Add('Group Sum No. in Group');
for i := 0 to noGroups-1 do
lReport.Add('%3d %10.2f %5d', [i+min_grp, RankSums[i], group_count[i]]);
lReport.Add('');
lReport.Add('No. of tied rank groups %8d', [NoTieGroups]);
lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]);
lReport.Add('Correction for Ties: %8.4f', [Correction]);
lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]);
lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [k, ProbChi]);
FReportFrame.DisplayReport(lReport);
if MWUChk.Checked then
begin
MWUPage.TabVisible := true;
Process_MannWhitney(ColNoSelected, NoGroups);
end else
MWUPage.TabVisible := false;
finally
lReport.Free;
end;
MWUPage.TabVisible := true;
Process_MannWhitney(ColNoSelected, NoGroups);
end else
MWUPage.TabVisible := false;
end;
@ -333,6 +199,159 @@ begin
end;
// Do Kruskal-Wallis One-Way ANOVA
function TKWAnovaForm.Process_KruskalWallis(const ColNoSelected: IntDyneVec;
ANumGroups, AMinGroup: Integer): Boolean;
var
i, j: Integer;
group, totalN, numTieGroups, numTies, depVar, indVar: Integer;
score, T, sumT, tieSum, avg, H, correction, correctedH, probChi: Double;
rankSums: DblDyneVec = nil;
groupCount: IntDyneVec = nil;
X: DblDyneMat = nil;
Ranks: DblDyneMat = nil;
lReport: TStrings;
begin
Result := false;
// Initialize arrays
SetLength(Ranks, NoCases, 2);
SetLength(X, NoCases, 2);
SetLength(RankSums, ANumGroups);
SetLength(groupCount, ANumGroups);
for i := 0 to ANumGroups-1 do
begin
groupCount[i] := 0;
rankSums[i] := 0.0;
end;
depVar := ColNoSelected[1];
indVar := ColNoSelected[0];
// Get data
totalN := 0;
for i := 1 to NoCases do
begin
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[depVar, i]));
group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[indVar, i])));
group := group - AMinGroup + 1;
if (group > ANumGroups) then
begin
ErrorMsg('Group codes must be sequential like 1 and 2!');
exit;
end;
inc(groupCount[group-1]);
X[i-1, 0] := score;
X[i-1, 1] := group;
inc(totalN);
end;
//Sort all scores in ascending order
for i := 1 to totalN - 1 do
begin
for j := i + 1 to totalN do
begin
if (X[i-1, 0] > X[j-1, 0]) then
begin
Exchange(X[i-1, 0], X[j-1, 0]);
Exchange(X[i-1, 1], X[j-1, 1]);
end;
end;
end;
// Store ranks
for i := 0 to totalN - 1 do
begin
Ranks[i,0] := i+1;
Ranks[i,1] := X[i,1];
end;
// Check for ties in ranks - replace with average rank and calculate
// T for each tie and sum of the T's
sumT := 0.0;
numTieGroups := 0;
i := 1;
while i < totalN do
begin
j := i + 1;
tieSum := 0;
numTies := 0;
while (j < totalN) do
begin
if (X[j-1, 0] > X[i-1, 0]) then
break;
if (X[j-1, 0] = X[i-1, 0]) then // match
begin
tieSum := tieSum + round(Ranks[j-1,0]);
inc(numTies);
end;
inc(j);
end;
if (numTies > 0) then // At least one tie found
begin
tieSum := tieSum + Ranks[i-1,0];
numTies := numTies + 1;
avg := tieSum / numTies;
for j := i to i + numTies - 1 do
Ranks[j-1,0] := avg;
t := numTies * numTies * numTies - numTies;
sumT := sumT + t;
inc(numTieGroups);
i := i + (numTies - 1);
end;
inc(i);
end; // next i
// Calculate sum of ranks in each group
for i := 0 to totalN - 1 do
begin
group := round(Ranks[i, 1]);
rankSums[group-1] := rankSums[group-1] + Ranks[i, 0];
end;
// Calculate statistics
H := 0.0;
for j := 0 to ANumGroups-1 do
H := H + (rankSums[j] * RankSums[j] / (groupCount[j]));
H := H * (12.0 / ( totalN * (totalN + 1)) );
H := H - (3.0 * (totalN + 1));
correction := 1.0 - ( sumT / (totalN * totalN * totalN - totalN) );
correctedH := H / correction;
probChi := 1.0 - ChiSquaredProb(H, ANumGroups-1);
// Report results
lReport := TStringList.Create;
try
lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE');
lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences');
lReport.Add('');
lReport.Add(' Score Rank Group');
lReport.Add('');
for i := 0 to totalN-1 do
lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]);
lReport.Add('');
lReport.Add('Sum of Ranks in each Group');
lReport.Add('Group Sum No. in Group');
for i := 0 to ANumGroups-1 do
lReport.Add('%3d %10.2f %5d', [i+AMinGroup, RankSums[i], groupCount[i]]);
lReport.Add('');
lReport.Add('No. of tied rank groups %8d', [numTieGroups]);
lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]);
lReport.Add('Correction for Ties: %8.4f', [Correction]);
lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]);
lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [ANumGroups-1, ProbChi]);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
Result := true;
end;
// Do Mann-Whitney U tests on group pairs
procedure TKWAnovaForm.Process_MannWhitney(const ColNoSelected: IntDyneVec;
ANumGroups: Integer);