You've already forked lazarus-ccr
LazStats: Refactor KWAnovaUnit.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7825 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -42,6 +42,9 @@ type
|
|||||||
private
|
private
|
||||||
MWUReportFrame: TReportFrame;
|
MWUReportFrame: TReportFrame;
|
||||||
|
|
||||||
|
function Process_KruskalWallis(const ColNoSelected: IntDyneVec;
|
||||||
|
ANumGroups, AMinGroup: Integer): Boolean;
|
||||||
|
|
||||||
procedure Process_MannWhitney(const ColNoSelected: IntDyneVec;
|
procedure Process_MannWhitney(const ColNoSelected: IntDyneVec;
|
||||||
ANumGroups: Integer);
|
ANumGroups: Integer);
|
||||||
|
|
||||||
@ -107,21 +110,10 @@ end;
|
|||||||
|
|
||||||
procedure TKWAnovaForm.Compute;
|
procedure TKWAnovaForm.Compute;
|
||||||
var
|
var
|
||||||
i, j, k, m, ind_var, dep_var, min_grp, max_grp, group, total_n : integer;
|
i, ind_var, dep_var, min_grp, max_grp, group, total_n : integer;
|
||||||
NoTies, NoTieGroups, nogroups: integer;
|
nogroups: integer;
|
||||||
ColNoSelected : IntdyneVec = nil;
|
ColNoSelected : IntdyneVec = nil;
|
||||||
group_count : IntDyneVec = nil;
|
|
||||||
Ranks: DblDyneMat = nil;
|
|
||||||
X : DblDyneMat = nil;
|
|
||||||
RankSums: DblDyneVec = nil;
|
|
||||||
score, t, SumT, Avg, Probchi, H, CorrectedH, value : double;
|
|
||||||
Correction, TieSum: double;
|
|
||||||
lReport: TStrings;
|
|
||||||
begin
|
begin
|
||||||
// Allocate array memory
|
|
||||||
SetLength(Ranks, NoCases, 2);
|
|
||||||
SetLength(X, NoCases, 2);
|
|
||||||
|
|
||||||
// Get column numbers of the independent and dependent variables
|
// Get column numbers of the independent and dependent variables
|
||||||
ind_var := GetVariableIndex(OS3MainFrm.DataGrid, GrpEdit.Text);
|
ind_var := GetVariableIndex(OS3MainFrm.DataGrid, GrpEdit.Text);
|
||||||
dep_var := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text);
|
dep_var := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text);
|
||||||
@ -131,8 +123,8 @@ begin
|
|||||||
|
|
||||||
// Get minimum and maximum group codes
|
// Get minimum and maximum group codes
|
||||||
total_n := 0;
|
total_n := 0;
|
||||||
min_grp := 10000; //atoi(MainForm.Grid.Cells[ind_var,1].c_str);
|
min_grp := MaxInt;
|
||||||
max_grp := -10000;
|
max_grp := -MaxInt;
|
||||||
for i := 1 to NoCases do
|
for i := 1 to NoCases do
|
||||||
begin
|
begin
|
||||||
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
|
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
|
||||||
@ -143,143 +135,17 @@ begin
|
|||||||
end;
|
end;
|
||||||
nogroups := max_grp - min_grp + 1;
|
nogroups := max_grp - min_grp + 1;
|
||||||
|
|
||||||
NoTieGroups := 0;
|
// Execute Kruskal-Wallis ANOVA
|
||||||
SumT := 0.0;
|
if not Process_KruskalWallis(ColNoSelected, noGroups, min_grp) then
|
||||||
|
|
||||||
// Initialize arrays
|
|
||||||
SetLength(RankSums,nogroups);
|
|
||||||
SetLength(group_count,nogroups);
|
|
||||||
for i := 0 to nogroups-1 do
|
|
||||||
begin
|
|
||||||
group_count[i] := 0;
|
|
||||||
RankSums[i] := 0.0;
|
|
||||||
end;
|
|
||||||
|
|
||||||
// Setup for printer output
|
|
||||||
lReport := TStringList.Create;
|
|
||||||
try
|
|
||||||
lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE');
|
|
||||||
lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences');
|
|
||||||
lReport.Add('');
|
|
||||||
|
|
||||||
// Get data
|
|
||||||
for i := 1 to NoCases do
|
|
||||||
begin
|
|
||||||
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
|
|
||||||
score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[dep_var, i]));
|
|
||||||
group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ind_var, i])));
|
|
||||||
group := group - min_grp + 1;
|
|
||||||
if (group > nogroups) then
|
|
||||||
begin
|
|
||||||
ErrorMsg('Group codes must be sequential like 1 and 2!');
|
|
||||||
exit;
|
exit;
|
||||||
end;
|
|
||||||
group_count[group-1] := group_count[group-1] + 1;
|
|
||||||
X[i-1, 0] := score;
|
|
||||||
X[i-1, 1] := group;
|
|
||||||
end;
|
|
||||||
|
|
||||||
//Sort all scores in ascending order
|
|
||||||
for i := 1 to total_n - 1 do
|
|
||||||
begin
|
|
||||||
for j := i + 1 to total_n do
|
|
||||||
begin
|
|
||||||
if (X[i-1,0] > X[j-1,0]) then
|
|
||||||
begin
|
|
||||||
Exchange(X[i-1, 0], X[j-1, 0]);
|
|
||||||
Exchange(X[i-1, 1], X[j-1, 1]);
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
|
|
||||||
// Store ranks
|
|
||||||
for i := 0 to total_n-1 do
|
|
||||||
begin
|
|
||||||
Ranks[i,0] := i+1;
|
|
||||||
Ranks[i,1] := X[i,1];
|
|
||||||
end;
|
|
||||||
|
|
||||||
//Check for ties in ranks - replace with average rank and calculate
|
|
||||||
//T for each tie and sum of the T's
|
|
||||||
i := 1;
|
|
||||||
while i < total_n do
|
|
||||||
begin
|
|
||||||
j := i + 1;
|
|
||||||
TieSum := 0;
|
|
||||||
NoTies := 0;
|
|
||||||
while (j < total_n) do
|
|
||||||
begin
|
|
||||||
if (X[j-1,0] > X[i-1,0]) then
|
|
||||||
break;
|
|
||||||
if (X[j-1,0] = X[i-1,0]) then // match
|
|
||||||
begin
|
|
||||||
TieSum := TieSum + round(Ranks[j-1,0]);
|
|
||||||
NoTies := NoTies + 1;
|
|
||||||
end;
|
|
||||||
j := j + 1;
|
|
||||||
end;
|
|
||||||
|
|
||||||
if (NoTies > 0) then //At least one tie found
|
|
||||||
begin
|
|
||||||
TieSum := TieSum + Ranks[i-1,0];
|
|
||||||
NoTies := NoTies + 1;
|
|
||||||
Avg := TieSum / NoTies;
|
|
||||||
for j := i to i + NoTies - 1 do Ranks[j-1,0] := Avg;
|
|
||||||
t := Power(NoTies,3) - NoTies;
|
|
||||||
SumT := SumT + t;
|
|
||||||
NoTieGroups := NoTieGroups + 1;
|
|
||||||
i := i + (NoTies - 1);
|
|
||||||
end;
|
|
||||||
i := i + 1;
|
|
||||||
end; // next i
|
|
||||||
|
|
||||||
// Calculate sum of ranks in each group
|
|
||||||
for i := 0 to total_n-1 do
|
|
||||||
begin
|
|
||||||
group := round(Ranks[i, 1]);
|
|
||||||
RankSums[group-1] := RankSums[group-1] + Ranks[i, 0];
|
|
||||||
end;
|
|
||||||
|
|
||||||
// Calculate statistics
|
|
||||||
H := 0.0;
|
|
||||||
for j := 0 to nogroups-1 do
|
|
||||||
H := H + (RankSums[j] * RankSums[j] / (group_count[j]));
|
|
||||||
H := H * (12.0 / ( total_n * (total_n + 1)) );
|
|
||||||
H := H - (3.0 * (total_n + 1));
|
|
||||||
Correction := 1.0 - ( SumT / (Power(total_n,3) - total_n) );
|
|
||||||
CorrectedH := H / Correction;
|
|
||||||
k := max_grp - min_grp;
|
|
||||||
probChi := 1.0 - ChiSquaredProb(H, k);
|
|
||||||
|
|
||||||
// Report results
|
|
||||||
lReport.Add(' Score Rank Group');
|
|
||||||
lReport.Add('');
|
|
||||||
for i := 0 to total_n-1 do
|
|
||||||
lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]);
|
|
||||||
lReport.Add('');
|
|
||||||
lReport.Add('Sum of Ranks in each Group');
|
|
||||||
lReport.Add('Group Sum No. in Group');
|
|
||||||
for i := 0 to noGroups-1 do
|
|
||||||
lReport.Add('%3d %10.2f %5d', [i+min_grp, RankSums[i], group_count[i]]);
|
|
||||||
lReport.Add('');
|
|
||||||
lReport.Add('No. of tied rank groups %8d', [NoTieGroups]);
|
|
||||||
lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]);
|
|
||||||
lReport.Add('Correction for Ties: %8.4f', [Correction]);
|
|
||||||
lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]);
|
|
||||||
lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [k, ProbChi]);
|
|
||||||
|
|
||||||
FReportFrame.DisplayReport(lReport);
|
|
||||||
|
|
||||||
|
// Excute Mann-Whitney U Tests
|
||||||
if MWUChk.Checked then
|
if MWUChk.Checked then
|
||||||
begin
|
begin
|
||||||
MWUPage.TabVisible := true;
|
MWUPage.TabVisible := true;
|
||||||
Process_MannWhitney(ColNoSelected, NoGroups);
|
Process_MannWhitney(ColNoSelected, NoGroups);
|
||||||
end else
|
end else
|
||||||
MWUPage.TabVisible := false;
|
MWUPage.TabVisible := false;
|
||||||
|
|
||||||
finally
|
|
||||||
lReport.Free;
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
@ -333,6 +199,159 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
// Do Kruskal-Wallis One-Way ANOVA
|
||||||
|
function TKWAnovaForm.Process_KruskalWallis(const ColNoSelected: IntDyneVec;
|
||||||
|
ANumGroups, AMinGroup: Integer): Boolean;
|
||||||
|
var
|
||||||
|
i, j: Integer;
|
||||||
|
group, totalN, numTieGroups, numTies, depVar, indVar: Integer;
|
||||||
|
score, T, sumT, tieSum, avg, H, correction, correctedH, probChi: Double;
|
||||||
|
rankSums: DblDyneVec = nil;
|
||||||
|
groupCount: IntDyneVec = nil;
|
||||||
|
X: DblDyneMat = nil;
|
||||||
|
Ranks: DblDyneMat = nil;
|
||||||
|
lReport: TStrings;
|
||||||
|
begin
|
||||||
|
Result := false;
|
||||||
|
|
||||||
|
// Initialize arrays
|
||||||
|
SetLength(Ranks, NoCases, 2);
|
||||||
|
SetLength(X, NoCases, 2);
|
||||||
|
SetLength(RankSums, ANumGroups);
|
||||||
|
SetLength(groupCount, ANumGroups);
|
||||||
|
for i := 0 to ANumGroups-1 do
|
||||||
|
begin
|
||||||
|
groupCount[i] := 0;
|
||||||
|
rankSums[i] := 0.0;
|
||||||
|
end;
|
||||||
|
|
||||||
|
depVar := ColNoSelected[1];
|
||||||
|
indVar := ColNoSelected[0];
|
||||||
|
|
||||||
|
// Get data
|
||||||
|
totalN := 0;
|
||||||
|
for i := 1 to NoCases do
|
||||||
|
begin
|
||||||
|
if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue;
|
||||||
|
score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[depVar, i]));
|
||||||
|
group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[indVar, i])));
|
||||||
|
group := group - AMinGroup + 1;
|
||||||
|
if (group > ANumGroups) then
|
||||||
|
begin
|
||||||
|
ErrorMsg('Group codes must be sequential like 1 and 2!');
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
inc(groupCount[group-1]);
|
||||||
|
X[i-1, 0] := score;
|
||||||
|
X[i-1, 1] := group;
|
||||||
|
inc(totalN);
|
||||||
|
end;
|
||||||
|
|
||||||
|
//Sort all scores in ascending order
|
||||||
|
for i := 1 to totalN - 1 do
|
||||||
|
begin
|
||||||
|
for j := i + 1 to totalN do
|
||||||
|
begin
|
||||||
|
if (X[i-1, 0] > X[j-1, 0]) then
|
||||||
|
begin
|
||||||
|
Exchange(X[i-1, 0], X[j-1, 0]);
|
||||||
|
Exchange(X[i-1, 1], X[j-1, 1]);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Store ranks
|
||||||
|
for i := 0 to totalN - 1 do
|
||||||
|
begin
|
||||||
|
Ranks[i,0] := i+1;
|
||||||
|
Ranks[i,1] := X[i,1];
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Check for ties in ranks - replace with average rank and calculate
|
||||||
|
// T for each tie and sum of the T's
|
||||||
|
sumT := 0.0;
|
||||||
|
numTieGroups := 0;
|
||||||
|
i := 1;
|
||||||
|
while i < totalN do
|
||||||
|
begin
|
||||||
|
j := i + 1;
|
||||||
|
tieSum := 0;
|
||||||
|
numTies := 0;
|
||||||
|
while (j < totalN) do
|
||||||
|
begin
|
||||||
|
if (X[j-1, 0] > X[i-1, 0]) then
|
||||||
|
break;
|
||||||
|
if (X[j-1, 0] = X[i-1, 0]) then // match
|
||||||
|
begin
|
||||||
|
tieSum := tieSum + round(Ranks[j-1,0]);
|
||||||
|
inc(numTies);
|
||||||
|
end;
|
||||||
|
inc(j);
|
||||||
|
end;
|
||||||
|
|
||||||
|
if (numTies > 0) then // At least one tie found
|
||||||
|
begin
|
||||||
|
tieSum := tieSum + Ranks[i-1,0];
|
||||||
|
numTies := numTies + 1;
|
||||||
|
avg := tieSum / numTies;
|
||||||
|
for j := i to i + numTies - 1 do
|
||||||
|
Ranks[j-1,0] := avg;
|
||||||
|
t := numTies * numTies * numTies - numTies;
|
||||||
|
sumT := sumT + t;
|
||||||
|
inc(numTieGroups);
|
||||||
|
i := i + (numTies - 1);
|
||||||
|
end;
|
||||||
|
inc(i);
|
||||||
|
end; // next i
|
||||||
|
|
||||||
|
// Calculate sum of ranks in each group
|
||||||
|
for i := 0 to totalN - 1 do
|
||||||
|
begin
|
||||||
|
group := round(Ranks[i, 1]);
|
||||||
|
rankSums[group-1] := rankSums[group-1] + Ranks[i, 0];
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Calculate statistics
|
||||||
|
H := 0.0;
|
||||||
|
for j := 0 to ANumGroups-1 do
|
||||||
|
H := H + (rankSums[j] * RankSums[j] / (groupCount[j]));
|
||||||
|
H := H * (12.0 / ( totalN * (totalN + 1)) );
|
||||||
|
H := H - (3.0 * (totalN + 1));
|
||||||
|
correction := 1.0 - ( sumT / (totalN * totalN * totalN - totalN) );
|
||||||
|
correctedH := H / correction;
|
||||||
|
probChi := 1.0 - ChiSquaredProb(H, ANumGroups-1);
|
||||||
|
|
||||||
|
// Report results
|
||||||
|
lReport := TStringList.Create;
|
||||||
|
try
|
||||||
|
lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE');
|
||||||
|
lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences');
|
||||||
|
lReport.Add('');
|
||||||
|
|
||||||
|
lReport.Add(' Score Rank Group');
|
||||||
|
lReport.Add('');
|
||||||
|
for i := 0 to totalN-1 do
|
||||||
|
lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]);
|
||||||
|
lReport.Add('');
|
||||||
|
lReport.Add('Sum of Ranks in each Group');
|
||||||
|
lReport.Add('Group Sum No. in Group');
|
||||||
|
for i := 0 to ANumGroups-1 do
|
||||||
|
lReport.Add('%3d %10.2f %5d', [i+AMinGroup, RankSums[i], groupCount[i]]);
|
||||||
|
lReport.Add('');
|
||||||
|
lReport.Add('No. of tied rank groups %8d', [numTieGroups]);
|
||||||
|
lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]);
|
||||||
|
lReport.Add('Correction for Ties: %8.4f', [Correction]);
|
||||||
|
lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]);
|
||||||
|
lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [ANumGroups-1, ProbChi]);
|
||||||
|
|
||||||
|
FReportFrame.DisplayReport(lReport);
|
||||||
|
finally
|
||||||
|
lReport.Free;
|
||||||
|
end;
|
||||||
|
|
||||||
|
Result := true;
|
||||||
|
end;
|
||||||
|
|
||||||
// Do Mann-Whitney U tests on group pairs
|
// Do Mann-Whitney U tests on group pairs
|
||||||
procedure TKWAnovaForm.Process_MannWhitney(const ColNoSelected: IntDyneVec;
|
procedure TKWAnovaForm.Process_MannWhitney(const ColNoSelected: IntDyneVec;
|
||||||
ANumGroups: Integer);
|
ANumGroups: Integer);
|
||||||
|
Reference in New Issue
Block a user