From 5475fff630ec2d7e57aada5afc4012e133df8ec5 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Thu, 29 Oct 2020 00:12:24 +0000 Subject: [PATCH] LazStats: Refactor KWAnovaUnit. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7825 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../analysis/nonparametric/kwanovaunit.pas | 319 ++++++++++-------- 1 file changed, 169 insertions(+), 150 deletions(-) diff --git a/applications/lazstats/source/forms/analysis/nonparametric/kwanovaunit.pas b/applications/lazstats/source/forms/analysis/nonparametric/kwanovaunit.pas index 7caca24c7..a24768718 100644 --- a/applications/lazstats/source/forms/analysis/nonparametric/kwanovaunit.pas +++ b/applications/lazstats/source/forms/analysis/nonparametric/kwanovaunit.pas @@ -42,6 +42,9 @@ type private MWUReportFrame: TReportFrame; + function Process_KruskalWallis(const ColNoSelected: IntDyneVec; + ANumGroups, AMinGroup: Integer): Boolean; + procedure Process_MannWhitney(const ColNoSelected: IntDyneVec; ANumGroups: Integer); @@ -107,21 +110,10 @@ end; procedure TKWAnovaForm.Compute; var - i, j, k, m, ind_var, dep_var, min_grp, max_grp, group, total_n : integer; - NoTies, NoTieGroups, nogroups: integer; + i, ind_var, dep_var, min_grp, max_grp, group, total_n : integer; + nogroups: integer; ColNoSelected : IntdyneVec = nil; - group_count : IntDyneVec = nil; - Ranks: DblDyneMat = nil; - X : DblDyneMat = nil; - RankSums: DblDyneVec = nil; - score, t, SumT, Avg, Probchi, H, CorrectedH, value : double; - Correction, TieSum: double; - lReport: TStrings; begin - // Allocate array memory - SetLength(Ranks, NoCases, 2); - SetLength(X, NoCases, 2); - // Get column numbers of the independent and dependent variables ind_var := GetVariableIndex(OS3MainFrm.DataGrid, GrpEdit.Text); dep_var := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text); @@ -131,8 +123,8 @@ begin // Get minimum and maximum group codes total_n := 0; - min_grp := 10000; //atoi(MainForm.Grid.Cells[ind_var,1].c_str); - max_grp := -10000; + min_grp := MaxInt; + max_grp := -MaxInt; for i := 1 to NoCases do begin if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue; @@ -143,143 +135,17 @@ begin end; nogroups := max_grp - min_grp + 1; - NoTieGroups := 0; - SumT := 0.0; + // Execute Kruskal-Wallis ANOVA + if not Process_KruskalWallis(ColNoSelected, noGroups, min_grp) then + exit; - // Initialize arrays - SetLength(RankSums,nogroups); - SetLength(group_count,nogroups); - for i := 0 to nogroups-1 do + // Excute Mann-Whitney U Tests + if MWUChk.Checked then begin - group_count[i] := 0; - RankSums[i] := 0.0; - end; - - // Setup for printer output - lReport := TStringList.Create; - try - lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE'); - lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences'); - lReport.Add(''); - - // Get data - for i := 1 to NoCases do - begin - if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue; - score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[dep_var, i])); - group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ind_var, i]))); - group := group - min_grp + 1; - if (group > nogroups) then - begin - ErrorMsg('Group codes must be sequential like 1 and 2!'); - exit; - end; - group_count[group-1] := group_count[group-1] + 1; - X[i-1, 0] := score; - X[i-1, 1] := group; - end; - - //Sort all scores in ascending order - for i := 1 to total_n - 1 do - begin - for j := i + 1 to total_n do - begin - if (X[i-1,0] > X[j-1,0]) then - begin - Exchange(X[i-1, 0], X[j-1, 0]); - Exchange(X[i-1, 1], X[j-1, 1]); - end; - end; - end; - - // Store ranks - for i := 0 to total_n-1 do - begin - Ranks[i,0] := i+1; - Ranks[i,1] := X[i,1]; - end; - - //Check for ties in ranks - replace with average rank and calculate - //T for each tie and sum of the T's - i := 1; - while i < total_n do - begin - j := i + 1; - TieSum := 0; - NoTies := 0; - while (j < total_n) do - begin - if (X[j-1,0] > X[i-1,0]) then - break; - if (X[j-1,0] = X[i-1,0]) then // match - begin - TieSum := TieSum + round(Ranks[j-1,0]); - NoTies := NoTies + 1; - end; - j := j + 1; - end; - - if (NoTies > 0) then //At least one tie found - begin - TieSum := TieSum + Ranks[i-1,0]; - NoTies := NoTies + 1; - Avg := TieSum / NoTies; - for j := i to i + NoTies - 1 do Ranks[j-1,0] := Avg; - t := Power(NoTies,3) - NoTies; - SumT := SumT + t; - NoTieGroups := NoTieGroups + 1; - i := i + (NoTies - 1); - end; - i := i + 1; - end; // next i - - // Calculate sum of ranks in each group - for i := 0 to total_n-1 do - begin - group := round(Ranks[i, 1]); - RankSums[group-1] := RankSums[group-1] + Ranks[i, 0]; - end; - - // Calculate statistics - H := 0.0; - for j := 0 to nogroups-1 do - H := H + (RankSums[j] * RankSums[j] / (group_count[j])); - H := H * (12.0 / ( total_n * (total_n + 1)) ); - H := H - (3.0 * (total_n + 1)); - Correction := 1.0 - ( SumT / (Power(total_n,3) - total_n) ); - CorrectedH := H / Correction; - k := max_grp - min_grp; - probChi := 1.0 - ChiSquaredProb(H, k); - - // Report results - lReport.Add(' Score Rank Group'); - lReport.Add(''); - for i := 0 to total_n-1 do - lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]); - lReport.Add(''); - lReport.Add('Sum of Ranks in each Group'); - lReport.Add('Group Sum No. in Group'); - for i := 0 to noGroups-1 do - lReport.Add('%3d %10.2f %5d', [i+min_grp, RankSums[i], group_count[i]]); - lReport.Add(''); - lReport.Add('No. of tied rank groups %8d', [NoTieGroups]); - lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]); - lReport.Add('Correction for Ties: %8.4f', [Correction]); - lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]); - lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [k, ProbChi]); - - FReportFrame.DisplayReport(lReport); - - if MWUChk.Checked then - begin - MWUPage.TabVisible := true; - Process_MannWhitney(ColNoSelected, NoGroups); - end else - MWUPage.TabVisible := false; - - finally - lReport.Free; - end; + MWUPage.TabVisible := true; + Process_MannWhitney(ColNoSelected, NoGroups); + end else + MWUPage.TabVisible := false; end; @@ -333,6 +199,159 @@ begin end; +// Do Kruskal-Wallis One-Way ANOVA +function TKWAnovaForm.Process_KruskalWallis(const ColNoSelected: IntDyneVec; + ANumGroups, AMinGroup: Integer): Boolean; +var + i, j: Integer; + group, totalN, numTieGroups, numTies, depVar, indVar: Integer; + score, T, sumT, tieSum, avg, H, correction, correctedH, probChi: Double; + rankSums: DblDyneVec = nil; + groupCount: IntDyneVec = nil; + X: DblDyneMat = nil; + Ranks: DblDyneMat = nil; + lReport: TStrings; +begin + Result := false; + + // Initialize arrays + SetLength(Ranks, NoCases, 2); + SetLength(X, NoCases, 2); + SetLength(RankSums, ANumGroups); + SetLength(groupCount, ANumGroups); + for i := 0 to ANumGroups-1 do + begin + groupCount[i] := 0; + rankSums[i] := 0.0; + end; + + depVar := ColNoSelected[1]; + indVar := ColNoSelected[0]; + + // Get data + totalN := 0; + for i := 1 to NoCases do + begin + if (not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected)) then continue; + score := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[depVar, i])); + group := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[indVar, i]))); + group := group - AMinGroup + 1; + if (group > ANumGroups) then + begin + ErrorMsg('Group codes must be sequential like 1 and 2!'); + exit; + end; + inc(groupCount[group-1]); + X[i-1, 0] := score; + X[i-1, 1] := group; + inc(totalN); + end; + + //Sort all scores in ascending order + for i := 1 to totalN - 1 do + begin + for j := i + 1 to totalN do + begin + if (X[i-1, 0] > X[j-1, 0]) then + begin + Exchange(X[i-1, 0], X[j-1, 0]); + Exchange(X[i-1, 1], X[j-1, 1]); + end; + end; + end; + + // Store ranks + for i := 0 to totalN - 1 do + begin + Ranks[i,0] := i+1; + Ranks[i,1] := X[i,1]; + end; + + // Check for ties in ranks - replace with average rank and calculate + // T for each tie and sum of the T's + sumT := 0.0; + numTieGroups := 0; + i := 1; + while i < totalN do + begin + j := i + 1; + tieSum := 0; + numTies := 0; + while (j < totalN) do + begin + if (X[j-1, 0] > X[i-1, 0]) then + break; + if (X[j-1, 0] = X[i-1, 0]) then // match + begin + tieSum := tieSum + round(Ranks[j-1,0]); + inc(numTies); + end; + inc(j); + end; + + if (numTies > 0) then // At least one tie found + begin + tieSum := tieSum + Ranks[i-1,0]; + numTies := numTies + 1; + avg := tieSum / numTies; + for j := i to i + numTies - 1 do + Ranks[j-1,0] := avg; + t := numTies * numTies * numTies - numTies; + sumT := sumT + t; + inc(numTieGroups); + i := i + (numTies - 1); + end; + inc(i); + end; // next i + + // Calculate sum of ranks in each group + for i := 0 to totalN - 1 do + begin + group := round(Ranks[i, 1]); + rankSums[group-1] := rankSums[group-1] + Ranks[i, 0]; + end; + + // Calculate statistics + H := 0.0; + for j := 0 to ANumGroups-1 do + H := H + (rankSums[j] * RankSums[j] / (groupCount[j])); + H := H * (12.0 / ( totalN * (totalN + 1)) ); + H := H - (3.0 * (totalN + 1)); + correction := 1.0 - ( sumT / (totalN * totalN * totalN - totalN) ); + correctedH := H / correction; + probChi := 1.0 - ChiSquaredProb(H, ANumGroups-1); + + // Report results + lReport := TStringList.Create; + try + lReport.Add('KRUSKAL-WALLIS ONE-WAY ANALYSIS OF VARIANCE'); + lReport.Add('See pages 184-194 in S. Siegel: Nonparametric Statistics for the Behavioral Sciences'); + lReport.Add(''); + + lReport.Add(' Score Rank Group'); + lReport.Add(''); + for i := 0 to totalN-1 do + lReport.Add('%10.2f %10.2f %10.0f', [X[i,0], Ranks[i,0], Ranks[i,1]]); + lReport.Add(''); + lReport.Add('Sum of Ranks in each Group'); + lReport.Add('Group Sum No. in Group'); + for i := 0 to ANumGroups-1 do + lReport.Add('%3d %10.2f %5d', [i+AMinGroup, RankSums[i], groupCount[i]]); + lReport.Add(''); + lReport.Add('No. of tied rank groups %8d', [numTieGroups]); + lReport.Add('Statistic H uncorrected for ties: %8.4f', [H]); + lReport.Add('Correction for Ties: %8.4f', [Correction]); + lReport.Add('Statistic H corrected for ties: %8.4f', [CorrectedH]); + lReport.Add('Corrected H is approx. chi-square with %d degrees of freedom and probability %.4f', [ANumGroups-1, ProbChi]); + + FReportFrame.DisplayReport(lReport); + finally + lReport.Free; + end; + + Result := true; +end; + // Do Mann-Whitney U tests on group pairs procedure TKWAnovaForm.Process_MannWhitney(const ColNoSelected: IntDyneVec; ANumGroups: Integer);