LazStats: Split ANOVA calculation off of main calculation routine in BreakDownUnit.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7706 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-09-28 10:59:21 +00:00
parent f25cde2b88
commit 4a492958a3

View File

@ -56,17 +56,11 @@ type
FBreakdownReportFrame: TReportFrame;
FANOVAReportFrame: TReportFrame;
FAutoSized: Boolean;
Minimum, Maximum, levels, displace, subscript : IntDyneVec;
Freq : IntDyneVec;
Selected : IntDyneVec;
index, ListSize, length_array : integer;
ptr1, ptr2, sum, grandsum : integer;
xsumtotal, xsqrtotal, grandsumx, grandsumx2, value, SD : double;
SST, SSW, SSB, MSW, MSB, F, FProb, DF1, DF2 : double;
cellstring : string;
outline : string;
valstr : string;
dataread : boolean;
procedure ANOVA(ListSize: Integer; Freq, Selected, Minimum, Subscript,
Levels, Displace: IntDyneVec; Mean, SS: DblDyneVec; AReport: TStrings);
procedure GetMinMax(AListSize: Integer;
const ASelected: IntDyneVec; var AMinimum, AMaximum: IntDyneVec);
function Index_Pos(const X, ADisplace: IntDyneVec; AListSize: integer): Integer;
procedure UpdateBtnStates;
@ -90,6 +84,171 @@ uses
{ TBreakDownFrm }
procedure TBreakDownFrm.ANOVA(ListSize: Integer; Freq, Selected, Minimum,
Subscript, Levels, Displace: IntDyneVec; Mean, SS: DblDyneVec; AReport: TStrings);
var
i, j: Integer;
ptr1, ptr2: Integer;
MSB, MSW, SSB, SST, SSW: Double;
grandSum: Integer;
grandSumX, grandSumX2: Double;
DF1, DF2: Double;
F, Fprob: Double;
index: Integer;
length_array: Integer;
begin
AnovaPage.Caption := 'Analysis of Variance';
AnovaPage.TabVisible := true;
AReport.Add('ANALYSES OF VARIANCE SUMMARY TABLES');
AReport.Add('');
length_array := Length(Freq) - 1; // freq is set by Setlength(freq, length_array+1);
ptr1 := ListSize - 1;
ptr2 := ListSize;
for i := 1 to ListSize do subscript[i-1] := 1;
SSB := 0.0;
SSW := 0.0;
MSB := 0.0;
MSW := 0.0;
grandsum := 0;
grandsumx := 0.0;
grandsumx2 := 0.0;
DF1 := 0.0;
DF2 := 0.0;
while true do
begin
//FirstOne:
index := Index_Pos(Subscript, Displace, ListSize);
if Freq[index] > 0 then
begin
AReport.Add('Variable levels: ');
for i := 1 to ListSize do
begin
j := Selected[i-1];
AReport.Add('%-10s level = %3d', [
OS3MainFrm.DataGrid.Cells[j,0], Minimum[i-1] + subscript[i-1] - 1
]);
end;
AReport.Add('');
// build sumsof squares for this set
DF1 := DF1 + 1;
DF2 := DF2 + Freq[index] - 1;
grandsum := grandsum + Freq[index];
grandsumx := grandsumx + mean[index];
grandsumx2 := grandsumx2 + SS[index];
SSW := SSW + SS[index] - (mean[index] * mean[index] / Freq[index]);
end;
subscript[ptr2-1] := subscript[ptr2-1] + 1;
if subscript[ptr2-1] <= levels[ptr2-1] then
Continue; // Return to start
if (grandsum > 0.0) and (DF1 > 1) and (DF2 > 1) and (SSW > 0.0) then
begin
// build and show anova table
SST := grandsumx2 - (grandsumx * grandsumx / grandsum);
SSB := SST - SSW;
DF1 := DF1 - 1.0; // no. of groups - 1
MSB := SSB / DF1;
MSW := SSW / DF2;
F := MSB / MSW;
FProb := probf(DF1,DF2,F);
AReport.Add('SOURCE D.F. SS MS F Prob.>F');
AReport.Add('GROUPS %2.0f %8.2f %8.2f %8.3f %6.4f', [DF1, SSB, MSB, F, FProb]);
AReport.Add('WITHIN %2.0f %8.2f %8.2f', [DF2, SSW, MSW]);
AReport.Add('TOTAL %2d %8.2f', [grandsum-1, SST]);
end else
AReport.Add('Insufficient data for ANOVA');
AReport.Add('');
AReport.Add(DIVIDER_SMALL_AUTO);
AReport.Add('');
SSB := 0.0;
SSW := 0.0;
MSB := 0.0;
MSW := 0.0;
grandsum := 0;
grandsumx := 0.0;
grandsumx2 := 0.0;
DF1 := 0.0;
DF2 := 0.0;
{ original: }
if ptr1 < 1 then
break;
subscript[ptr1-1] := subscript[ptr1-1] + 1;
if subscript[ptr1-1] > levels[ptr1-1] then
begin
repeat
dec(ptr1);
if ptr1 >= 1 then
begin
if subscript[ptr1-1] > levels[ptr1-1] then
continue;
inc(subscript[ptr1-1]);
if subscript[ptr1-1] <= levels[ptr1-1] then
break;
end;
until ptr1 < 1;
if ptr1 < 1 then
break;
end;
for i := ptr1+1 to ListSize do subscript[i-1] := 1;
ptr1 := ListSize - 1;
if ptr1 < 1 then
break;
end;
// do anova for all cells
AReport.Add('ANOVA FOR ALL CELLS');
AReport.Add('');
SST := 0.0;
SSW := 0.0;
DF2 := 0.0;
DF1 := 0.0;
grandsumx := 0.0;
grandsum := 0;
for i := 1 to length_array do
begin
if Freq[i] > 0 then
begin
SST := SST + SS[i];
grandsum := grandsum + Freq[i];
grandsumx := grandsumx + mean[i];
SSW := SSW + (SS[i] - (mean[i] * mean[i] / Freq[i]));
DF1 := DF1 + 1.0;
DF2 := DF2 + (Freq[i] - 1);
end;
end;
if (DF1 > 1.0) and (DF2 > 1.0) and (SSW > 0.0) then
begin
SST := SST - sqr(grandsumx) / grandsum;
SSB := SST - SSW;
DF1 := DF1 - 1;
MSB := SSB / DF1;
MSW := SSW / DF2;
F := MSB / MSW;
FProb := probf(DF1, DF2, F);
AReport.Add('SOURCE D.F. SS MS F Prob.>F');
AReport.Add('GROUPS %2.0f %8.2f %8.2f %8.3f %6.4f', [DF1, SSB, MSB, F, FProb]);
AReport.Add('WITHIN %2.0f %8.2f %8.2f', [DF2, SSW, MSW]);
AReport.Add('TOTAL %2d %8.2f', [grandsum-1, SST]);
AReport.Add('');
AReport.Add('FINISHED');
end else
begin
AReport.Add('Only 1 group. No ANOVA possible.');
end;
end;
procedure TBreakDownFrm.CloseBtnClick(Sender: TObject);
begin
Close;
@ -98,8 +257,20 @@ end;
procedure TBreakDownFrm.ComputeBtnClick(Sender: TObject);
label
Label1, Label3, Label4, NextStep, FirstOne, SecondOne, ThirdOne, LastStep;
Label1, Label3, Label4, NextStep;
var
Minimum, Maximum, levels, displace, subscript : IntDyneVec;
Freq : IntDyneVec;
Selected : IntDyneVec;
index, ListSize, length_array : integer;
ptr1, ptr2, sum, grandsum : integer;
xsumtotal, xsqrtotal, grandsumx, grandsumx2, value, SD : double;
cellstring : string;
outline : string;
valstr : string;
dataread : boolean;
mean: DblDyneVec = nil;
variance: DblDyneVec = nil;
stddev: DblDyneVec = nil;
@ -115,8 +286,8 @@ begin
if NoSelected = 0 then
begin
MessageDlg('No variables selected.', mtError, [mbOK], 0);
exit;
ErrorMsg('No variables selected.');
exit;
end;
// Get column no. of dependent variable
@ -127,22 +298,20 @@ begin
if dependentVar = 0 then
begin
MessageDlg('Continuous variable is not specified.', mtError, [mbOK], 0);
exit;
ErrorMsg('Continuous variable is not specified.');
exit;
end;
// Allocate heap
SetLength(Minimum,NoVariables);
SetLength(Maximum,NoVariables);
SetLength(levels,NoVariables);
SetLength(displace,NoVariables);
SetLength(subscript,NoVariables);
SetLength(Selected,NoVariables);
// Initialize arrays
SetLength(levels, NoVariables);
SetLength(displace, NoVariables);
SetLength(subscript, NoVariables);
// Get selected variables
SetLength(selected, NoVariables);
for i := 1 to NoSelected do
begin
cellstring := SelList.Items.Strings[i-1];
cellstring := SelList.Items[i-1];
for j := 1 to NoVariables do
if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then Selected[i-1] := j;
end;
@ -150,6 +319,10 @@ begin
ListSize := NoSelected;
// Get maximum and minimum levels in each variable
SetLength(minimum, NoVariables);
SetLength(maximum, NoVariables);
GetMinMax(ListSize, Selected, minimum, maximum);
(*
for i := 1 to ListSize do
begin
index := Selected[i-1];
@ -165,6 +338,7 @@ begin
end;
end;
end;
*)
// Calculate number of levels for each variable
for i := 1 to ListSize do
@ -365,156 +539,11 @@ begin
// Do ANOVA's if requested
if CheckGroup1.Checked[0] then
begin
AnovaPage.Caption := 'Analysis of Variance';
AnovaPage.TabVisible := true;
lReport.Add('ANALYSES OF VARIANCE SUMMARY TABLES');
lReport.Add('');
ptr1 := ListSize - 1;
ptr2 := ListSize;
for i := 1 to ListSize do subscript[i-1] := 1;
SSB := 0.0;
SSW := 0.0;
MSB := 0.0;
MSW := 0.0;
grandsum := 0;
grandsumx := 0.0;
grandsumx2 := 0.0;
DF1 := 0.0;
DF2 := 0.0;
FirstOne:
index := Index_Pos(subscript, displace, ListSize);
if Freq[index] > 0 then
begin
lReport.Add('Variable levels: ');
for i := 1 to ListSize do
begin
j := Selected[i-1];
lReport.Add('%-10s level = %3d', [
OS3MainFrm.DataGrid.Cells[j,0], Minimum[i-1] + subscript[i-1] - 1
]);
end;
lReport.Add('');
// build sumsof squares for this set
DF1 := DF1 + 1;
DF2 := DF2 + Freq[index] - 1;
grandsum := grandsum + Freq[index];
grandsumx := grandsumx + mean[index];
grandsumx2 := grandsumx2 + SS[index];
SSW := SSW + SS[index] - (mean[index] * mean[index] / Freq[index]);
end;
subscript[ptr2-1] := subscript[ptr2-1] + 1;
if subscript[ptr2-1] <= levels[ptr2-1] then
goto FirstOne;
if ((grandsum > 0.0) and (DF1 > 1) and (DF2 > 1) and (SSW > 0.0)) then
begin
// build and show anova table
SST := grandsumx2 - (grandsumx * grandsumx / grandsum);
SSB := SST - SSW;
DF1 := DF1 - 1.0; // no. of groups - 1
MSB := SSB / DF1;
MSW := SSW / DF2;
F := MSB / MSW;
FProb := probf(DF1,DF2,F);
lReport.Add('SOURCE D.F. SS MS F Prob.>F');
lReport.Add('GROUPS %2.0f %8.2f %8.2f %8.3f %6.4f', [DF1,SSB,MSB,F,FProb]);
lReport.Add('WITHIN %2.0f %8.2f %8.2f', [DF2,SSW,MSW]);
lReport.Add('TOTAL %2d %8.2f', [grandsum-1,SST]);
//OutputFrm.ShowModal;
//OutputFrm.Clear;
end else
begin
lReport.Add('Insufficient data for ANOVA');
//OutputFrm.ShowModal;
//OutputFrm.Clear;
end;
lReport.Add('');
lReport.Add('=============================================================');
lReport.Add('');
SSB := 0.0;
SSW := 0.0;
MSB := 0.0;
MSW := 0.0;
grandsum := 0;
grandsumx := 0.0;
grandsumx2 := 0.0;
DF1 := 0.0;
DF2 := 0.0;
if ptr1 < 1 then
goto LastStep;
subscript[ptr1-1] := subscript[ptr1-1] + 1;
if subscript[ptr1-1] <= levels[ptr1-1] then
goto ThirdOne;
SecondOne:
ptr1 := ptr1 - 1;
if ptr1 < 1 then goto LastStep;
if subscript[ptr1-1] > levels[ptr1-1] then
goto SecondOne;
subscript[ptr1-1] := subscript[ptr1-1] + 1;
if subscript[ptr1-1] > levels[ptr1-1] then
goto SecondOne;
ThirdOne:
for i := ptr1+1 to ListSize do subscript[i-1] := 1;
ptr1 := ListSize - 1;
if ptr1 < 1 then
goto LastStep;
goto FirstOne;
LastStep:
// do anova for all cells
lReport.Add('ANOVA FOR ALL CELLS');
lReport.Add('');
SST := 0.0;
SSW := 0.0;
DF2 := 0.0;
DF1 := 0.0;
grandsumx := 0.0;
grandsum := 0;
for i := 1 to length_array do
begin
if Freq[i] > 0 then
begin
SST := SST + SS[i];
grandsum := grandsum + Freq[i];
grandsumx := grandsumx + mean[i];
SSW := SSW + (SS[i] - (mean[i] * mean[i] / Freq[i]));
DF1 := DF1 + 1.0;
DF2 := DF2 + (Freq[i] - 1);
end;
end;
if ( (DF1 > 1.0) and (DF2 > 1.0) and (SSW > 0.0)) then
begin
SST := SST - (grandsumx * grandsumx / grandsum);
SSB := SST - SSW;
DF1 := DF1 - 1;
MSB := SSB / DF1;
MSW := SSW / DF2;
F := MSB / MSW;
FProb := probf(DF1, DF2, F);
lReport.Add('SOURCE D.F. SS MS F Prob.>F');
lReport.Add('GROUPS %2.0f %8.2f %8.2f %8.3f %6.4f', [DF1, SSB, MSB, F, FProb]);
lReport.Add('WITHIN %2.0f %8.2f %8.2f', [DF2, SSW, MSW]);
lReport.Add('TOTAL %2d %8.2f', [grandsum-1, SST]);
lReport.Add('FINISHED');
end else
begin
lReport.Add('Only 1 group. No ANOVA possible.');
end;
ANOVA(ListSize, Freq, Selected, Minimum, Subscript, Levels, Displace, Mean, SS, lReport);
// Show ANOVA in pagecontrol
FANovaReportFrame.DisplayReport(lReport);
end;
finally
lReport.Free;
end;
@ -576,6 +605,35 @@ begin
end;
procedure TBreakDownFrm.GetMinMax(AListSize: Integer;
const ASelected: IntDyneVec; var AMinimum, AMaximum: IntDyneVec);
var
i, j, index: Integer;
NoSelected: Integer;
X: Integer;
begin
SetLength(AMinimum, NoVariables);
SetLength(AMaximum, NoVariables);
NoSelected := SelList.Count;
for i := 0 to AListSize-1 do
begin
index := ASelected[i];
AMinimum[i] := round(StrToFloat(OS3MainFrm.DataGrid.Cells[index, 1]));
AMaximum[i] := AMinimum[i];
for j := 1 to NoCases do
begin
if GoodRecord(j, NoSelected, ASelected) then
begin
X := round(StrToFloat(OS3MainFrm.DataGrid.Cells[index, j]));
if X < AMinimum[i] then AMinimum[i] := X;
if X > AMaximum[i] then AMaximum[i] := X;
end;
end;
end;
SetLength(AMinimum, AListSize);
SetLengtH(AMaximum, AListSize);
end;
procedure TBreakDownFrm.HelpBtnClick(Sender: TObject);
begin
if ContextHelpForm = nil then
@ -671,12 +729,12 @@ end;
procedure TBreakDownFrm.SelVarInBtnClick(Sender: TObject);
var
index1 : integer;
index : integer;
begin
index1 := VarList.ItemIndex;
if (index1 > -1) and (DepVar.Text = '') then
index := VarList.ItemIndex;
if (index > -1) and (DepVar.Text = '') then
begin
DepVar.Text := VarList.Items[index1];
DepVar.Text := VarList.Items[index];
VarList.Items.Delete(index);
end;
UpdateBtnStates;