LazStats: Complete refactoring of CompareDistUnit.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7723 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-09-30 21:05:32 +00:00
parent 0e50c16f6b
commit 10049ca6b0
3 changed files with 382 additions and 571 deletions

View File

@ -1,37 +1,37 @@
inherited CompareDistFrm: TCompareDistFrm
Left = 459
Height = 504
Height = 535
Top = 178
Width = 924
HelpType = htKeyword
HelpKeyword = 'html/ComparisonsWithTheoreticalDistri.htm'
Caption = 'Compare Cumulative Distributions'
ClientHeight = 504
ClientHeight = 535
ClientWidth = 924
OnActivate = FormActivate
OnCreate = FormCreate
Position = poMainFormCenter
object ParamsPanel: TPanel[0]
Left = 8
Height = 488
Height = 519
Top = 8
Width = 288
Width = 296
Align = alLeft
BorderSpacing.Left = 8
BorderSpacing.Top = 8
BorderSpacing.Right = 4
BorderSpacing.Bottom = 8
BevelOuter = bvNone
ClientHeight = 488
ClientWidth = 288
ClientHeight = 519
ClientWidth = 296
TabOrder = 0
object ResetBtn: TButton
AnchorSideRight.Control = ComputeBtn
AnchorSideBottom.Control = ParamsPanel
AnchorSideBottom.Side = asrBottom
Left = 87
Left = 95
Height = 25
Top = 463
Top = 494
Width = 54
Anchors = [akRight, akBottom]
AutoSize = True
@ -39,15 +39,15 @@ inherited CompareDistFrm: TCompareDistFrm
BorderSpacing.Right = 8
Caption = 'Reset'
OnClick = ResetBtnClick
TabOrder = 0
TabOrder = 7
end
object ComputeBtn: TButton
AnchorSideRight.Control = CloseBtn
AnchorSideBottom.Control = ParamsPanel
AnchorSideBottom.Side = asrBottom
Left = 149
Left = 157
Height = 25
Top = 463
Top = 494
Width = 76
Anchors = [akRight, akBottom]
AutoSize = True
@ -56,16 +56,16 @@ inherited CompareDistFrm: TCompareDistFrm
BorderSpacing.Right = 8
Caption = 'Compute'
OnClick = ComputeBtnClick
TabOrder = 1
TabOrder = 8
end
object CloseBtn: TButton
AnchorSideRight.Control = ParamsPanel
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = ParamsPanel
AnchorSideBottom.Side = asrBottom
Left = 233
Left = 241
Height = 25
Top = 463
Top = 494
Width = 55
Anchors = [akRight, akBottom]
AutoSize = True
@ -74,7 +74,7 @@ inherited CompareDistFrm: TCompareDistFrm
Caption = 'Close'
ModalResult = 11
OnClick = CloseBtnClick
TabOrder = 2
TabOrder = 9
end
object Bevel1: TBevel
AnchorSideLeft.Control = ParamsPanel
@ -83,8 +83,8 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideBottom.Control = ResetBtn
Left = 0
Height = 8
Top = 447
Width = 288
Top = 478
Width = 296
Anchors = [akLeft, akRight, akBottom]
Shape = bsBottomLine
end
@ -92,29 +92,31 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideLeft.Control = ParamsPanel
AnchorSideBottom.Control = Bevel1
Left = 0
Height = 83
Top = 364
Width = 298
Height = 110
Top = 368
Width = 294
Anchors = [akLeft, akBottom]
AutoSize = True
BorderSpacing.Top = 8
Caption = 'Option:'
ClientHeight = 63
ClientWidth = 294
TabOrder = 3
Caption = 'Options'
ClientHeight = 90
ClientWidth = 290
TabOrder = 6
object BothChk: TCheckBox
AnchorSideLeft.Control = OptionsGroup
AnchorSideTop.Control = OptionsGroup
AnchorSideTop.Control = NoIntervalsEdit
AnchorSideTop.Side = asrBottom
Left = 16
Height = 19
Top = 6
Top = 33
Width = 266
AutoSize = False
BorderSpacing.Left = 16
BorderSpacing.Top = 6
BorderSpacing.Right = 12
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
Caption = 'Plot both frequency and cumulative frequency'
TabOrder = 0
TabOrder = 1
end
object BarPlotBtn: TSpeedButton
AnchorSideLeft.Control = OptionsGroup
@ -122,7 +124,7 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideTop.Side = asrBottom
Left = 16
Height = 22
Top = 33
Top = 60
Width = 23
BorderSpacing.Left = 16
BorderSpacing.Bottom = 8
@ -138,13 +140,43 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideTop.Side = asrCenter
Left = 43
Height = 22
Top = 33
Top = 60
Width = 23
BorderSpacing.Left = 4
GroupIndex = 1
Images = MainDataModule.ImageList
ImageIndex = 10
end
object NoIntervalsEdit: TSpinEdit
AnchorSideLeft.Control = NoIntervalsLabel
AnchorSideLeft.Side = asrBottom
AnchorSideTop.Control = OptionsGroup
AnchorSideRight.Control = OptionsGroup
AnchorSideRight.Side = asrBottom
Left = 177
Height = 23
Top = 2
Width = 66
Alignment = taRightJustify
BorderSpacing.Left = 16
BorderSpacing.Top = 2
BorderSpacing.Right = 12
BorderSpacing.Bottom = 8
MinValue = 1
TabOrder = 0
Value = 10
end
object NoIntervalsLabel: TLabel
AnchorSideLeft.Control = BarPlotBtn
AnchorSideTop.Control = NoIntervalsEdit
AnchorSideTop.Side = asrCenter
Left = 16
Height = 15
Top = 6
Width = 145
Caption = 'Approximate interval count'
ParentColor = False
end
end
object Label1: TLabel
AnchorSideLeft.Control = ParamsPanel
@ -163,9 +195,9 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideRight.Control = Var1InBtn
AnchorSideBottom.Control = OptionsGroup
Left = 0
Height = 339
Height = 343
Top = 17
Width = 99
Width = 107
Anchors = [akTop, akLeft, akRight, akBottom]
BorderSpacing.Top = 2
BorderSpacing.Right = 6
@ -173,13 +205,13 @@ inherited CompareDistFrm: TCompareDistFrm
ItemHeight = 0
OnDblClick = VarListDblClick
OnSelectionChange = VarListSelectionChange
TabOrder = 4
TabOrder = 0
end
object Var1InBtn: TBitBtn
AnchorSideLeft.Control = CompareGroup
AnchorSideTop.Control = VarList
AnchorSideRight.Control = VarOneEdit
Left = 105
Left = 113
Height = 26
Top = 17
Width = 26
@ -189,13 +221,13 @@ inherited CompareDistFrm: TCompareDistFrm
ImageIndex = 1
OnClick = Var1InBtnClick
Spacing = 0
TabOrder = 5
TabOrder = 1
end
object Var1OutBtn: TBitBtn
AnchorSideLeft.Control = Var1InBtn
AnchorSideTop.Control = Var1InBtn
AnchorSideTop.Side = asrBottom
Left = 105
Left = 113
Height = 26
Top = 47
Width = 26
@ -203,12 +235,12 @@ inherited CompareDistFrm: TCompareDistFrm
ImageIndex = 0
OnClick = Var1OutBtnClick
Spacing = 0
TabOrder = 6
TabOrder = 2
end
object Label2: TLabel
AnchorSideLeft.Control = VarOneEdit
AnchorSideBottom.Control = VarOneEdit
Left = 139
Left = 147
Height = 15
Top = 21
Width = 66
@ -224,7 +256,7 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = Var1OutBtn
AnchorSideBottom.Side = asrBottom
Left = 139
Left = 147
Height = 23
Top = 38
Width = 149
@ -232,7 +264,7 @@ inherited CompareDistFrm: TCompareDistFrm
BorderSpacing.Left = 8
BorderSpacing.Bottom = 12
ReadOnly = True
TabOrder = 7
TabOrder = 3
Text = 'VarOneEdit'
end
object CompareGroup: TRadioGroup
@ -241,7 +273,7 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = ParamsPanel
AnchorSideRight.Side = asrBottom
Left = 105
Left = 113
Height = 76
Top = 81
Width = 183
@ -268,7 +300,7 @@ inherited CompareDistFrm: TCompareDistFrm
'Another Variable'
)
OnClick = CompareGroupClick
TabOrder = 8
TabOrder = 4
end
object Notebook: TNotebook
AnchorSideLeft.Control = CompareGroup
@ -276,7 +308,7 @@ inherited CompareDistFrm: TCompareDistFrm
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = CompareGroup
AnchorSideRight.Side = asrBottom
Left = 105
Left = 113
Height = 191
Top = 165
Width = 183
@ -284,7 +316,7 @@ inherited CompareDistFrm: TCompareDistFrm
AutoSize = True
Anchors = [akTop, akLeft, akRight]
BorderSpacing.Bottom = 8
TabOrder = 9
TabOrder = 5
object TheoreticalDistPage: TPage
object DistGroup: TGroupBox
AnchorSideLeft.Control = TheoreticalDistPage
@ -491,10 +523,10 @@ inherited CompareDistFrm: TCompareDistFrm
end
end
object PageControl1: TPageControl[1]
Left = 309
Height = 488
Left = 317
Height = 519
Top = 8
Width = 607
Width = 599
ActivePage = CumFreqChartPage
Align = alClient
BorderSpacing.Left = 4
@ -514,8 +546,8 @@ inherited CompareDistFrm: TCompareDistFrm
end
end
object ParamsSplitter: TSplitter[2]
Left = 300
Height = 504
Left = 308
Height = 535
Top = 0
Width = 5
ResizeStyle = rsPattern

View File

@ -8,7 +8,7 @@ interface
uses
Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs,
StdCtrls, Buttons, ExtCtrls, ComCtrls,
StdCtrls, Buttons, ExtCtrls, ComCtrls, Spin,
FunctionsLib, Globals, GraphLib, DataProcs, MainDM, MainUnit,
BasicStatsFormUnit, ReportFrameUnit, ChartFrameUnit;
@ -25,6 +25,8 @@ type
DistGroup: TGroupBox;
DF1Label: TLabel;
DF2Label: TLabel;
NoIntervalsEdit: TSpinEdit;
NoIntervalsLabel: TLabel;
PageControl1: TPageControl;
ParamsSplitter: TSplitter;
NormalDistChk: TRadioButton;
@ -79,12 +81,17 @@ type
CompareTo: TCompareTo;
CompareDist: TCompareDist;
procedure CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; DF1: Integer = -1;
DF2: Integer = -1);
AMin, AIncrement: Double; ANumIntervals, AColIndex: Integer);
procedure CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
AMin, AMax: Double; ANumIntervals, ANumCases: Integer;
ACompareDist: TCompareDist; DF1: Integer = -1; DF2: Integer = -1);
procedure CalcIntervals(var AMin, AMax, AIntervalsize: Double;
out ANumIntervals: Integer);
function CalcMinMax(out AMin, AMax: Double; AColIndex: Integer): Integer;
procedure CalcTheoreticalDist(XValues, FreqValues, CumFreqValues: DblDyneVec;
ANumIntervals, ANumCases: Integer; out AName: String);
ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist; out AName: String);
procedure Plot(AChartFrame: TChartFrame; Y1Values, Y2Values: DblDyneVec;
AName1, AName2: String);
ASeriesTitle1, ASeriesTitle2, AYTitle, ATitle: String);
procedure UpdateBtnStates;
procedure UpdateDF1;
function Validate(ANumCases: Integer;
@ -111,9 +118,39 @@ uses
{ TCompareDistFrm }
{ Get frequency and cumulative frequency of cases in each interval }
procedure TCompareDistFrm.CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; DF1: Integer = -1;
DF2: Integer = -1);
AMin, AIncrement: Double; ANumIntervals, AColIndex: Integer);
var
j, k: Integer;
value: Double;
begin
// Get border points of the intervals
for j := 0 to ANumIntervals do // no "-1" because last point is needed
XValues[j] := AMin + j*AIncrement;
// count values in these intervals
for j := 1 to NoCases do
begin
if not ValidValue(j, AColIndex) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, j]);
for k := 0 to ANumIntervals-1 do
begin
if (value >= XValues[k]) and (value < XValues[k+1]) then
FreqValues[k] := FreqValues[k] + 1;
end;
end;
// Calculate cumulative frequencies
CumFreqValues[0] := FreqValues[0];
for j := 1 to ANumIntervals-1 do
CumFreqValues[j] := CumFreqValues[j-1] + FreqValues[j];
end;
{ Calculate frequencies and cumulative frequencies for a theoretical distribution }
procedure TCompareDistFrm.CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist;
DF1: Integer = -1; DF2: Integer = -1);
var
dx: Double;
i: Integer;
@ -129,7 +166,7 @@ begin
XValues[i] := AMin + i * dx;
for i := 0 to ANumIntervals - 1 do
case CompareDist of
case ACompareDist of
cd_Normal:
Calc(NormalDist(XValues[i]), NormalDist(XValues[i+1]));
cd_t:
@ -140,7 +177,6 @@ begin
Calc(ProbF(XValues[i], DF1, DF2), ProbF(XValues[i+1], DF1, DF2));
cd_Poisson:
Calc(PoissonCDF(round(XValues[i]), DF1), PoissonCDF(round(XValues[i+1]), DF1));
// Calc(PoissonPDF(round(XValues[i]), DF1), 0);
end;
CumFreqValues[0] := FreqValues[0];
@ -149,8 +185,49 @@ begin
end;
procedure TCompareDistFrm.CalcIntervals(var AMin, AMax, AIntervalsize: Double;
out ANumIntervals: Integer);
var
intervalSize: Double;
m: Double;
e: Integer;
begin
intervalSize := (AMax - AMin) / NoIntervalsEdit.Value;
if intervalSize = 0 then intervalSize := 1;
MantisseAndExponent(intervalSize, m, e);
m := round(m);
AIntervalSize := m * IntPower(10, e);
AMin := floor(AMin / AIntervalSize) * AIntervalSize;
AMax := ceil(AMax / AIntervalSize) * AIntervalSize;
ANumIntervals := round((AMax - AMin) / AIntervalSize);
end;
{ Calculates minimum and maximum of the values in the given column.
Also, counts the valid numbers in this column and returns it as function result }
function TCompareDistFrm.CalcMinMax(out AMin, AMax: Double; AColIndex: Integer): Integer;
var
j: Integer;
value: Double;
numCases: Integer;
begin
AMin := Infinity;
AMax := -Infinity;
numCases := 0;
for j := 1 to NoCases do
begin
if not ValidValue(j, AColIndex) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, j]);
if value > AMax then AMax := value;
if value < AMin then AMin := value;
inc(numCases);
end;
Result := numCases;
end;
procedure TCompareDistFrm.CalcTheoreticalDist(XValues, FreqValues, CumFreqValues: DblDyneVec;
ANumIntervals, ANumCases: Integer; out AName: String);
ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist; out AName: String);
var
min, max: Double;
DF1: Integer = -1;
@ -162,7 +239,7 @@ begin
if TryStrToFloat(DF2Edit.Text, a) then
DF2 := round(a);
case CompareDist of
case ACompareDist of
cd_Normal:
begin
min := -3.0;
@ -191,7 +268,7 @@ begin
; // will be handled separately
end;
CalcFreq(XValues, FreqValues, CumFreqValues, min, max, ANumIntervals, ANumCases, DF1, DF2);
CalcFreq(XValues, FreqValues, CumFreqValues, min, max, ANumIntervals, ANumCases, ACompareDist, DF1, DF2);
end;
@ -245,21 +322,15 @@ var
xValue2: DblDyneVec = nil;
cumfreq1: DblDyneVec = nil;
cumfreq2: DblDyneVec = nil;
i, j, k, col1, col2, nCases, noInts: integer;
min1, max1, min2, max2, range1, range2, value: double;
incrSize1, incrSize2, {%H-}KS: double;
cellVal, name1, name2: string;
msg: String;
i, col1, col2, nCases, noInts: integer;
min1, max1, min2, max2: double;
incrSize1: Double = 0.0;
incrSize2: Double = 0.0;
KS: double;
cellVal, name1, name2, msg: string;
C: TWinControl;
lReport: TStrings;
begin
SetLength(var1Freq, NoCases + 1);
SetLength(var2Freq, NoCases + 1);
SetLength(xValue1, NoCases + 1);
SetLength(xValue2, NoCases + 1);
SetLength(cumfreq1, NoCases + 1);
SetLength(cumfreq2, NoCases + 1);
// Get columns of the variables
col1 := 0;
for i := 1 to NoVariables do
@ -289,255 +360,55 @@ begin
end;
// Get min and max values for variable in col1, as well as true number of cases
min1 := Infinity;
max1 := -Infinity;
nCases := 0;
for j := 1 to NoCases do
begin
if not ValidValue(j, col1) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, j]);
if value > max1 then max1 := value;
if value < min1 then min1 := value;
inc(nCases);
end;
nCases := CalcMinMax(min1, max1, col1);
// Validate
// Validate edit controls
if not Validate(nCases, msg, C) then begin
C.SetFocus;
ErrorMsg(msg);
end;
// Get number of intervals
noInts := NoCases - 1; // wp: why NoCases here, and not nCases?
if noInts > 20 then noints := 20;
CalcIntervals(min1, max1, incrSize1, noInts);
range1 := max1 - min1 + 1.0;
incrSize1 := range1 / noInts;
name1 := VarOneEdit.Text;
// Get mem for the arrays
SetLength(var1Freq, noInts);
Setlength(cumFreq1, noInts);
SetLength(xValue1, noInts + 1); // Border points of the intervals, one more than intervals
SetLength(var2Freq, noInts);
SetLength(cumFreq2, noInts);
Setlength(xValue2, noInts + 1);
// Repeat for variable 2 (if Compare To is selected as "Another variable")
if CompareTo = ctVariable then
begin
min2 := Infinity;
max2 := -Infinity;
for j := 1 to NoCases do
begin
if not ValidValue(j, col2) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col2, j]);
if value > max2 then max2 := value;
if value < min2 then min2 := value;
end;
range2 := max2 - min2 + 1.0;
incrSize2 := range2 / noInts;
CalcMinMax(min2, max2, col2);
incrSize2 := (max2 - min2) / noInts;
name2 := VarTwoEdit.Text;
end;
// Get frequency of cases in each interval
for j := 1 to noints+1 do
var1Freq[j-1] := 0;
for j := 1 to NoCases do
begin
if not ValidValue(j, col1) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, j]);
for k := 1 to noInts do
begin
if (value >= min1 + (k-1) * incrSize1) and (value < min1 + k * incrSize1) then
var1Freq[k-1] := var1Freq[k-1] + 1;
end;
end;
cumFreq1[0] := var1Freq[0];
for j := 1 to noInts+1 do
xValue1[j-1] := min1 + (j-1) * incrSize1;
for j := 1 to noInts do
cumFreq1[j] := cumFreq1[j-1] + var1Freq[j];
CalcFreq(xValue1, var1Freq, cumFreq1, min1, incrSize1, noInts, col1);
// Repeat for 2nd variable, if required
if CompareTo = ctVariable then
begin
for j := 1 to noInts+1 do
var2Freq[j-1] := 0;
for j := 1 to NoCases do
begin
if not ValidValue(j, col2) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col2, j]);
for k := 1 to noInts do
begin
if (value >= min2 + (k-1) * incrsize2) and (value < min2 + k * incrsize2) then
var2Freq[k-1] := var2Freq[k-1] + 1;
end;
end;
cumfreq2[0] := var2Freq[0];
for j := 1 to noInts+1 do
xValue2[j-1] := min2 + (j-1) * incrSize2;
for j := 1 to noInts do
cumFreq2[j] := cumFreq2[j-1] + var2Freq[j];
end;
CalcFreq(xValue2, var2Freq, cumFreq2, min2, incrSize2, noInts, col2);
// Get theoretical distribution frequencies for selected distribution, if required.
if CompareDist = cd_Poisson then
begin
CalcFreq(xValue2, var2Freq, cumFreq2, min1, min2, noInts, nCases, StrToInt(DF1Edit.Text));
name2 := 'Poisson';
end
else
CalcTheoreticalDist(xValue2, var2Freq, cumFreq2, noInts, nCases, name2);
(*
if CompareTo = ctTheoreticalDistrib then
begin
if NormalDistChk.Checked then // normal distribution curve
begin
name2 := 'Normal';
min2 := -3.0;
max2 := 3.0;
range2 := max2 - min2;
incrsize2 := range2 / noints;
Xvalue2[0] := min2;
Xvalue2[noints] := max2;
for i := 1 to noInts do
begin
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
Xvalue2[i] := min2 + (i) * incrSize2;
prob1 := probz(abs(Xvalue2[i-1]));
prob2 := probz(abs(Xvalue2[i]));
if prob1 > prob2 then
Var2Freq[i-1] := round((prob1 - prob2) * nCases)
else
Var2Freq[i-1] := round((prob2 - prob1) * nCases)
end;
Cumfreq2[0] := Var2Freq[0];
for i := 1 to noints do
Cumfreq2[i] := Cumfreq2[i-1] + Var2Freq[i];
end
else
if tDistChk.Checked then // t-distribution
begin
name2 := 't-Dist.';
min2 := -3.0;
max2 := 3.0;
df1 := nCases - 1;
range2 := max2 - min2;
incrsize2 := range2 / noints;
Xvalue2[0] := min2;
Xvalue2[noints] := max2;
for i := 1 to noInts do
begin
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
Xvalue2[i] := min2 + (i) * incrSize2;
prob1 := 0.5 * probt(Xvalue2[i-1],df1);
prob2 := 0.5 * probt(Xvalue2[i],df1);
if prob1 > prob2 then
Var2Freq[i-1] := round((prob1-prob2) * nCases)
else
Var2Freq[i-1] := round((prob2-prob1) * nCases)
end;
Cumfreq2[0] := Var2Freq[0];
for i := 1 to noInts do
Cumfreq2[i] := Cumfreq2[i-1] + Var2Freq[i];
end
else
if ChiSqDistChk.Checked then // chi squared distribution
begin
cellval := InputBox('Deg. Freedom 1 Entry','DF 1','');
df1 := StrToInt(cellval);
name2 := 'Chi Sq';
min2 := 0.0;
max2 := 20.0;
range2 := max2 - min2;
incrSize2 := range2 / noInts;
xValue2[0] := min2;
xValue2[noInts] := max2;
for i := 1 to noints do
begin
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
Xvalue2[i] := min2 + (i) * incrSize2;
prob1 := chisquaredprob(Xvalue2[i-1],df1);
prob2 := chisquaredprob(Xvalue2[i],df1);
if prob1 > prob2 then
Var2Freq[i-1] := round((prob1-prob2) * nCases)
else
Var2Freq[i-1] := round((prob2-prob1) * nCases)
end;
cumfreq2[0] := var2Freq[0];
for i := 1 to noints do
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
end
else
if FDistChk.Checked then // F distribution
begin
// get degrees of freedom
cellval := InputBox('Deg. Freedom 1 Entry','DF 1','');
df1 := StrToInt(cellval);
cellval := InputBox('Deg. Freedom 2 Entry','DF 2','');
df2 := StrToInt(cellval);
name2 := 'F Dist.';
min2 := 0.0;
max2 := 3.0;
range2 := max2 - min2;
incrsize2 := range2 / noints;
Xvalue2[0] := min2;
Xvalue2[noints] := max2;
for i := 1 to noInts do
begin
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
Xvalue2[i] := min2 + (i) * incrSize2;
prob1 := ProbF(Xvalue2[i-1],df1,df2);
prob2 := ProbF(Xvalue2[i],df1,df2);
if prob1 > prob2 then
Var2Freq[i-1] := round((prob1-prob2) * nCases)
else
Var2Freq[i-1] := round((prob2-prob1) * nCases)
end;
cumfreq2[0] := Var2Freq[0];
for i := 1 to noInts do
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
end
else
if PoissonDistChk.Checked then // Poisson distribution
if CompareDist = cd_Poisson then
begin
CalcFreq(xValue2, var2Freq, cumFreq2, min1, min2, noInts, nCases, compareDist, StrToInt(DF1Edit.Text));
name2 := 'Poisson';
mean := 0; // use as parameter a in pdf call
min2 := min1;
max2 := max1;
if max2 > 13 then
begin
ErrorMsg('Value > 13 found. Factorial too large - exiting.');
exit;
end;
for i := 1 to nCases do
mean := mean + StrToFloat(OS3MainFrm.DataGrid.Cells[col1, i]);
mean := mean / nCases;
cellval := IntToStr(round(mean));
cellval := InputBox('Parameter Entry (mean)', 'DF 1', cellval);
degfree := StrToFloat(cellval);
range2 := max2 - min2;
incrsize2 := range2 / noInts;
// Xvalue2[0] := min2;
Xvalue2[noints] := max2;
for i := 1 to noints do
begin
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
Xvalue2[i] := min2 + (i) * incrSize2;
poisson_pdf ( round(Xvalue2[i-1]), degfree, prob1 );
// prob1 := (Xvalue2[i-1],df1);
// prob2 := chisquaredprob(Xvalue2[i],df1);
// if prob1 > prob2 then
Var2Freq[i-1] := round((prob1) * nCases);
// else Var2Freq[i-1] := round((prob2-prob1) * Ncases)
end;
cumfreq2[0] := var2Freq[0];
for i := 1 to noInts do
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
end;
end
else
CalcTheoreticalDist(xValue2, var2Freq, cumFreq2, noInts, nCases, compareDist, name2);
end;
name1 := VarOneEdit.Text;
*)
// Print distributions to report
lReport := TStringList.Create;
try
lReport.Add('DISTRIBUTION COMPARISON by Bill Miller');
@ -554,116 +425,24 @@ begin
XValue1[i-1], Var1Freq[i-1], Cumfreq1[i-1], XValue2[i-1], Var2Freq[i-1], Cumfreq2[i-1]
]);
lReport.Add('');
cellval := 'D';
KS := KolmogorovTest(noInts, Cumfreq1, noInts, Cumfreq2, cellVal, lReport);
// lReport.Add('Kolmogorov-Smirnov statistic := %5.3f', [KS]);
KS := KolmogorovTest(noInts, Cumfreq1, noInts, Cumfreq2, 'D', lReport);
lReport.Add('Kolmogorov-Smirnov statistic: %5.3f', [KS]);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
// plot the cdfs
Plot(FCumFreqChartFrame, cumFreq1, cumFreq2, VarOneEdit.Text, name2);
// Plot the cumulative distributions
Plot(FCumFreqChartFrame, cumFreq1, cumFreq2, VarOneEdit.Text, name2,
'Cumulative Frequency', 'Plot of Cumulative Distributions');
// Plot the frequency distrigutions, if requested.
if BothChk.Checked then
Plot(FFreqChartFrame, var1Freq, var2Freq, VarOneEdit.Text, name2);
Plot(FFreqChartFrame, var1Freq, var2Freq, VarOneEdit.Text, name2,
'Frequency', 'Plot of Distributions');
FreqChartPage.TabVisible := BothChk.Checked;
(*
xtitle := 'Red = ' + VarOneEdit.Text + ' Blue = ' + name2;
cellval := 'Plot of Cumulative Distributions';
{
if LinesChk.Checked then
GraphFrm.barwideprop := 1.0
else
GraphFrm.barwideprop := 0.5;
}
GraphFrm.nosets := 2;
GraphFrm.nbars := noints+1;
GraphFrm.Heading := cellval;
GraphFrm.XTitle := xtitle;
GraphFrm.YTitle := 'Frequency';
SetLength(GraphFrm.Ypoints,2,noints+1);
SetLength(GraphFrm.Xpoints,1,noints+1);
for k := 1 to noints+1 do
begin
GraphFrm.Ypoints[0,k-1] := Cumfreq1[k-1];
GraphFrm.Ypoints[1,k-1] := CumFreq2[k-1];
GraphFrm.Xpoints[0,k-1] := k;
end;
GraphFrm.AutoScaled := true;
{
if LinesChk.Checked then
GraphFrm.GraphType := 6 // 3d lines
else
GraphFrm.GraphType := 8; // 3D points
}
GraphFrm.BackColor := clYellow;
GraphFrm.WallColor := clBlue;
GraphFrm.FloorColor := clGray;
GraphFrm.ShowLeftWall := true;
GraphFrm.ShowRightWall := true;
GraphFrm.ShowBottomWall := true;
GraphFrm.ShowBackWall := true;
GraphFrm.ShowModal;
GraphFrm.Xpoints := nil;
GraphFrm.Ypoints := nil;
if BothChk.Checked then // plot the frequencies
begin
xtitle := 'Red = ' + VarOneEdit.Text + ' Blue = ' + name2;
cellval := 'Plot of Cumulative Distributions';
{
if LinesChk.Checked then
GraphFrm.BarWideProp := 1.0
else
GraphFrm.BarWideProp := 0.5;
}
GraphFrm.nosets := 2;
GraphFrm.nbars := noints+1;
GraphFrm.Heading := cellval;
GraphFrm.XTitle := xtitle;
GraphFrm.YTitle := 'Frequency';
SetLength(GraphFrm.Ypoints,2,noints+1);
SetLength(GraphFrm.Xpoints,1,noints+1);
for k := 1 to noints+1 do
begin
GraphFrm.Ypoints[0,k-1] := Var1Freq[k-1];
GraphFrm.Ypoints[1,k-1] := Var2Freq[k-1];
GraphFrm.Xpoints[0,k-1] := k;
end;
GraphFrm.AutoScaled := true;
{
if LinesChk.Checked then
GraphFrm.GraphType := 6 // 3d lines
else
GraphFrm.GraphType := 8; // 3D points
}
GraphFrm.BackColor := clYellow;
GraphFrm.WallColor := clBlue;
GraphFrm.FloorColor := clGray;
GraphFrm.ShowLeftWall := true;
GraphFrm.ShowRightWall := true;
GraphFrm.ShowBottomWall := true;
GraphFrm.ShowBackWall := true;
GraphFrm.ShowModal;
GraphFrm.Xpoints := nil;
GraphFrm.Ypoints := nil;
end;
*)
// clean up
Cumfreq2 := nil;
Cumfreq1 := nil;
XValue1 := nil;
XValue2 := nil;
Var2Freq := nil;
Var1Freq := nil;
end;
@ -766,20 +545,23 @@ end;
procedure TCompareDistFrm.Plot(AChartFrame: TChartFrame; Y1Values, Y2Values: DblDyneVec;
AName1, AName2: String);
ASeriesTitle1, ASeriesTitle2, AYTitle, ATitle: String);
var
ser1, ser2: TChartSeries;
plotType: TPlotType;
begin
AChartFrame.Clear;
AChartFrame.SetTitle(ATitle);
AChartFrame.SetYTitle(AYTitle);
if BarPlotBtn.Down then
plotType := ptBars
else
plotType := ptLines;
ser1 := AChartFrame.PlotXY(plotType, nil, Y1Values, nil, nil, AName1, DATA_COLORS[0]);
ser2 := AChartFrame.PlotXY(plotType, nil, Y2Values, nil, nil, AName2, DATA_Colors[1]);
ser1 := AChartFrame.PlotXY(plotType, nil, Y1Values, nil, nil, ASeriesTitle1, DATA_COLORS[0]);
ser2 := AChartFrame.PlotXY(plotType, nil, Y2Values, nil, nil, ASeriesTitle2, DATA_Colors[1]);
if (ser1 is TBarSeries) then
begin

View File

@ -65,7 +65,7 @@ function factorial(x : integer) : integer;
implementation
uses
MathUnit;
Utils, MathUnit;
function chisquaredprob(X : double; k : integer) : double;
var
@ -1629,28 +1629,22 @@ begin
end;
Result := NoGood;
end;
//-------------------------------------------------------------------
{ split a value into the whole and fractional parts}
function WholeValue(value : double) : double;
{ split a value into the whole and fractional parts}
VAR
whole : double;
begin
whole := Floor(value);
Result := whole;
Result := Floor(value);
end;
//---------------------------------------------------------------------------
function FractionValue(value : double) : double;
{ split a value into the whole and fractional parts }
VAR
fraction : double;
begin
fraction := value - Floor(value);
Result := fraction;
end;
//---------------------------------------------------------------------------
Function Quartiles(TypeQ : integer; pcntile : double; N : integer;
{ split a value into the whole and fractional parts }
function FractionValue(value : double) : double;
begin
Result := value - Floor(value);
end;
function Quartiles(TypeQ : integer; pcntile : double; N : integer;
VAR values : DblDyneVec) : double;
VAR
whole, fraction, Myresult, np, avalue, avalue1 : double;
@ -1768,9 +1762,64 @@ begin
result := p;
end;
{ Statistical test whether two one-dimensional sets of points are compatible
with coming from the same parent distribution, using the Kolmogorov test.
That is, it is used to compare two experimental distributions of unbinned data.
Input:
a,b: One-dimensional arrays of length na, nb, respectively.
The elements of a and b must be given in ascending order.
option is a character string to specify options
"D" Put out a line of "Debug" printout
"M" Return the Maximum Kolmogorov distance instead of prob
Output:
The returned value prob is a calculated confidence level which gives a
statistical test for compatibility of a and b.
Values of prob close to zero are taken as indicating a small probability
of compatibility. For two point sets drawn randomly from the same parent
distribution, the value of prob should be uniformly distributed between
zero and one.
in case of error the function return -1
If the 2 sets have a different number of points, the minimum of
the two sets is used.
Method:
The Kolmogorov test is used. The test statistic is the maximum deviation
between the two integrated distribution functions, multiplied by the
normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
(W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
Statistical Methods in Experimental Physics, (North-Holland,
Amsterdam 1971) 269-271)
Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
-----------------------------------------------------------
The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
over the two sorted arrays a and b representing empirical distribution
functions. The for-loop handles 3 cases: when the next points to be
evaluated satisfy a>b, a<b, or a=b:
For the last case, a=b, the algorithm advances each array by one index in an
attempt to move through the equality. However, this is incorrect when one or
the other of a or b (or both) have a repeated value, call it x. For the KS
statistic to be computed properly, rdiff needs to be calculated after all of
the a and b at x have been tallied (this is due to the definition of the
empirical distribution function; another way to convince yourself that the
old CERNLIB method is wrong is that it implies that the function defined as the
difference between a and b is multi-valued at x -- besides being ugly, this
would invalidate Kolmogorov's theorem).
NOTE1
A good description of the Kolmogorov test can be seen at:
http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
}
function KolmogorovTest(na: integer; const a: DblDyneVec; nb: integer;
const b: DblDyneVec; option: String; AReport: TStrings): double;
VAR
var
prob: double;
opt: string;
rna: double; // = na;
@ -1781,75 +1830,23 @@ VAR
i, ia, ib: integer;
ok: boolean;
begin
// Statistical test whether two one-dimensional sets of points are compatible
// with coming from the same parent distribution, using the Kolmogorov test.
// That is, it is used to compare two experimental distributions of unbinned data.
//
// Input:
// a,b: One-dimensional arrays of length na, nb, respectively.
// The elements of a and b must be given in ascending order.
// option is a character string to specify options
// "D" Put out a line of "Debug" printout
// "M" Return the Maximum Kolmogorov distance instead of prob
//
// Output:
// The returned value prob is a calculated confidence level which gives a
// statistical test for compatibility of a and b.
// Values of prob close to zero are taken as indicating a small probability
// of compatibility. For two point sets drawn randomly from the same parent
// distribution, the value of prob should be uniformly distributed between
// zero and one.
//
// in case of error the function return -1
// If the 2 sets have a different number of points, the minimum of
// the two sets is used.
//
// Method:
// The Kolmogorov test is used. The test statistic is the maximum deviation
// between the two integrated distribution functions, multiplied by the
// normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
//
// Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
// (W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
// Statistical Methods in Experimental Physics, (North-Holland,
// Amsterdam 1971) 269-271)
//
// Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
// -----------------------------------------------------------
// The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
// over the two sorted arrays a and b representing empirical distribution
// functions. The for-loop handles 3 cases: when the next points to be
// evaluated satisfy a>b, a<b, or a=b:
// For the last case, a=b, the algorithm advances each array by one index in an
// attempt to move through the equality. However, this is incorrect when one or
// the other of a or b (or both) have a repeated value, call it x. For the KS
// statistic to be computed properly, rdiff needs to be calculated after all of
// the a and b at x have been tallied (this is due to the definition of the
// empirical distribution function; another way to convince yourself that the
// old CERNLIB method is wrong is that it implies that the function defined as the
// difference between a and b is multi-valued at x -- besides being ugly, this
// would invalidate Kolmogorov's theorem).
//
// NOTE1
// A good description of the Kolmogorov test can be seen at:
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
opt := option;
// opt.ToUpper();
prob := -1;
// Require at least two points in each graph
{ Require at least two points in each graph }
if (na <= 2) or (nb <= 2) then
begin
ShowMessage('KolmogorovTest - Sets must have more than 2 points');
ErrorMsg('KolmogorovTest - Sets must have more than 2 points');
exit;
end;
// Constants needed
{ Constants needed }
rna := na;
rnb := nb;
sa := 1./rna;
sb := 1. / rnb;
// Starting values for main loop
{ Starting values for main loop }
if (a[0] < b[0]) then
begin
rdiff := -sa;
@ -1864,9 +1861,9 @@ begin
end;
rdmax := Abs(rdiff);
// Main loop over point sets to find max distance
// rdiff is the running difference, and rdmax the max.
ok := FALSE;
{ Main loop over point sets to find max distance
rdiff is the running difference, and rdmax the max. }
ok := false;
for i := 0 to na + nb - 1 do
begin
if (a[ia-1] < b[ib-1]) then
@ -1875,7 +1872,7 @@ begin
ia := ia + 1;
if (ia > na) then
begin
ok := TRUE;
ok := true;
break;
end;
end
@ -1885,7 +1882,7 @@ begin
ib := ib + 1;
if (ib > nb) then
begin
ok := TRUE;
ok := true;
break;
end;
end
@ -1904,37 +1901,37 @@ begin
end;
if (ia > na) then
begin
ok := TRUE;
ok := true;
break;
end;
if (ib > nb) then
begin
ok := TRUE;
ok := true;
break;
end;
end;
rdmax := Max(rdmax,Abs(rdiff));
rdmax := Max(rdmax, abs(rdiff));
end;
// Should never terminate this loop with ok = kFALSE!
if (ok) then
{ Should never terminate this loop with ok = FALSE! }
if ok then
begin
rdmax := Max(rdmax,Abs(rdiff));
rdmax := Max(rdmax, abs(rdiff));
z := rdmax * Sqrt(rna * rnb / (rna + rnb));
prob := KolmogorovProb(z);
end;
// debug printout
{ debug printout }
if (opt = 'D') then
AReport.Add(' Kolmogorov Probability: %g, Max Dist: %g', [prob, rdmax]);
AReport.Add(' Kolmogorov Probability: %.5f, Max Dist: %.5f', [prob, rdmax]);
if (opt = 'M') then
result := rdmax
Result := rdmax
else
result := prob;
Result := prob;
end;
(* wp: moved to MathUnit for easier testing
procedure poisson_cdf ( x : integer; a : double; VAR cdf : double );
VAR
i : integer;