You've already forked lazarus-ccr
LazStats: Complete refactoring of CompareDistUnit.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7723 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -1,37 +1,37 @@
|
||||
inherited CompareDistFrm: TCompareDistFrm
|
||||
Left = 459
|
||||
Height = 504
|
||||
Height = 535
|
||||
Top = 178
|
||||
Width = 924
|
||||
HelpType = htKeyword
|
||||
HelpKeyword = 'html/ComparisonsWithTheoreticalDistri.htm'
|
||||
Caption = 'Compare Cumulative Distributions'
|
||||
ClientHeight = 504
|
||||
ClientHeight = 535
|
||||
ClientWidth = 924
|
||||
OnActivate = FormActivate
|
||||
OnCreate = FormCreate
|
||||
Position = poMainFormCenter
|
||||
object ParamsPanel: TPanel[0]
|
||||
Left = 8
|
||||
Height = 488
|
||||
Height = 519
|
||||
Top = 8
|
||||
Width = 288
|
||||
Width = 296
|
||||
Align = alLeft
|
||||
BorderSpacing.Left = 8
|
||||
BorderSpacing.Top = 8
|
||||
BorderSpacing.Right = 4
|
||||
BorderSpacing.Bottom = 8
|
||||
BevelOuter = bvNone
|
||||
ClientHeight = 488
|
||||
ClientWidth = 288
|
||||
ClientHeight = 519
|
||||
ClientWidth = 296
|
||||
TabOrder = 0
|
||||
object ResetBtn: TButton
|
||||
AnchorSideRight.Control = ComputeBtn
|
||||
AnchorSideBottom.Control = ParamsPanel
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 87
|
||||
Left = 95
|
||||
Height = 25
|
||||
Top = 463
|
||||
Top = 494
|
||||
Width = 54
|
||||
Anchors = [akRight, akBottom]
|
||||
AutoSize = True
|
||||
@ -39,15 +39,15 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
BorderSpacing.Right = 8
|
||||
Caption = 'Reset'
|
||||
OnClick = ResetBtnClick
|
||||
TabOrder = 0
|
||||
TabOrder = 7
|
||||
end
|
||||
object ComputeBtn: TButton
|
||||
AnchorSideRight.Control = CloseBtn
|
||||
AnchorSideBottom.Control = ParamsPanel
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 149
|
||||
Left = 157
|
||||
Height = 25
|
||||
Top = 463
|
||||
Top = 494
|
||||
Width = 76
|
||||
Anchors = [akRight, akBottom]
|
||||
AutoSize = True
|
||||
@ -56,16 +56,16 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
BorderSpacing.Right = 8
|
||||
Caption = 'Compute'
|
||||
OnClick = ComputeBtnClick
|
||||
TabOrder = 1
|
||||
TabOrder = 8
|
||||
end
|
||||
object CloseBtn: TButton
|
||||
AnchorSideRight.Control = ParamsPanel
|
||||
AnchorSideRight.Side = asrBottom
|
||||
AnchorSideBottom.Control = ParamsPanel
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 233
|
||||
Left = 241
|
||||
Height = 25
|
||||
Top = 463
|
||||
Top = 494
|
||||
Width = 55
|
||||
Anchors = [akRight, akBottom]
|
||||
AutoSize = True
|
||||
@ -74,7 +74,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
Caption = 'Close'
|
||||
ModalResult = 11
|
||||
OnClick = CloseBtnClick
|
||||
TabOrder = 2
|
||||
TabOrder = 9
|
||||
end
|
||||
object Bevel1: TBevel
|
||||
AnchorSideLeft.Control = ParamsPanel
|
||||
@ -83,8 +83,8 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideBottom.Control = ResetBtn
|
||||
Left = 0
|
||||
Height = 8
|
||||
Top = 447
|
||||
Width = 288
|
||||
Top = 478
|
||||
Width = 296
|
||||
Anchors = [akLeft, akRight, akBottom]
|
||||
Shape = bsBottomLine
|
||||
end
|
||||
@ -92,29 +92,31 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideLeft.Control = ParamsPanel
|
||||
AnchorSideBottom.Control = Bevel1
|
||||
Left = 0
|
||||
Height = 83
|
||||
Top = 364
|
||||
Width = 298
|
||||
Height = 110
|
||||
Top = 368
|
||||
Width = 294
|
||||
Anchors = [akLeft, akBottom]
|
||||
AutoSize = True
|
||||
BorderSpacing.Top = 8
|
||||
Caption = 'Option:'
|
||||
ClientHeight = 63
|
||||
ClientWidth = 294
|
||||
TabOrder = 3
|
||||
Caption = 'Options'
|
||||
ClientHeight = 90
|
||||
ClientWidth = 290
|
||||
TabOrder = 6
|
||||
object BothChk: TCheckBox
|
||||
AnchorSideLeft.Control = OptionsGroup
|
||||
AnchorSideTop.Control = OptionsGroup
|
||||
AnchorSideTop.Control = NoIntervalsEdit
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 16
|
||||
Height = 19
|
||||
Top = 6
|
||||
Top = 33
|
||||
Width = 266
|
||||
AutoSize = False
|
||||
BorderSpacing.Left = 16
|
||||
BorderSpacing.Top = 6
|
||||
BorderSpacing.Right = 12
|
||||
BorderSpacing.Right = 8
|
||||
BorderSpacing.Bottom = 8
|
||||
Caption = 'Plot both frequency and cumulative frequency'
|
||||
TabOrder = 0
|
||||
TabOrder = 1
|
||||
end
|
||||
object BarPlotBtn: TSpeedButton
|
||||
AnchorSideLeft.Control = OptionsGroup
|
||||
@ -122,7 +124,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 16
|
||||
Height = 22
|
||||
Top = 33
|
||||
Top = 60
|
||||
Width = 23
|
||||
BorderSpacing.Left = 16
|
||||
BorderSpacing.Bottom = 8
|
||||
@ -138,13 +140,43 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideTop.Side = asrCenter
|
||||
Left = 43
|
||||
Height = 22
|
||||
Top = 33
|
||||
Top = 60
|
||||
Width = 23
|
||||
BorderSpacing.Left = 4
|
||||
GroupIndex = 1
|
||||
Images = MainDataModule.ImageList
|
||||
ImageIndex = 10
|
||||
end
|
||||
object NoIntervalsEdit: TSpinEdit
|
||||
AnchorSideLeft.Control = NoIntervalsLabel
|
||||
AnchorSideLeft.Side = asrBottom
|
||||
AnchorSideTop.Control = OptionsGroup
|
||||
AnchorSideRight.Control = OptionsGroup
|
||||
AnchorSideRight.Side = asrBottom
|
||||
Left = 177
|
||||
Height = 23
|
||||
Top = 2
|
||||
Width = 66
|
||||
Alignment = taRightJustify
|
||||
BorderSpacing.Left = 16
|
||||
BorderSpacing.Top = 2
|
||||
BorderSpacing.Right = 12
|
||||
BorderSpacing.Bottom = 8
|
||||
MinValue = 1
|
||||
TabOrder = 0
|
||||
Value = 10
|
||||
end
|
||||
object NoIntervalsLabel: TLabel
|
||||
AnchorSideLeft.Control = BarPlotBtn
|
||||
AnchorSideTop.Control = NoIntervalsEdit
|
||||
AnchorSideTop.Side = asrCenter
|
||||
Left = 16
|
||||
Height = 15
|
||||
Top = 6
|
||||
Width = 145
|
||||
Caption = 'Approximate interval count'
|
||||
ParentColor = False
|
||||
end
|
||||
end
|
||||
object Label1: TLabel
|
||||
AnchorSideLeft.Control = ParamsPanel
|
||||
@ -163,9 +195,9 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideRight.Control = Var1InBtn
|
||||
AnchorSideBottom.Control = OptionsGroup
|
||||
Left = 0
|
||||
Height = 339
|
||||
Height = 343
|
||||
Top = 17
|
||||
Width = 99
|
||||
Width = 107
|
||||
Anchors = [akTop, akLeft, akRight, akBottom]
|
||||
BorderSpacing.Top = 2
|
||||
BorderSpacing.Right = 6
|
||||
@ -173,13 +205,13 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
ItemHeight = 0
|
||||
OnDblClick = VarListDblClick
|
||||
OnSelectionChange = VarListSelectionChange
|
||||
TabOrder = 4
|
||||
TabOrder = 0
|
||||
end
|
||||
object Var1InBtn: TBitBtn
|
||||
AnchorSideLeft.Control = CompareGroup
|
||||
AnchorSideTop.Control = VarList
|
||||
AnchorSideRight.Control = VarOneEdit
|
||||
Left = 105
|
||||
Left = 113
|
||||
Height = 26
|
||||
Top = 17
|
||||
Width = 26
|
||||
@ -189,13 +221,13 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
ImageIndex = 1
|
||||
OnClick = Var1InBtnClick
|
||||
Spacing = 0
|
||||
TabOrder = 5
|
||||
TabOrder = 1
|
||||
end
|
||||
object Var1OutBtn: TBitBtn
|
||||
AnchorSideLeft.Control = Var1InBtn
|
||||
AnchorSideTop.Control = Var1InBtn
|
||||
AnchorSideTop.Side = asrBottom
|
||||
Left = 105
|
||||
Left = 113
|
||||
Height = 26
|
||||
Top = 47
|
||||
Width = 26
|
||||
@ -203,12 +235,12 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
ImageIndex = 0
|
||||
OnClick = Var1OutBtnClick
|
||||
Spacing = 0
|
||||
TabOrder = 6
|
||||
TabOrder = 2
|
||||
end
|
||||
object Label2: TLabel
|
||||
AnchorSideLeft.Control = VarOneEdit
|
||||
AnchorSideBottom.Control = VarOneEdit
|
||||
Left = 139
|
||||
Left = 147
|
||||
Height = 15
|
||||
Top = 21
|
||||
Width = 66
|
||||
@ -224,7 +256,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideRight.Side = asrBottom
|
||||
AnchorSideBottom.Control = Var1OutBtn
|
||||
AnchorSideBottom.Side = asrBottom
|
||||
Left = 139
|
||||
Left = 147
|
||||
Height = 23
|
||||
Top = 38
|
||||
Width = 149
|
||||
@ -232,7 +264,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
BorderSpacing.Left = 8
|
||||
BorderSpacing.Bottom = 12
|
||||
ReadOnly = True
|
||||
TabOrder = 7
|
||||
TabOrder = 3
|
||||
Text = 'VarOneEdit'
|
||||
end
|
||||
object CompareGroup: TRadioGroup
|
||||
@ -241,7 +273,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideTop.Side = asrBottom
|
||||
AnchorSideRight.Control = ParamsPanel
|
||||
AnchorSideRight.Side = asrBottom
|
||||
Left = 105
|
||||
Left = 113
|
||||
Height = 76
|
||||
Top = 81
|
||||
Width = 183
|
||||
@ -268,7 +300,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
'Another Variable'
|
||||
)
|
||||
OnClick = CompareGroupClick
|
||||
TabOrder = 8
|
||||
TabOrder = 4
|
||||
end
|
||||
object Notebook: TNotebook
|
||||
AnchorSideLeft.Control = CompareGroup
|
||||
@ -276,7 +308,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AnchorSideTop.Side = asrBottom
|
||||
AnchorSideRight.Control = CompareGroup
|
||||
AnchorSideRight.Side = asrBottom
|
||||
Left = 105
|
||||
Left = 113
|
||||
Height = 191
|
||||
Top = 165
|
||||
Width = 183
|
||||
@ -284,7 +316,7 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
AutoSize = True
|
||||
Anchors = [akTop, akLeft, akRight]
|
||||
BorderSpacing.Bottom = 8
|
||||
TabOrder = 9
|
||||
TabOrder = 5
|
||||
object TheoreticalDistPage: TPage
|
||||
object DistGroup: TGroupBox
|
||||
AnchorSideLeft.Control = TheoreticalDistPage
|
||||
@ -491,10 +523,10 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
end
|
||||
end
|
||||
object PageControl1: TPageControl[1]
|
||||
Left = 309
|
||||
Height = 488
|
||||
Left = 317
|
||||
Height = 519
|
||||
Top = 8
|
||||
Width = 607
|
||||
Width = 599
|
||||
ActivePage = CumFreqChartPage
|
||||
Align = alClient
|
||||
BorderSpacing.Left = 4
|
||||
@ -514,8 +546,8 @@ inherited CompareDistFrm: TCompareDistFrm
|
||||
end
|
||||
end
|
||||
object ParamsSplitter: TSplitter[2]
|
||||
Left = 300
|
||||
Height = 504
|
||||
Left = 308
|
||||
Height = 535
|
||||
Top = 0
|
||||
Width = 5
|
||||
ResizeStyle = rsPattern
|
||||
|
@ -8,7 +8,7 @@ interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs,
|
||||
StdCtrls, Buttons, ExtCtrls, ComCtrls,
|
||||
StdCtrls, Buttons, ExtCtrls, ComCtrls, Spin,
|
||||
FunctionsLib, Globals, GraphLib, DataProcs, MainDM, MainUnit,
|
||||
BasicStatsFormUnit, ReportFrameUnit, ChartFrameUnit;
|
||||
|
||||
@ -25,6 +25,8 @@ type
|
||||
DistGroup: TGroupBox;
|
||||
DF1Label: TLabel;
|
||||
DF2Label: TLabel;
|
||||
NoIntervalsEdit: TSpinEdit;
|
||||
NoIntervalsLabel: TLabel;
|
||||
PageControl1: TPageControl;
|
||||
ParamsSplitter: TSplitter;
|
||||
NormalDistChk: TRadioButton;
|
||||
@ -79,12 +81,17 @@ type
|
||||
CompareTo: TCompareTo;
|
||||
CompareDist: TCompareDist;
|
||||
procedure CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; DF1: Integer = -1;
|
||||
DF2: Integer = -1);
|
||||
AMin, AIncrement: Double; ANumIntervals, AColIndex: Integer);
|
||||
procedure CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
AMin, AMax: Double; ANumIntervals, ANumCases: Integer;
|
||||
ACompareDist: TCompareDist; DF1: Integer = -1; DF2: Integer = -1);
|
||||
procedure CalcIntervals(var AMin, AMax, AIntervalsize: Double;
|
||||
out ANumIntervals: Integer);
|
||||
function CalcMinMax(out AMin, AMax: Double; AColIndex: Integer): Integer;
|
||||
procedure CalcTheoreticalDist(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
ANumIntervals, ANumCases: Integer; out AName: String);
|
||||
ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist; out AName: String);
|
||||
procedure Plot(AChartFrame: TChartFrame; Y1Values, Y2Values: DblDyneVec;
|
||||
AName1, AName2: String);
|
||||
ASeriesTitle1, ASeriesTitle2, AYTitle, ATitle: String);
|
||||
procedure UpdateBtnStates;
|
||||
procedure UpdateDF1;
|
||||
function Validate(ANumCases: Integer;
|
||||
@ -111,9 +118,39 @@ uses
|
||||
|
||||
{ TCompareDistFrm }
|
||||
|
||||
{ Get frequency and cumulative frequency of cases in each interval }
|
||||
procedure TCompareDistFrm.CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; DF1: Integer = -1;
|
||||
DF2: Integer = -1);
|
||||
AMin, AIncrement: Double; ANumIntervals, AColIndex: Integer);
|
||||
var
|
||||
j, k: Integer;
|
||||
value: Double;
|
||||
begin
|
||||
// Get border points of the intervals
|
||||
for j := 0 to ANumIntervals do // no "-1" because last point is needed
|
||||
XValues[j] := AMin + j*AIncrement;
|
||||
|
||||
// count values in these intervals
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, AColIndex) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, j]);
|
||||
for k := 0 to ANumIntervals-1 do
|
||||
begin
|
||||
if (value >= XValues[k]) and (value < XValues[k+1]) then
|
||||
FreqValues[k] := FreqValues[k] + 1;
|
||||
end;
|
||||
end;
|
||||
|
||||
// Calculate cumulative frequencies
|
||||
CumFreqValues[0] := FreqValues[0];
|
||||
for j := 1 to ANumIntervals-1 do
|
||||
CumFreqValues[j] := CumFreqValues[j-1] + FreqValues[j];
|
||||
end;
|
||||
|
||||
{ Calculate frequencies and cumulative frequencies for a theoretical distribution }
|
||||
procedure TCompareDistFrm.CalcFreq(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
AMin, AMax: Double; ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist;
|
||||
DF1: Integer = -1; DF2: Integer = -1);
|
||||
var
|
||||
dx: Double;
|
||||
i: Integer;
|
||||
@ -129,7 +166,7 @@ begin
|
||||
XValues[i] := AMin + i * dx;
|
||||
|
||||
for i := 0 to ANumIntervals - 1 do
|
||||
case CompareDist of
|
||||
case ACompareDist of
|
||||
cd_Normal:
|
||||
Calc(NormalDist(XValues[i]), NormalDist(XValues[i+1]));
|
||||
cd_t:
|
||||
@ -140,7 +177,6 @@ begin
|
||||
Calc(ProbF(XValues[i], DF1, DF2), ProbF(XValues[i+1], DF1, DF2));
|
||||
cd_Poisson:
|
||||
Calc(PoissonCDF(round(XValues[i]), DF1), PoissonCDF(round(XValues[i+1]), DF1));
|
||||
// Calc(PoissonPDF(round(XValues[i]), DF1), 0);
|
||||
end;
|
||||
|
||||
CumFreqValues[0] := FreqValues[0];
|
||||
@ -149,8 +185,49 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
procedure TCompareDistFrm.CalcIntervals(var AMin, AMax, AIntervalsize: Double;
|
||||
out ANumIntervals: Integer);
|
||||
var
|
||||
intervalSize: Double;
|
||||
m: Double;
|
||||
e: Integer;
|
||||
begin
|
||||
intervalSize := (AMax - AMin) / NoIntervalsEdit.Value;
|
||||
if intervalSize = 0 then intervalSize := 1;
|
||||
MantisseAndExponent(intervalSize, m, e);
|
||||
m := round(m);
|
||||
AIntervalSize := m * IntPower(10, e);
|
||||
AMin := floor(AMin / AIntervalSize) * AIntervalSize;
|
||||
AMax := ceil(AMax / AIntervalSize) * AIntervalSize;
|
||||
ANumIntervals := round((AMax - AMin) / AIntervalSize);
|
||||
end;
|
||||
|
||||
|
||||
{ Calculates minimum and maximum of the values in the given column.
|
||||
Also, counts the valid numbers in this column and returns it as function result }
|
||||
function TCompareDistFrm.CalcMinMax(out AMin, AMax: Double; AColIndex: Integer): Integer;
|
||||
var
|
||||
j: Integer;
|
||||
value: Double;
|
||||
numCases: Integer;
|
||||
begin
|
||||
AMin := Infinity;
|
||||
AMax := -Infinity;
|
||||
numCases := 0;
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, AColIndex) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, j]);
|
||||
if value > AMax then AMax := value;
|
||||
if value < AMin then AMin := value;
|
||||
inc(numCases);
|
||||
end;
|
||||
Result := numCases;
|
||||
end;
|
||||
|
||||
|
||||
procedure TCompareDistFrm.CalcTheoreticalDist(XValues, FreqValues, CumFreqValues: DblDyneVec;
|
||||
ANumIntervals, ANumCases: Integer; out AName: String);
|
||||
ANumIntervals, ANumCases: Integer; ACompareDist: TCompareDist; out AName: String);
|
||||
var
|
||||
min, max: Double;
|
||||
DF1: Integer = -1;
|
||||
@ -162,7 +239,7 @@ begin
|
||||
if TryStrToFloat(DF2Edit.Text, a) then
|
||||
DF2 := round(a);
|
||||
|
||||
case CompareDist of
|
||||
case ACompareDist of
|
||||
cd_Normal:
|
||||
begin
|
||||
min := -3.0;
|
||||
@ -191,7 +268,7 @@ begin
|
||||
; // will be handled separately
|
||||
end;
|
||||
|
||||
CalcFreq(XValues, FreqValues, CumFreqValues, min, max, ANumIntervals, ANumCases, DF1, DF2);
|
||||
CalcFreq(XValues, FreqValues, CumFreqValues, min, max, ANumIntervals, ANumCases, ACompareDist, DF1, DF2);
|
||||
end;
|
||||
|
||||
|
||||
@ -245,21 +322,15 @@ var
|
||||
xValue2: DblDyneVec = nil;
|
||||
cumfreq1: DblDyneVec = nil;
|
||||
cumfreq2: DblDyneVec = nil;
|
||||
i, j, k, col1, col2, nCases, noInts: integer;
|
||||
min1, max1, min2, max2, range1, range2, value: double;
|
||||
incrSize1, incrSize2, {%H-}KS: double;
|
||||
cellVal, name1, name2: string;
|
||||
msg: String;
|
||||
i, col1, col2, nCases, noInts: integer;
|
||||
min1, max1, min2, max2: double;
|
||||
incrSize1: Double = 0.0;
|
||||
incrSize2: Double = 0.0;
|
||||
KS: double;
|
||||
cellVal, name1, name2, msg: string;
|
||||
C: TWinControl;
|
||||
lReport: TStrings;
|
||||
begin
|
||||
SetLength(var1Freq, NoCases + 1);
|
||||
SetLength(var2Freq, NoCases + 1);
|
||||
SetLength(xValue1, NoCases + 1);
|
||||
SetLength(xValue2, NoCases + 1);
|
||||
SetLength(cumfreq1, NoCases + 1);
|
||||
SetLength(cumfreq2, NoCases + 1);
|
||||
|
||||
// Get columns of the variables
|
||||
col1 := 0;
|
||||
for i := 1 to NoVariables do
|
||||
@ -289,255 +360,55 @@ begin
|
||||
end;
|
||||
|
||||
// Get min and max values for variable in col1, as well as true number of cases
|
||||
min1 := Infinity;
|
||||
max1 := -Infinity;
|
||||
nCases := 0;
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, col1) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, j]);
|
||||
if value > max1 then max1 := value;
|
||||
if value < min1 then min1 := value;
|
||||
inc(nCases);
|
||||
end;
|
||||
nCases := CalcMinMax(min1, max1, col1);
|
||||
|
||||
// Validate
|
||||
// Validate edit controls
|
||||
if not Validate(nCases, msg, C) then begin
|
||||
C.SetFocus;
|
||||
ErrorMsg(msg);
|
||||
end;
|
||||
|
||||
// Get number of intervals
|
||||
noInts := NoCases - 1; // wp: why NoCases here, and not nCases?
|
||||
if noInts > 20 then noints := 20;
|
||||
CalcIntervals(min1, max1, incrSize1, noInts);
|
||||
|
||||
range1 := max1 - min1 + 1.0;
|
||||
incrSize1 := range1 / noInts;
|
||||
name1 := VarOneEdit.Text;
|
||||
// Get mem for the arrays
|
||||
SetLength(var1Freq, noInts);
|
||||
Setlength(cumFreq1, noInts);
|
||||
SetLength(xValue1, noInts + 1); // Border points of the intervals, one more than intervals
|
||||
|
||||
SetLength(var2Freq, noInts);
|
||||
SetLength(cumFreq2, noInts);
|
||||
Setlength(xValue2, noInts + 1);
|
||||
|
||||
// Repeat for variable 2 (if Compare To is selected as "Another variable")
|
||||
if CompareTo = ctVariable then
|
||||
begin
|
||||
min2 := Infinity;
|
||||
max2 := -Infinity;
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, col2) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col2, j]);
|
||||
if value > max2 then max2 := value;
|
||||
if value < min2 then min2 := value;
|
||||
end;
|
||||
range2 := max2 - min2 + 1.0;
|
||||
incrSize2 := range2 / noInts;
|
||||
CalcMinMax(min2, max2, col2);
|
||||
incrSize2 := (max2 - min2) / noInts;
|
||||
name2 := VarTwoEdit.Text;
|
||||
end;
|
||||
|
||||
// Get frequency of cases in each interval
|
||||
for j := 1 to noints+1 do
|
||||
var1Freq[j-1] := 0;
|
||||
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, col1) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col1, j]);
|
||||
for k := 1 to noInts do
|
||||
begin
|
||||
if (value >= min1 + (k-1) * incrSize1) and (value < min1 + k * incrSize1) then
|
||||
var1Freq[k-1] := var1Freq[k-1] + 1;
|
||||
end;
|
||||
end;
|
||||
|
||||
cumFreq1[0] := var1Freq[0];
|
||||
for j := 1 to noInts+1 do
|
||||
xValue1[j-1] := min1 + (j-1) * incrSize1;
|
||||
for j := 1 to noInts do
|
||||
cumFreq1[j] := cumFreq1[j-1] + var1Freq[j];
|
||||
CalcFreq(xValue1, var1Freq, cumFreq1, min1, incrSize1, noInts, col1);
|
||||
|
||||
// Repeat for 2nd variable, if required
|
||||
if CompareTo = ctVariable then
|
||||
begin
|
||||
for j := 1 to noInts+1 do
|
||||
var2Freq[j-1] := 0;
|
||||
|
||||
for j := 1 to NoCases do
|
||||
begin
|
||||
if not ValidValue(j, col2) then continue;
|
||||
value := StrToFloat(OS3MainFrm.DataGrid.Cells[col2, j]);
|
||||
for k := 1 to noInts do
|
||||
begin
|
||||
if (value >= min2 + (k-1) * incrsize2) and (value < min2 + k * incrsize2) then
|
||||
var2Freq[k-1] := var2Freq[k-1] + 1;
|
||||
end;
|
||||
end;
|
||||
|
||||
cumfreq2[0] := var2Freq[0];
|
||||
for j := 1 to noInts+1 do
|
||||
xValue2[j-1] := min2 + (j-1) * incrSize2;
|
||||
for j := 1 to noInts do
|
||||
cumFreq2[j] := cumFreq2[j-1] + var2Freq[j];
|
||||
end;
|
||||
CalcFreq(xValue2, var2Freq, cumFreq2, min2, incrSize2, noInts, col2);
|
||||
|
||||
// Get theoretical distribution frequencies for selected distribution, if required.
|
||||
if CompareDist = cd_Poisson then
|
||||
begin
|
||||
CalcFreq(xValue2, var2Freq, cumFreq2, min1, min2, noInts, nCases, StrToInt(DF1Edit.Text));
|
||||
name2 := 'Poisson';
|
||||
end
|
||||
else
|
||||
CalcTheoreticalDist(xValue2, var2Freq, cumFreq2, noInts, nCases, name2);
|
||||
|
||||
(*
|
||||
|
||||
|
||||
if CompareTo = ctTheoreticalDistrib then
|
||||
begin
|
||||
if NormalDistChk.Checked then // normal distribution curve
|
||||
begin
|
||||
name2 := 'Normal';
|
||||
min2 := -3.0;
|
||||
max2 := 3.0;
|
||||
range2 := max2 - min2;
|
||||
incrsize2 := range2 / noints;
|
||||
Xvalue2[0] := min2;
|
||||
Xvalue2[noints] := max2;
|
||||
for i := 1 to noInts do
|
||||
begin
|
||||
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
|
||||
Xvalue2[i] := min2 + (i) * incrSize2;
|
||||
prob1 := probz(abs(Xvalue2[i-1]));
|
||||
prob2 := probz(abs(Xvalue2[i]));
|
||||
if prob1 > prob2 then
|
||||
Var2Freq[i-1] := round((prob1 - prob2) * nCases)
|
||||
else
|
||||
Var2Freq[i-1] := round((prob2 - prob1) * nCases)
|
||||
end;
|
||||
Cumfreq2[0] := Var2Freq[0];
|
||||
for i := 1 to noints do
|
||||
Cumfreq2[i] := Cumfreq2[i-1] + Var2Freq[i];
|
||||
end
|
||||
else
|
||||
if tDistChk.Checked then // t-distribution
|
||||
begin
|
||||
name2 := 't-Dist.';
|
||||
min2 := -3.0;
|
||||
max2 := 3.0;
|
||||
df1 := nCases - 1;
|
||||
range2 := max2 - min2;
|
||||
incrsize2 := range2 / noints;
|
||||
Xvalue2[0] := min2;
|
||||
Xvalue2[noints] := max2;
|
||||
for i := 1 to noInts do
|
||||
begin
|
||||
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
|
||||
Xvalue2[i] := min2 + (i) * incrSize2;
|
||||
prob1 := 0.5 * probt(Xvalue2[i-1],df1);
|
||||
prob2 := 0.5 * probt(Xvalue2[i],df1);
|
||||
if prob1 > prob2 then
|
||||
Var2Freq[i-1] := round((prob1-prob2) * nCases)
|
||||
else
|
||||
Var2Freq[i-1] := round((prob2-prob1) * nCases)
|
||||
end;
|
||||
Cumfreq2[0] := Var2Freq[0];
|
||||
for i := 1 to noInts do
|
||||
Cumfreq2[i] := Cumfreq2[i-1] + Var2Freq[i];
|
||||
end
|
||||
else
|
||||
if ChiSqDistChk.Checked then // chi squared distribution
|
||||
begin
|
||||
cellval := InputBox('Deg. Freedom 1 Entry','DF 1','');
|
||||
df1 := StrToInt(cellval);
|
||||
name2 := 'Chi Sq';
|
||||
min2 := 0.0;
|
||||
max2 := 20.0;
|
||||
range2 := max2 - min2;
|
||||
incrSize2 := range2 / noInts;
|
||||
xValue2[0] := min2;
|
||||
xValue2[noInts] := max2;
|
||||
for i := 1 to noints do
|
||||
begin
|
||||
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
|
||||
Xvalue2[i] := min2 + (i) * incrSize2;
|
||||
prob1 := chisquaredprob(Xvalue2[i-1],df1);
|
||||
prob2 := chisquaredprob(Xvalue2[i],df1);
|
||||
if prob1 > prob2 then
|
||||
Var2Freq[i-1] := round((prob1-prob2) * nCases)
|
||||
else
|
||||
Var2Freq[i-1] := round((prob2-prob1) * nCases)
|
||||
end;
|
||||
cumfreq2[0] := var2Freq[0];
|
||||
for i := 1 to noints do
|
||||
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
|
||||
end
|
||||
else
|
||||
if FDistChk.Checked then // F distribution
|
||||
begin
|
||||
// get degrees of freedom
|
||||
cellval := InputBox('Deg. Freedom 1 Entry','DF 1','');
|
||||
df1 := StrToInt(cellval);
|
||||
cellval := InputBox('Deg. Freedom 2 Entry','DF 2','');
|
||||
df2 := StrToInt(cellval);
|
||||
name2 := 'F Dist.';
|
||||
min2 := 0.0;
|
||||
max2 := 3.0;
|
||||
range2 := max2 - min2;
|
||||
incrsize2 := range2 / noints;
|
||||
Xvalue2[0] := min2;
|
||||
Xvalue2[noints] := max2;
|
||||
for i := 1 to noInts do
|
||||
begin
|
||||
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
|
||||
Xvalue2[i] := min2 + (i) * incrSize2;
|
||||
prob1 := ProbF(Xvalue2[i-1],df1,df2);
|
||||
prob2 := ProbF(Xvalue2[i],df1,df2);
|
||||
if prob1 > prob2 then
|
||||
Var2Freq[i-1] := round((prob1-prob2) * nCases)
|
||||
else
|
||||
Var2Freq[i-1] := round((prob2-prob1) * nCases)
|
||||
end;
|
||||
cumfreq2[0] := Var2Freq[0];
|
||||
for i := 1 to noInts do
|
||||
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
|
||||
end
|
||||
else
|
||||
if PoissonDistChk.Checked then // Poisson distribution
|
||||
if CompareDist = cd_Poisson then
|
||||
begin
|
||||
CalcFreq(xValue2, var2Freq, cumFreq2, min1, min2, noInts, nCases, compareDist, StrToInt(DF1Edit.Text));
|
||||
name2 := 'Poisson';
|
||||
mean := 0; // use as parameter a in pdf call
|
||||
min2 := min1;
|
||||
max2 := max1;
|
||||
if max2 > 13 then
|
||||
begin
|
||||
ErrorMsg('Value > 13 found. Factorial too large - exiting.');
|
||||
exit;
|
||||
end;
|
||||
for i := 1 to nCases do
|
||||
mean := mean + StrToFloat(OS3MainFrm.DataGrid.Cells[col1, i]);
|
||||
mean := mean / nCases;
|
||||
cellval := IntToStr(round(mean));
|
||||
cellval := InputBox('Parameter Entry (mean)', 'DF 1', cellval);
|
||||
degfree := StrToFloat(cellval);
|
||||
range2 := max2 - min2;
|
||||
incrsize2 := range2 / noInts;
|
||||
// Xvalue2[0] := min2;
|
||||
Xvalue2[noints] := max2;
|
||||
for i := 1 to noints do
|
||||
begin
|
||||
Xvalue2[i-1] := min2 + (i-1) * incrSize2;
|
||||
Xvalue2[i] := min2 + (i) * incrSize2;
|
||||
poisson_pdf ( round(Xvalue2[i-1]), degfree, prob1 );
|
||||
// prob1 := (Xvalue2[i-1],df1);
|
||||
// prob2 := chisquaredprob(Xvalue2[i],df1);
|
||||
// if prob1 > prob2 then
|
||||
Var2Freq[i-1] := round((prob1) * nCases);
|
||||
// else Var2Freq[i-1] := round((prob2-prob1) * Ncases)
|
||||
end;
|
||||
cumfreq2[0] := var2Freq[0];
|
||||
for i := 1 to noInts do
|
||||
cumfreq2[i] := cumfreq2[i-1] + var2Freq[i];
|
||||
end;
|
||||
end
|
||||
else
|
||||
CalcTheoreticalDist(xValue2, var2Freq, cumFreq2, noInts, nCases, compareDist, name2);
|
||||
end;
|
||||
name1 := VarOneEdit.Text;
|
||||
|
||||
*)
|
||||
// Print distributions to report
|
||||
lReport := TStringList.Create;
|
||||
try
|
||||
lReport.Add('DISTRIBUTION COMPARISON by Bill Miller');
|
||||
@ -554,116 +425,24 @@ begin
|
||||
XValue1[i-1], Var1Freq[i-1], Cumfreq1[i-1], XValue2[i-1], Var2Freq[i-1], Cumfreq2[i-1]
|
||||
]);
|
||||
lReport.Add('');
|
||||
cellval := 'D';
|
||||
KS := KolmogorovTest(noInts, Cumfreq1, noInts, Cumfreq2, cellVal, lReport);
|
||||
// lReport.Add('Kolmogorov-Smirnov statistic := %5.3f', [KS]);
|
||||
KS := KolmogorovTest(noInts, Cumfreq1, noInts, Cumfreq2, 'D', lReport);
|
||||
lReport.Add('Kolmogorov-Smirnov statistic: %5.3f', [KS]);
|
||||
|
||||
FReportFrame.DisplayReport(lReport);
|
||||
finally
|
||||
lReport.Free;
|
||||
end;
|
||||
|
||||
// plot the cdfs
|
||||
Plot(FCumFreqChartFrame, cumFreq1, cumFreq2, VarOneEdit.Text, name2);
|
||||
// Plot the cumulative distributions
|
||||
Plot(FCumFreqChartFrame, cumFreq1, cumFreq2, VarOneEdit.Text, name2,
|
||||
'Cumulative Frequency', 'Plot of Cumulative Distributions');
|
||||
|
||||
// Plot the frequency distrigutions, if requested.
|
||||
if BothChk.Checked then
|
||||
Plot(FFreqChartFrame, var1Freq, var2Freq, VarOneEdit.Text, name2);
|
||||
Plot(FFreqChartFrame, var1Freq, var2Freq, VarOneEdit.Text, name2,
|
||||
'Frequency', 'Plot of Distributions');
|
||||
|
||||
FreqChartPage.TabVisible := BothChk.Checked;
|
||||
|
||||
(*
|
||||
|
||||
|
||||
|
||||
xtitle := 'Red = ' + VarOneEdit.Text + ' Blue = ' + name2;
|
||||
cellval := 'Plot of Cumulative Distributions';
|
||||
{
|
||||
if LinesChk.Checked then
|
||||
GraphFrm.barwideprop := 1.0
|
||||
else
|
||||
GraphFrm.barwideprop := 0.5;
|
||||
}
|
||||
GraphFrm.nosets := 2;
|
||||
GraphFrm.nbars := noints+1;
|
||||
GraphFrm.Heading := cellval;
|
||||
GraphFrm.XTitle := xtitle;
|
||||
GraphFrm.YTitle := 'Frequency';
|
||||
SetLength(GraphFrm.Ypoints,2,noints+1);
|
||||
SetLength(GraphFrm.Xpoints,1,noints+1);
|
||||
for k := 1 to noints+1 do
|
||||
begin
|
||||
GraphFrm.Ypoints[0,k-1] := Cumfreq1[k-1];
|
||||
GraphFrm.Ypoints[1,k-1] := CumFreq2[k-1];
|
||||
GraphFrm.Xpoints[0,k-1] := k;
|
||||
end;
|
||||
GraphFrm.AutoScaled := true;
|
||||
{
|
||||
if LinesChk.Checked then
|
||||
GraphFrm.GraphType := 6 // 3d lines
|
||||
else
|
||||
GraphFrm.GraphType := 8; // 3D points
|
||||
}
|
||||
GraphFrm.BackColor := clYellow;
|
||||
GraphFrm.WallColor := clBlue;
|
||||
GraphFrm.FloorColor := clGray;
|
||||
GraphFrm.ShowLeftWall := true;
|
||||
GraphFrm.ShowRightWall := true;
|
||||
GraphFrm.ShowBottomWall := true;
|
||||
GraphFrm.ShowBackWall := true;
|
||||
GraphFrm.ShowModal;
|
||||
GraphFrm.Xpoints := nil;
|
||||
GraphFrm.Ypoints := nil;
|
||||
|
||||
if BothChk.Checked then // plot the frequencies
|
||||
begin
|
||||
xtitle := 'Red = ' + VarOneEdit.Text + ' Blue = ' + name2;
|
||||
cellval := 'Plot of Cumulative Distributions';
|
||||
{
|
||||
if LinesChk.Checked then
|
||||
GraphFrm.BarWideProp := 1.0
|
||||
else
|
||||
GraphFrm.BarWideProp := 0.5;
|
||||
}
|
||||
GraphFrm.nosets := 2;
|
||||
GraphFrm.nbars := noints+1;
|
||||
GraphFrm.Heading := cellval;
|
||||
GraphFrm.XTitle := xtitle;
|
||||
GraphFrm.YTitle := 'Frequency';
|
||||
SetLength(GraphFrm.Ypoints,2,noints+1);
|
||||
SetLength(GraphFrm.Xpoints,1,noints+1);
|
||||
for k := 1 to noints+1 do
|
||||
begin
|
||||
GraphFrm.Ypoints[0,k-1] := Var1Freq[k-1];
|
||||
GraphFrm.Ypoints[1,k-1] := Var2Freq[k-1];
|
||||
GraphFrm.Xpoints[0,k-1] := k;
|
||||
end;
|
||||
GraphFrm.AutoScaled := true;
|
||||
{
|
||||
if LinesChk.Checked then
|
||||
GraphFrm.GraphType := 6 // 3d lines
|
||||
else
|
||||
GraphFrm.GraphType := 8; // 3D points
|
||||
}
|
||||
GraphFrm.BackColor := clYellow;
|
||||
GraphFrm.WallColor := clBlue;
|
||||
GraphFrm.FloorColor := clGray;
|
||||
GraphFrm.ShowLeftWall := true;
|
||||
GraphFrm.ShowRightWall := true;
|
||||
GraphFrm.ShowBottomWall := true;
|
||||
GraphFrm.ShowBackWall := true;
|
||||
GraphFrm.ShowModal;
|
||||
GraphFrm.Xpoints := nil;
|
||||
GraphFrm.Ypoints := nil;
|
||||
end;
|
||||
*)
|
||||
|
||||
// clean up
|
||||
Cumfreq2 := nil;
|
||||
Cumfreq1 := nil;
|
||||
XValue1 := nil;
|
||||
XValue2 := nil;
|
||||
Var2Freq := nil;
|
||||
Var1Freq := nil;
|
||||
end;
|
||||
|
||||
|
||||
@ -766,20 +545,23 @@ end;
|
||||
|
||||
|
||||
procedure TCompareDistFrm.Plot(AChartFrame: TChartFrame; Y1Values, Y2Values: DblDyneVec;
|
||||
AName1, AName2: String);
|
||||
ASeriesTitle1, ASeriesTitle2, AYTitle, ATitle: String);
|
||||
var
|
||||
ser1, ser2: TChartSeries;
|
||||
plotType: TPlotType;
|
||||
begin
|
||||
AChartFrame.Clear;
|
||||
|
||||
AChartFrame.SetTitle(ATitle);
|
||||
AChartFrame.SetYTitle(AYTitle);
|
||||
|
||||
if BarPlotBtn.Down then
|
||||
plotType := ptBars
|
||||
else
|
||||
plotType := ptLines;
|
||||
|
||||
ser1 := AChartFrame.PlotXY(plotType, nil, Y1Values, nil, nil, AName1, DATA_COLORS[0]);
|
||||
ser2 := AChartFrame.PlotXY(plotType, nil, Y2Values, nil, nil, AName2, DATA_Colors[1]);
|
||||
ser1 := AChartFrame.PlotXY(plotType, nil, Y1Values, nil, nil, ASeriesTitle1, DATA_COLORS[0]);
|
||||
ser2 := AChartFrame.PlotXY(plotType, nil, Y2Values, nil, nil, ASeriesTitle2, DATA_Colors[1]);
|
||||
|
||||
if (ser1 is TBarSeries) then
|
||||
begin
|
||||
|
@ -65,7 +65,7 @@ function factorial(x : integer) : integer;
|
||||
implementation
|
||||
|
||||
uses
|
||||
MathUnit;
|
||||
Utils, MathUnit;
|
||||
|
||||
function chisquaredprob(X : double; k : integer) : double;
|
||||
var
|
||||
@ -1629,28 +1629,22 @@ begin
|
||||
end;
|
||||
Result := NoGood;
|
||||
end;
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
|
||||
{ split a value into the whole and fractional parts}
|
||||
function WholeValue(value : double) : double;
|
||||
{ split a value into the whole and fractional parts}
|
||||
VAR
|
||||
whole : double;
|
||||
begin
|
||||
whole := Floor(value);
|
||||
Result := whole;
|
||||
Result := Floor(value);
|
||||
end;
|
||||
//---------------------------------------------------------------------------
|
||||
function FractionValue(value : double) : double;
|
||||
{ split a value into the whole and fractional parts }
|
||||
VAR
|
||||
fraction : double;
|
||||
begin
|
||||
fraction := value - Floor(value);
|
||||
Result := fraction;
|
||||
end;
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
Function Quartiles(TypeQ : integer; pcntile : double; N : integer;
|
||||
{ split a value into the whole and fractional parts }
|
||||
function FractionValue(value : double) : double;
|
||||
begin
|
||||
Result := value - Floor(value);
|
||||
end;
|
||||
|
||||
|
||||
function Quartiles(TypeQ : integer; pcntile : double; N : integer;
|
||||
VAR values : DblDyneVec) : double;
|
||||
VAR
|
||||
whole, fraction, Myresult, np, avalue, avalue1 : double;
|
||||
@ -1768,88 +1762,91 @@ begin
|
||||
result := p;
|
||||
end;
|
||||
|
||||
function KolmogorovTest(na : integer; const a: DblDyneVec; nb: integer;
|
||||
{ Statistical test whether two one-dimensional sets of points are compatible
|
||||
with coming from the same parent distribution, using the Kolmogorov test.
|
||||
That is, it is used to compare two experimental distributions of unbinned data.
|
||||
|
||||
Input:
|
||||
a,b: One-dimensional arrays of length na, nb, respectively.
|
||||
The elements of a and b must be given in ascending order.
|
||||
option is a character string to specify options
|
||||
"D" Put out a line of "Debug" printout
|
||||
"M" Return the Maximum Kolmogorov distance instead of prob
|
||||
|
||||
Output:
|
||||
The returned value prob is a calculated confidence level which gives a
|
||||
statistical test for compatibility of a and b.
|
||||
Values of prob close to zero are taken as indicating a small probability
|
||||
of compatibility. For two point sets drawn randomly from the same parent
|
||||
distribution, the value of prob should be uniformly distributed between
|
||||
zero and one.
|
||||
|
||||
in case of error the function return -1
|
||||
|
||||
If the 2 sets have a different number of points, the minimum of
|
||||
the two sets is used.
|
||||
|
||||
Method:
|
||||
The Kolmogorov test is used. The test statistic is the maximum deviation
|
||||
between the two integrated distribution functions, multiplied by the
|
||||
normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
|
||||
|
||||
Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
|
||||
(W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
|
||||
Statistical Methods in Experimental Physics, (North-Holland,
|
||||
Amsterdam 1971) 269-271)
|
||||
|
||||
Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
|
||||
-----------------------------------------------------------
|
||||
The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
|
||||
over the two sorted arrays a and b representing empirical distribution
|
||||
functions. The for-loop handles 3 cases: when the next points to be
|
||||
evaluated satisfy a>b, a<b, or a=b:
|
||||
|
||||
For the last case, a=b, the algorithm advances each array by one index in an
|
||||
attempt to move through the equality. However, this is incorrect when one or
|
||||
the other of a or b (or both) have a repeated value, call it x. For the KS
|
||||
statistic to be computed properly, rdiff needs to be calculated after all of
|
||||
the a and b at x have been tallied (this is due to the definition of the
|
||||
empirical distribution function; another way to convince yourself that the
|
||||
old CERNLIB method is wrong is that it implies that the function defined as the
|
||||
difference between a and b is multi-valued at x -- besides being ugly, this
|
||||
would invalidate Kolmogorov's theorem).
|
||||
|
||||
NOTE1
|
||||
A good description of the Kolmogorov test can be seen at:
|
||||
http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
|
||||
}
|
||||
function KolmogorovTest(na: integer; const a: DblDyneVec; nb: integer;
|
||||
const b: DblDyneVec; option: String; AReport: TStrings): double;
|
||||
VAR
|
||||
prob : double;
|
||||
opt : string;
|
||||
rna : double; // = na;
|
||||
rnb : double; // = nb;
|
||||
sa : double; // = 1./rna;
|
||||
sb : double; // = 1./rnb;
|
||||
rdiff, rdmax, x, z : double;
|
||||
i, ia, ib : integer;
|
||||
ok : boolean;
|
||||
var
|
||||
prob: double;
|
||||
opt: string;
|
||||
rna: double; // = na;
|
||||
rnb: double; // = nb;
|
||||
sa: double; // = 1./rna;
|
||||
sb: double; // = 1./rnb;
|
||||
rdiff, rdmax, x, z: double;
|
||||
i, ia, ib: integer;
|
||||
ok: boolean;
|
||||
begin
|
||||
// Statistical test whether two one-dimensional sets of points are compatible
|
||||
// with coming from the same parent distribution, using the Kolmogorov test.
|
||||
// That is, it is used to compare two experimental distributions of unbinned data.
|
||||
//
|
||||
// Input:
|
||||
// a,b: One-dimensional arrays of length na, nb, respectively.
|
||||
// The elements of a and b must be given in ascending order.
|
||||
// option is a character string to specify options
|
||||
// "D" Put out a line of "Debug" printout
|
||||
// "M" Return the Maximum Kolmogorov distance instead of prob
|
||||
//
|
||||
// Output:
|
||||
// The returned value prob is a calculated confidence level which gives a
|
||||
// statistical test for compatibility of a and b.
|
||||
// Values of prob close to zero are taken as indicating a small probability
|
||||
// of compatibility. For two point sets drawn randomly from the same parent
|
||||
// distribution, the value of prob should be uniformly distributed between
|
||||
// zero and one.
|
||||
//
|
||||
// in case of error the function return -1
|
||||
// If the 2 sets have a different number of points, the minimum of
|
||||
// the two sets is used.
|
||||
//
|
||||
// Method:
|
||||
// The Kolmogorov test is used. The test statistic is the maximum deviation
|
||||
// between the two integrated distribution functions, multiplied by the
|
||||
// normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
|
||||
//
|
||||
// Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
|
||||
// (W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
|
||||
// Statistical Methods in Experimental Physics, (North-Holland,
|
||||
// Amsterdam 1971) 269-271)
|
||||
//
|
||||
// Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
|
||||
// -----------------------------------------------------------
|
||||
// The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
|
||||
// over the two sorted arrays a and b representing empirical distribution
|
||||
// functions. The for-loop handles 3 cases: when the next points to be
|
||||
// evaluated satisfy a>b, a<b, or a=b:
|
||||
// For the last case, a=b, the algorithm advances each array by one index in an
|
||||
// attempt to move through the equality. However, this is incorrect when one or
|
||||
// the other of a or b (or both) have a repeated value, call it x. For the KS
|
||||
// statistic to be computed properly, rdiff needs to be calculated after all of
|
||||
// the a and b at x have been tallied (this is due to the definition of the
|
||||
// empirical distribution function; another way to convince yourself that the
|
||||
// old CERNLIB method is wrong is that it implies that the function defined as the
|
||||
// difference between a and b is multi-valued at x -- besides being ugly, this
|
||||
// would invalidate Kolmogorov's theorem).
|
||||
//
|
||||
// NOTE1
|
||||
// A good description of the Kolmogorov test can be seen at:
|
||||
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
|
||||
|
||||
opt := option;
|
||||
// opt.ToUpper();
|
||||
|
||||
prob := -1;
|
||||
// Require at least two points in each graph
|
||||
|
||||
{ Require at least two points in each graph }
|
||||
if (na <= 2) or (nb <= 2) then
|
||||
begin
|
||||
ShowMessage('KolmogorovTest - Sets must have more than 2 points');
|
||||
ErrorMsg('KolmogorovTest - Sets must have more than 2 points');
|
||||
exit;
|
||||
end;
|
||||
// Constants needed
|
||||
|
||||
{ Constants needed }
|
||||
rna := na;
|
||||
rnb := nb;
|
||||
sa := 1./rna;
|
||||
sb := 1. / rnb;
|
||||
// Starting values for main loop
|
||||
|
||||
{ Starting values for main loop }
|
||||
if (a[0] < b[0]) then
|
||||
begin
|
||||
rdiff := -sa;
|
||||
@ -1864,9 +1861,9 @@ begin
|
||||
end;
|
||||
rdmax := Abs(rdiff);
|
||||
|
||||
// Main loop over point sets to find max distance
|
||||
// rdiff is the running difference, and rdmax the max.
|
||||
ok := FALSE;
|
||||
{ Main loop over point sets to find max distance
|
||||
rdiff is the running difference, and rdmax the max. }
|
||||
ok := false;
|
||||
for i := 0 to na + nb - 1 do
|
||||
begin
|
||||
if (a[ia-1] < b[ib-1]) then
|
||||
@ -1875,7 +1872,7 @@ begin
|
||||
ia := ia + 1;
|
||||
if (ia > na) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
end
|
||||
@ -1885,7 +1882,7 @@ begin
|
||||
ib := ib + 1;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
end
|
||||
@ -1904,37 +1901,37 @@ begin
|
||||
end;
|
||||
if (ia > na) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
end;
|
||||
rdmax := Max(rdmax,Abs(rdiff));
|
||||
rdmax := Max(rdmax, abs(rdiff));
|
||||
end;
|
||||
// Should never terminate this loop with ok = kFALSE!
|
||||
|
||||
if (ok) then
|
||||
{ Should never terminate this loop with ok = FALSE! }
|
||||
if ok then
|
||||
begin
|
||||
rdmax := Max(rdmax,Abs(rdiff));
|
||||
rdmax := Max(rdmax, abs(rdiff));
|
||||
z := rdmax * Sqrt(rna * rnb / (rna + rnb));
|
||||
prob := KolmogorovProb(z);
|
||||
end;
|
||||
|
||||
// debug printout
|
||||
{ debug printout }
|
||||
if (opt = 'D') then
|
||||
AReport.Add(' Kolmogorov Probability: %g, Max Dist: %g', [prob, rdmax]);
|
||||
if(opt = 'M') then
|
||||
result := rdmax
|
||||
AReport.Add(' Kolmogorov Probability: %.5f, Max Dist: %.5f', [prob, rdmax]);
|
||||
|
||||
if (opt = 'M') then
|
||||
Result := rdmax
|
||||
else
|
||||
result := prob;
|
||||
Result := prob;
|
||||
end;
|
||||
|
||||
(* wp: moved to MathUnit for easier testing
|
||||
|
||||
procedure poisson_cdf ( x : integer; a : double; VAR cdf : double );
|
||||
VAR
|
||||
i : integer;
|
||||
|
Reference in New Issue
Block a user