You've already forked lazarus-ccr
LazStats: Complete refactoring of CompareDistUnit.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7723 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -65,7 +65,7 @@ function factorial(x : integer) : integer;
|
||||
implementation
|
||||
|
||||
uses
|
||||
MathUnit;
|
||||
Utils, MathUnit;
|
||||
|
||||
function chisquaredprob(X : double; k : integer) : double;
|
||||
var
|
||||
@ -1629,28 +1629,22 @@ begin
|
||||
end;
|
||||
Result := NoGood;
|
||||
end;
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
|
||||
{ split a value into the whole and fractional parts}
|
||||
function WholeValue(value : double) : double;
|
||||
{ split a value into the whole and fractional parts}
|
||||
VAR
|
||||
whole : double;
|
||||
begin
|
||||
whole := Floor(value);
|
||||
Result := whole;
|
||||
Result := Floor(value);
|
||||
end;
|
||||
//---------------------------------------------------------------------------
|
||||
function FractionValue(value : double) : double;
|
||||
{ split a value into the whole and fractional parts }
|
||||
VAR
|
||||
fraction : double;
|
||||
begin
|
||||
fraction := value - Floor(value);
|
||||
Result := fraction;
|
||||
end;
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
Function Quartiles(TypeQ : integer; pcntile : double; N : integer;
|
||||
{ split a value into the whole and fractional parts }
|
||||
function FractionValue(value : double) : double;
|
||||
begin
|
||||
Result := value - Floor(value);
|
||||
end;
|
||||
|
||||
|
||||
function Quartiles(TypeQ : integer; pcntile : double; N : integer;
|
||||
VAR values : DblDyneVec) : double;
|
||||
VAR
|
||||
whole, fraction, Myresult, np, avalue, avalue1 : double;
|
||||
@ -1768,173 +1762,176 @@ begin
|
||||
result := p;
|
||||
end;
|
||||
|
||||
function KolmogorovTest(na : integer; const a: DblDyneVec; nb: integer;
|
||||
{ Statistical test whether two one-dimensional sets of points are compatible
|
||||
with coming from the same parent distribution, using the Kolmogorov test.
|
||||
That is, it is used to compare two experimental distributions of unbinned data.
|
||||
|
||||
Input:
|
||||
a,b: One-dimensional arrays of length na, nb, respectively.
|
||||
The elements of a and b must be given in ascending order.
|
||||
option is a character string to specify options
|
||||
"D" Put out a line of "Debug" printout
|
||||
"M" Return the Maximum Kolmogorov distance instead of prob
|
||||
|
||||
Output:
|
||||
The returned value prob is a calculated confidence level which gives a
|
||||
statistical test for compatibility of a and b.
|
||||
Values of prob close to zero are taken as indicating a small probability
|
||||
of compatibility. For two point sets drawn randomly from the same parent
|
||||
distribution, the value of prob should be uniformly distributed between
|
||||
zero and one.
|
||||
|
||||
in case of error the function return -1
|
||||
|
||||
If the 2 sets have a different number of points, the minimum of
|
||||
the two sets is used.
|
||||
|
||||
Method:
|
||||
The Kolmogorov test is used. The test statistic is the maximum deviation
|
||||
between the two integrated distribution functions, multiplied by the
|
||||
normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
|
||||
|
||||
Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
|
||||
(W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
|
||||
Statistical Methods in Experimental Physics, (North-Holland,
|
||||
Amsterdam 1971) 269-271)
|
||||
|
||||
Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
|
||||
-----------------------------------------------------------
|
||||
The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
|
||||
over the two sorted arrays a and b representing empirical distribution
|
||||
functions. The for-loop handles 3 cases: when the next points to be
|
||||
evaluated satisfy a>b, a<b, or a=b:
|
||||
|
||||
For the last case, a=b, the algorithm advances each array by one index in an
|
||||
attempt to move through the equality. However, this is incorrect when one or
|
||||
the other of a or b (or both) have a repeated value, call it x. For the KS
|
||||
statistic to be computed properly, rdiff needs to be calculated after all of
|
||||
the a and b at x have been tallied (this is due to the definition of the
|
||||
empirical distribution function; another way to convince yourself that the
|
||||
old CERNLIB method is wrong is that it implies that the function defined as the
|
||||
difference between a and b is multi-valued at x -- besides being ugly, this
|
||||
would invalidate Kolmogorov's theorem).
|
||||
|
||||
NOTE1
|
||||
A good description of the Kolmogorov test can be seen at:
|
||||
http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
|
||||
}
|
||||
function KolmogorovTest(na: integer; const a: DblDyneVec; nb: integer;
|
||||
const b: DblDyneVec; option: String; AReport: TStrings): double;
|
||||
VAR
|
||||
prob : double;
|
||||
opt : string;
|
||||
rna : double; // = na;
|
||||
rnb : double; // = nb;
|
||||
sa : double; // = 1./rna;
|
||||
sb : double; // = 1./rnb;
|
||||
rdiff, rdmax, x, z : double;
|
||||
i, ia, ib : integer;
|
||||
ok : boolean;
|
||||
var
|
||||
prob: double;
|
||||
opt: string;
|
||||
rna: double; // = na;
|
||||
rnb: double; // = nb;
|
||||
sa: double; // = 1./rna;
|
||||
sb: double; // = 1./rnb;
|
||||
rdiff, rdmax, x, z: double;
|
||||
i, ia, ib: integer;
|
||||
ok: boolean;
|
||||
begin
|
||||
// Statistical test whether two one-dimensional sets of points are compatible
|
||||
// with coming from the same parent distribution, using the Kolmogorov test.
|
||||
// That is, it is used to compare two experimental distributions of unbinned data.
|
||||
//
|
||||
// Input:
|
||||
// a,b: One-dimensional arrays of length na, nb, respectively.
|
||||
// The elements of a and b must be given in ascending order.
|
||||
// option is a character string to specify options
|
||||
// "D" Put out a line of "Debug" printout
|
||||
// "M" Return the Maximum Kolmogorov distance instead of prob
|
||||
//
|
||||
// Output:
|
||||
// The returned value prob is a calculated confidence level which gives a
|
||||
// statistical test for compatibility of a and b.
|
||||
// Values of prob close to zero are taken as indicating a small probability
|
||||
// of compatibility. For two point sets drawn randomly from the same parent
|
||||
// distribution, the value of prob should be uniformly distributed between
|
||||
// zero and one.
|
||||
//
|
||||
// in case of error the function return -1
|
||||
// If the 2 sets have a different number of points, the minimum of
|
||||
// the two sets is used.
|
||||
//
|
||||
// Method:
|
||||
// The Kolmogorov test is used. The test statistic is the maximum deviation
|
||||
// between the two integrated distribution functions, multiplied by the
|
||||
// normalizing factor (rdmax*sqrt(na*nb/(na+nb)).
|
||||
//
|
||||
// Code adapted by Rene Brun from CERNLIB routine TKOLMO (Fred James)
|
||||
// (W.T. Eadie, D. Drijard, F.E. James, M. Roos and B. Sadoulet,
|
||||
// Statistical Methods in Experimental Physics, (North-Holland,
|
||||
// Amsterdam 1971) 269-271)
|
||||
//
|
||||
// Method Improvement by Jason A Detwiler (JADetwiler@lbl.gov)
|
||||
// -----------------------------------------------------------
|
||||
// The nuts-and-bolts of the TMath::KolmogorovTest() algorithm is a for-loop
|
||||
// over the two sorted arrays a and b representing empirical distribution
|
||||
// functions. The for-loop handles 3 cases: when the next points to be
|
||||
// evaluated satisfy a>b, a<b, or a=b:
|
||||
// For the last case, a=b, the algorithm advances each array by one index in an
|
||||
// attempt to move through the equality. However, this is incorrect when one or
|
||||
// the other of a or b (or both) have a repeated value, call it x. For the KS
|
||||
// statistic to be computed properly, rdiff needs to be calculated after all of
|
||||
// the a and b at x have been tallied (this is due to the definition of the
|
||||
// empirical distribution function; another way to convince yourself that the
|
||||
// old CERNLIB method is wrong is that it implies that the function defined as the
|
||||
// difference between a and b is multi-valued at x -- besides being ugly, this
|
||||
// would invalidate Kolmogorov's theorem).
|
||||
//
|
||||
// NOTE1
|
||||
// A good description of the Kolmogorov test can be seen at:
|
||||
// http://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
|
||||
opt := option;
|
||||
prob := -1;
|
||||
|
||||
opt := option;
|
||||
// opt.ToUpper();
|
||||
{ Require at least two points in each graph }
|
||||
if (na <= 2) or (nb <= 2) then
|
||||
begin
|
||||
ErrorMsg('KolmogorovTest - Sets must have more than 2 points');
|
||||
exit;
|
||||
end;
|
||||
|
||||
prob := -1;
|
||||
// Require at least two points in each graph
|
||||
if (na <= 2) or (nb <= 2) then
|
||||
begin
|
||||
ShowMessage('KolmogorovTest - Sets must have more than 2 points');
|
||||
exit;
|
||||
end;
|
||||
// Constants needed
|
||||
rna := na;
|
||||
rnb := nb;
|
||||
sa := 1./rna;
|
||||
sb := 1. / rnb;
|
||||
// Starting values for main loop
|
||||
if (a[0] < b[0]) then
|
||||
begin
|
||||
rdiff := -sa;
|
||||
ia := 2;
|
||||
ib := 1;
|
||||
end
|
||||
else
|
||||
begin
|
||||
rdiff := sb;
|
||||
ib := 2;
|
||||
ia := 1;
|
||||
end;
|
||||
rdmax := Abs(rdiff);
|
||||
{ Constants needed }
|
||||
rna := na;
|
||||
rnb := nb;
|
||||
sa := 1./rna;
|
||||
sb := 1. / rnb;
|
||||
|
||||
// Main loop over point sets to find max distance
|
||||
// rdiff is the running difference, and rdmax the max.
|
||||
ok := FALSE;
|
||||
for i := 0 to na + nb - 1 do
|
||||
begin
|
||||
if (a[ia-1] < b[ib-1]) then
|
||||
{ Starting values for main loop }
|
||||
if (a[0] < b[0]) then
|
||||
begin
|
||||
rdiff := -sa;
|
||||
ia := 2;
|
||||
ib := 1;
|
||||
end
|
||||
else
|
||||
begin
|
||||
rdiff := sb;
|
||||
ib := 2;
|
||||
ia := 1;
|
||||
end;
|
||||
rdmax := Abs(rdiff);
|
||||
|
||||
{ Main loop over point sets to find max distance
|
||||
rdiff is the running difference, and rdmax the max. }
|
||||
ok := false;
|
||||
for i := 0 to na + nb - 1 do
|
||||
begin
|
||||
if (a[ia-1] < b[ib-1]) then
|
||||
begin
|
||||
rdiff := rdiff - sa;
|
||||
ia := ia + 1;
|
||||
if (ia > na) then
|
||||
begin
|
||||
rdiff := rdiff - sa;
|
||||
ia := ia + 1;
|
||||
if (ia > na) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
break;
|
||||
end;
|
||||
end
|
||||
else if (a[ia-1] > b[ib-1]) then
|
||||
begin
|
||||
rdiff := rdiff + sb;
|
||||
ib := ib + 1;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
break;
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
x := a[ia-1];
|
||||
while((a[ia-1] = x) and (ia <= na)) do
|
||||
begin
|
||||
rdiff := rdiff - sa;
|
||||
ia := ia + 1;
|
||||
end;
|
||||
while ((b[ib-1] = x) and (ib <= nb)) do
|
||||
begin
|
||||
rdiff := rdiff + sb;
|
||||
ib := ib + 1;
|
||||
end;
|
||||
if (ia > na) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
break;
|
||||
end;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := TRUE;
|
||||
break;
|
||||
end;
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
rdmax := Max(rdmax,Abs(rdiff));
|
||||
end;
|
||||
// Should never terminate this loop with ok = kFALSE!
|
||||
end
|
||||
else if (a[ia-1] > b[ib-1]) then
|
||||
begin
|
||||
rdiff := rdiff + sb;
|
||||
ib := ib + 1;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
x := a[ia-1];
|
||||
while((a[ia-1] = x) and (ia <= na)) do
|
||||
begin
|
||||
rdiff := rdiff - sa;
|
||||
ia := ia + 1;
|
||||
end;
|
||||
while ((b[ib-1] = x) and (ib <= nb)) do
|
||||
begin
|
||||
rdiff := rdiff + sb;
|
||||
ib := ib + 1;
|
||||
end;
|
||||
if (ia > na) then
|
||||
begin
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
if (ib > nb) then
|
||||
begin
|
||||
ok := true;
|
||||
break;
|
||||
end;
|
||||
end;
|
||||
rdmax := Max(rdmax, abs(rdiff));
|
||||
end;
|
||||
|
||||
if (ok) then
|
||||
begin
|
||||
rdmax := Max(rdmax,Abs(rdiff));
|
||||
z := rdmax * Sqrt(rna * rnb / (rna + rnb));
|
||||
prob := KolmogorovProb(z);
|
||||
end;
|
||||
{ Should never terminate this loop with ok = FALSE! }
|
||||
if ok then
|
||||
begin
|
||||
rdmax := Max(rdmax, abs(rdiff));
|
||||
z := rdmax * Sqrt(rna * rnb / (rna + rnb));
|
||||
prob := KolmogorovProb(z);
|
||||
end;
|
||||
|
||||
// debug printout
|
||||
if (opt = 'D') then
|
||||
AReport.Add(' Kolmogorov Probability: %g, Max Dist: %g', [prob, rdmax]);
|
||||
if(opt = 'M') then
|
||||
result := rdmax
|
||||
else
|
||||
result := prob;
|
||||
{ debug printout }
|
||||
if (opt = 'D') then
|
||||
AReport.Add(' Kolmogorov Probability: %.5f, Max Dist: %.5f', [prob, rdmax]);
|
||||
|
||||
if (opt = 'M') then
|
||||
Result := rdmax
|
||||
else
|
||||
Result := prob;
|
||||
end;
|
||||
|
||||
(* wp: moved to MathUnit for easier testing
|
||||
|
||||
procedure poisson_cdf ( x : integer; a : double; VAR cdf : double );
|
||||
VAR
|
||||
i : integer;
|
||||
|
Reference in New Issue
Block a user