Files
lazarus-ccr/applications/lazstats/source/forms/analysis/descriptive/normalityunit.pas

615 lines
16 KiB
ObjectPascal
Raw Normal View History

// Use file "cansas.laz" for testing
unit NormalityUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, Buttons, ExtCtrls, ComCtrls,
MainUnit, Globals, FunctionsLib, DataProcs, ReportFrameUnit, ChartFrameUnit;
type
{ TNormalityFrm }
TNormalityFrm = class(TForm)
Bevel1: TBevel;
PageControl: TPageControl;
ParamsPanel: TPanel;
ResetBtn: TButton;
ComputeBtn: TButton;
CloseBtn: TButton;
Panel1: TPanel;
ParamsSplitter: TSplitter;
ReportPage: TTabSheet;
ChartPage: TTabSheet;
TestVarEdit: TEdit;
Label2: TLabel;
VarInBtn: TBitBtn;
VarOutBtn: TBitBtn;
Label1: TLabel;
VarList: TListBox;
procedure ComputeBtnClick(Sender: TObject);
procedure FormActivate(Sender: TObject);
procedure FormCreate(Sender: TObject);
procedure ResetBtnClick(Sender: TObject);
procedure VarInBtnClick(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; User: boolean);
procedure VarOutBtnClick(Sender: TObject);
private
{ private declarations }
FReportFrame: TReportFrame;
FChartFrame: TChartFrame;
FAutoSized: boolean;
function Calc_ShapiroWilks(const AData: DblDyneVec; out W, Prob: Double): Boolean;
procedure Calc_Lilliefors(const AData: DblDyneVec;
out ASkew, AKurtosis, AStat: Double; out AConclusion: String);
function Norm(z : double): double;
procedure PlotData(AData: DblDyneVec);
function PrepareData(const VarName: String): DblDyneVec;
procedure UpdateBtnStates;
public
{ public declarations }
procedure Reset;
end;
var
NormalityFrm: TNormalityFrm;
implementation
{$R *.lfm}
uses
Math,
TAChartUtils, TATextElements, TACustomSeries, TATransformations,
TACustomSource, TASources,
Utils;
{ TNormalityFrm }
procedure TNormalityFrm.FormActivate(Sender: TObject);
var
w: Integer;
begin
if FAutoSized then
exit;
w := MaxValue([ResetBtn.Width, ComputeBtn.Width, CloseBtn.Width]);
ResetBtn.Constraints.MinWidth := w;
ComputeBtn.Constraints.MinWidth := w;
CloseBtn.Constraints.MinWidth := w;
ParamsPanel.Constraints.MinWidth := Max(
3*w + 2*CloseBtn.BorderSpacing.Left,
Max(Label1.Width, Label2.Width) + VarInBtn.Width + 2*VarList.BorderSpacing.Right);
ParamsPanel.Constraints.MinHeight := VarOutBtn.Top + VarOutBtn.Height +
VarOutBtn.BorderSpacing.Bottom + Bevel1.Height + Panel1.Height +
Panel1.BorderSpacing.Top;
Constraints.MinWidth := ParamsPanel.Constraints.MinWidth + 300;
Constraints.MinHeight := ParamsPanel.Constraints.MinHeight + 2*ParamsPanel.BorderSpacing.Top;
Position := poDesigned;
FAutoSized := True;
end;
procedure TNormalityFrm.FormCreate(Sender: TObject);
const
MARKS: array[0..23] of double = (0.001, 0.002, 0.005, 0.01, 0.02, 0.03, 0.05, 0.07,
0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.93, 0.95, 0.98, 0.99,
0.995, 0.998, 0.999);
var
T : TChartAxisTransformations;
InvNormDistTransform: TAxisTransform;
L: TListChartSource;
i: Integer;
begin
Assert(OS3MainFrm <> nil);
InitForm(self);
FReportFrame := TReportFrame.Create(self);
FReportFrame.Parent := ReportPage;
FReportFrame.Align := alClient;
FChartFrame := TChartFrame.Create(self);
FChartFrame.Parent := ChartPage;
FChartFrame.Align := alClient;
FChartFrame.Chart.BottomAxis.Intervals.MaxLength := 80;
FChartFrame.Chart.BottomAxis.Intervals.MinLength := 30;
T := TChartAxisTransformations.Create(FChartFrame);
InvNormDistTransform := TCumulNormDistrAxisTransform.Create(T);
InvNormDistTransform.Transformations := T;
FChartFrame.Chart.LeftAxis.Transformations := T;
FChartFrame.Chart.LeftAxis.Intervals.Tolerance := 10;
FChartFrame.Chart.LeftAxis.Intervals.Count := 30;
FChartFrame.Chart.LeftAxis.Intervals.Options := FChartFrame.Chart.leftAxis.Intervals.options + [aipUseCount]; //aipGraphCoords];
FChartFrame.Chart.LeftAxis.Marks.OverlapPolicy := opHideNeighbour;
{
L := TListChartSource.Create(FChartFrame.Chart);
for i := 0 to High(MARKS) do
L.Add(MARKS[i], MARKS[i]);
FChartFrame.Chart.LeftAxis.Marks.Source := L;
FChartFrame.Chart.LeftAxis.Marks.Style := smsLabel;
}
Reset;
end;
procedure TNormalityFrm.Calc_Lilliefors(const AData: DblDyneVec;
out ASkew, AKurtosis, AStat: Double; out AConclusion: String);
var
i, j, n, n1: Integer;
freq: IntDyneVec = nil;
x: DblDyneVec = nil;
z: DblDyneVec = nil;
fval: DblDyneVec = nil;
jval: DblDyneVec = nil;
DP: DblDyneVec = nil;
mean, variance, stddev: Double;
deviation, devSqr: Double;
M2, M3, M4, F1, DPP, t2: Double;
A0, C1, D025, D05, D10, D15: Double;
begin
// Count of data values
n := Length(AData) - 1; // -1 due to ignored element at index 0
SetLength(freq, n+1); // +1 to make the array 1-based
SetLength(x, n+1);
SetLength(z, n+1);
SetLength(fval, n+1);
Setlength(jval, n+1);
SetLength(DP, n+1);
// Now do Lilliefors
// Get unique scores and their frequencies
n1 := 1;
i := 1;
freq[1] := 1;
x[1] := AData[1];
repeat
for j := i + 1 to n do
if AData[j] = x[n1] then freq[n1] := freq[n1] + 1;
i := i + freq[n1];
if i <= n then
begin
n1 := n1 + 1;
x[n1] := AData[i];
freq[n1] := 1;
end;
until i > n;
// Now get skew and kurtosis of scores
mean := 0.0;
variance := 0.0;
for i := 1 to n do
begin
mean := mean + AData[i];
variance := variance + (AData[i] * AData[i]);
end;
variance := variance - sqr(mean) / n;
variance := variance / (n - 1);
stddev := sqrt(variance);
mean := mean / n;
// Obtain skew, kurtosis and z scores
M2 := 0.0;
M3 := 0.0;
M4 := 0.0;
for i := 1 to n do
begin
deviation := AData[i] - mean;
devsqr := deviation * deviation;
M2 := M2 + devsqr;
M3 := M3 + (deviation * devsqr);
M4 := M4 + (devsqr * devsqr);
z[i] := (AData[i] - mean) / stddev;
end;
for i := 1 to n1 do
x[i] := (x[i] - mean) / stddev;
ASkew := (n * M3) / ((n - 1) * (n - 2) * stddev * variance);
AKurtosis := (n * (n + 1) * M4) - (3 * M2 * M2 * (n - 1));
AKurtosis := AKurtosis /( (n - 1) * (n - 2) * (n - 3) * sqr(variance) );
// Obtain the test statistic
for i := 1 to n1 do
begin
F1 := Norm(x[i]);
if x[i] >= 0 then
fval[i] := 1.0 - (F1 / 2.0)
else
fval[i] := F1 / 2.0;
end;
// Cumulative proportions
jval[1] := freq[1] / n;
for i := 2 to n1 do jval[i] := jval[i-1] + freq[i] / n;
for i := 1 to n1 do DP[i] := abs(jval[i] - fval[i]);
// Sort DP
for i := 1 to n1-1 do
for j := i+1 to n1 do
if DP[j] < DP[i] then
Exchange(DP[i], DP[j]);
DPP := DP[n1];
AStat := DPP;
//StatEdit.Text := Format('%.3f', [D]);
A0 := sqrt(n);
C1 := A0 - 0.01 + (0.85 / A0);
D15 := 0.775 / C1;
D10 := 0.819 / C1;
D05 := 0.895 / C1;
D025 := 0.995 / C1;
t2 := AStat;
if t2 > D025 then
AConclusion := 'Strong evidence against normality.';
if ((t2 <= D025) and (t2 > D05)) then
AConclusion := 'Sufficient evidence against normality.';
if ((t2 <= D05) and (t2 > D10)) then
AConclusion := 'Suggestive evidence against normality.';
if ((t2 <= D10) and (t2 > D15)) then
AConclusion := 'Little evidence against normality.';
if (t2 <= D15) then
AConclusion := 'No evidence against normality.';
end;
{ Call Shapiro-Wilks function }
function TNormalityFrm.Calc_ShapiroWilks(const AData: DblDyneVec;
out W, Prob: Double): boolean;
var
init: Boolean = false;
n, n1, n2: Integer;
a: DblDyneVec = nil;
ier: Integer; // error code
begin
init := false;
n := Length(AData) - 1; // -1 because of unused element at index 0
n1 := n;
n2 := n div 2;
SetLength(a, n + 1); // again: 1-based vector!
swilk(init, AData, n, n1, n2, a, W, Prob, ier);
Result := (ier = 0);
case ier of
0: ;
1: ErrorMsg('Error encountered: N < 3');
2: ErrorMsg('Error encountered: N > 5000');
3: ErrorMsg('Error encountered: N2 < N/2');
4: ErrorMsg('Error encountered: N1 > N or ((N1 < N) and (N < 20))');
5: ErrorMsg('Error encountered: The proportion censored (N - N1) / N > 0.8');
6: ErrorMsg('Error encountered: Data have zero range.');
7: ErrorMsg('Error encounterde: X values are not sorted in ascending order.');
else ErrorMsg('Error no. ' + IntToStr(ier) + ' encountered');
end;
end;
procedure TNormalityFrm.ComputeBtnClick(Sender: TObject);
var
w: Double = 0.0;
pw: Double = 0.0;
skew, kurtosis: double;
mean, variance, stddev, deviation, devsqr, M2, M3, M4: double;
i, j, n, n1: integer;
data: DblDyneVec = nil;
//a
z, x: DblDyneVec;
freq: IntDyneVec;
fval, jval, DP: DblDyneVec;
F1, DPP, D, A0, C1, D15, D10, D05, D025, t2: double;
init : boolean;
conclusion: String;
lReport: TStrings;
begin
data := PrepareData(TestVarEdit.Text);
if data = nil then
exit;
n := Length(data) - 1; // Subtract 1 because of unused element at index 0
(*
// SetLength(a, n+1); // +1 because all arrays are considered to begin at 1 here
SetLength(freq, n+1);
SetLength(z, n+1);
SetLength(x, n+1);
SetLength(fval, n+1);
SetLength(jval, n+1);
SetLength(DP, n+1);
*)
// Sort into ascending order
for i := 1 to n - 1 do
for j := i + 1 to n do
if data[i] > data[j] then
Exchange(data[i], data[j]);
if not Calc_ShapiroWilks(data, w, pw) then
exit;
(*
// Call Shapiro-Wilks function
init := false;
swilk(init, data, n, n1, n2, a, w, pw, ier);
if ier <> 0 then
begin
ErrorMsg('Error encountered: ' + IntToStr(ier));
Cleanup;
exit;
end;
*)
{
WEdit.Text := Format('%.4f', [w]);
ProbEdit.Text := Format('%.4f', [pw]);
}
// Now do Lilliefors
Calc_Lilliefors(data, skew, kurtosis, D, conclusion);
(*
// Get unique scores and their frequencies
n1 := 1;
i := 1;
freq[1] := 1;
x[1] := data[1];
repeat
for j := i + 1 to n do
begin
if data[j] = x[n1] then freq[n1] := freq[n1] + 1;
end;
i := i + freq[n1];
if i <= n then
begin
n1 := n1 + 1;
x[n1] := data[i];
freq[n1] := 1;
end;
until i > n;
// Now get skew and kurtosis of scores
mean := 0.0;
variance := 0.0;
for i := 1 to n do
begin
mean := mean + data[i];
variance := variance + (data[i] * data[i]);
end;
variance := variance - (mean * mean) / n;
variance := variance / (n - 1);
stddev := sqrt(variance);
mean := mean / n;
// Obtain skew, kurtosis and z scores
M2 := 0.0;
M3 := 0.0;
M4 := 0.0;
for i := 1 to n do
begin
deviation := data[i] - mean;
devsqr := deviation * deviation;
M2 := M2 + devsqr;
M3 := M3 + (deviation * devsqr);
M4 := M4 + (devsqr * devsqr);
z[i] := (data[i] - mean) / stddev;
end;
for i := 1 to n1 do x[i] := (x[i] - mean) / stddev;
skew := (n * M3) / ((n - 1) * (n - 2) * stddev * variance);
kurtosis := (n * (n + 1) * M4) - (3 * M2 * M2 * (n - 1));
kurtosis := kurtosis /( (n - 1) * (n - 2) * (n - 3) * (variance * variance) );
//SkewEdit.Text := Format('%.3f', [skew]);
//KurtosisEdit.Text := Format('%.3f', [kurtosis]);
// Obtain the test statistic
for i := 1 to n1 do
begin
F1 := Norm(x[i]);
if x[i] >= 0 then
fval[i] := 1.0 - (F1 / 2.0)
else
fval[i] := F1 / 2.0;
end;
// Cumulative proportions
jval[1] := freq[1] / n;
for i := 2 to n1 do jval[i] := jval[i-1] + freq[i] / n;
for i := 1 to n1 do DP[i] := abs(jval[i] - fval[i]);
// Sort DP
for i := 1 to n1-1 do
for j := i+1 to n1 do
if DP[j] < DP[i] then
Exchange(DP[i], DP[j]);
DPP := DP[n1];
D := DPP;
//StatEdit.Text := Format('%.3f', [D]);
A0 := sqrt(n);
C1 := A0 - 0.01 + (0.85 / A0);
D15 := 0.775 / C1;
D10 := 0.819 / C1;
D05 := 0.895 / C1;
D025 := 0.995 / C1;
t2 := D;
if t2 > D025 then conclusion := 'Strong evidence against normality.';
if ((t2 <= D025) and (t2 > D05)) then conclusion := 'Sufficient evidence against normality.';
if ((t2 <= D05) and (t2 > D10)) then conclusion := 'Suggestive evidence against normality.';
if ((t2 <= D10) and (t2 > D15)) then conclusion := 'Little evidence against normality.';
if (t2 <= D15) then conclusion := 'No evidence against normality.';
*)
lReport := TStringList.Create;
try
lReport.Add('NORMALITY TESTS FOR '+ TestVarEdit.Text);
lReport.Add('');
lReport.Add('Shapiro-Wilkes Test Results');
lReport.Add(' W: %8.5f', [w]); // WEdit.Tex
lReport.Add(' Probability: %8.5f', [pw]); // ProbEdit.Text
lReport.Add('');
lReport.Add('Lilliefors Test Results');
lReport.Add(' Skew: %8.5f', [skew]); // SkewEdit.Text
lReport.Add(' Kurtosis: %8.5f', [kurtosis]); // KurtosisEdit.Text
lReport.Add(' Test Statistic: %8.5f', [D]); // StatEdit.Text
lReport.Add(' Conclusion: %s', [conclusion]);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
PlotData(data);
end;
function TNormalityFrm.Norm(z : double) : double;
var
p: double;
begin
z := abs(z);
p := 1.0 + z * (0.04986735 + z * (0.02114101 + z * (0.00327763 +
z * (0.0000380036 + z * (0.0000488906 + z * 0.000005383)))));
p := p * p;
p := p * p;
p := p * p;
Result := 1.0 / (p * p);
end;
procedure TNormalityFrm.PlotData(AData: DblDyneVec);
var
i, n: Integer;
ser: TChartSeries;
begin
n := Length(AData) - 1; // take care of unused value at index 0
ser := FChartFrame.PlotXY(ptSymbols, nil, nil, nil, nil, '', clBlack);
ser.AxisIndexX := 1;
ser.AxisIndexY := 0;
// Add data manually
for i := 1 to n do
ser.AddXY(AData[i], i / (N+1));
FChartFrame.Chart.Legend.Visible := false;
FChartFrame.SetTitle('Probability Plot of ' + TestVarEdit.Text);
FChartFrame.SetXTitle(TestVaredit.Text);
FChartFrame.SetYTitle('Cumulative probability');
end;
{ Extracts the data values from the data grid and stores them in a float array.
Note that, because the Shapiro-Wilks function has been implemented for 1-based
arrays the data array is considered to be 1-based although the 0-index element
is present as well. }
function TNormalityFrm.PrepareData(const VarName: String): DblDyneVec;
var
selCol: Integer;
i, n: Integer;
begin
// Find data column in the grid
selcol := 0;
for i := 1 to NoVariables do
if OS3MainFrm.DataGrid.Cells[i, 0] = VarName then
begin
selcol := i;
break;
end;
if selCol = 0 then
begin
Result := nil;
MessageDlg('No variable selected.', mtError, [mbOK], 0);
exit;
end;
// Place values into the data array
SetLength(Result, NoCases+1);
n := 0;
for i := 1 to NoCases do
begin
if not ValidValue(i, selcol) then
continue;
inc(n);
if not TryStrToFloat(OS3MainFrm.DataGrid.Cells[selcol, i], Result[n]) then
begin
Result := nil;
ErrorMsg('Non-numeric value encountered.');
exit;
end;
end;
SetLength(Result, n + 1); // Take care of unused element at index 0
end;
procedure TNormalityFrm.Reset;
var
i: integer;
begin
TestVarEdit.Text := '';
VarList.Items.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
UpdateBtnStates;
end;
procedure TNormalityFrm.ResetBtnClick(Sender: TObject);
begin
Reset;
end;
procedure TNormalityFrm.UpdateBtnStates;
begin
VarInBtn.Enabled := (VarList.ItemIndex > -1) and (TestVarEdit.Text = '');
VarOutBtn.Enabled := (TestVarEdit.Text <> '');
FReportFrame.UpdateBtnStates;
FChartFrame.UpdateBtnStates;
end;
procedure TNormalityFrm.VarInBtnClick(Sender: TObject);
var
i: integer;
begin
i := VarList.ItemIndex;
if (i > -1) and (TestVarEdit.Text = '') then
begin
TestVarEdit.Text := VarList.Items[i];
VarList.Items.Delete(i);
end;
UpdateBtnStates;
end;
procedure TNormalityFrm.VarListDblClick(Sender: TObject);
begin
VarInBtnClick(nil);
end;
procedure TNormalityFrm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
procedure TNormalityFrm.VarOutBtnClick(Sender: TObject);
begin
if TestVarEdit.Text <> '' then
begin
VarList.Items.Add(TestVarEdit.Text);
TestVarEdit.Text := '';
end;
UpdateBtnStates;
end;
end.