Files
lazarus-ccr/applications/lazstats/source/forms/analysis/descriptive/normalityunit.pas
2020-10-05 15:20:49 +00:00

484 lines
12 KiB
ObjectPascal

// Use file "cansas.laz" for testing
unit NormalityUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, Buttons, ExtCtrls, ComCtrls,
MainUnit, Globals, FunctionsLib, DataProcs, BasicStatsReportAndChartFormUnit,
ReportFrameUnit, ChartFrameUnit;
type
{ TNormalityFrm }
TNormalityFrm = class(TBasicStatsReportAndChartForm)
Panel1: TPanel;
TestVarEdit: TEdit;
Label2: TLabel;
VarInBtn: TBitBtn;
VarOutBtn: TBitBtn;
Label1: TLabel;
VarList: TListBox;
procedure VarInBtnClick(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
procedure VarOutBtnClick(Sender: TObject);
private
{ private declarations }
function Calc_ShapiroWilks(const AData: DblDyneVec; var W, Prob: Double): Boolean;
procedure Calc_Lilliefors(const AData: DblDyneVec;
out ASkew, AKurtosis, AStat: Double; out AConclusion: String);
procedure PlotData(AData: DblDyneVec);
function PrepareData(const VarName: String): DblDyneVec;
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
NormalityFrm: TNormalityFrm;
implementation
{$R *.lfm}
uses
Math, MathUnit,
TAChartUtils, TATextElements, TACustomSeries, TATransformations,
TAChartAxisUtils, TAChartAxis, TASources,
Utils;
function PopulateLeftMarks(AOwner: TComponent): TListChartSource;
const
MARKS: array[0..12] of double = (
0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4);
var
i: Integer;
begin
Result := TListChartSource.Create(AOwner);
for i := 0 to High(MARKS) do Result.Add(MARKS[i], MARKS[i]);
Result.Add(0.5, 0.5);
for i := High(Marks) downto 0 do Result.Add(1-MARKS[i], 1-MARKS[i]);
end;
function PopulateRightMarks(AOwner: TComponent): TListChartSource;
var
i: Integer;
z: Double;
begin
Result := TListChartSource.Create(AOwner);
for i := -6 to -1 do
begin
z := NormalDist(i);
Result.Add(z, z, 'μ -' + IntToStr(-i) + ' σ');
end;
Result.Add(0.5, 0.5, 'Mean (μ)');
for i := 1 to 6 do
begin
z := NormalDist(i);
Result.Add(z, z, 'μ + ' + IntToStr(i) + ' σ');
end;
end;
{ TNormalityFrm }
constructor TNormalityFrm.Create(AOwner: TComponent);
var
T : TChartAxisTransformations;
InvNormDistTransform: TAxisTransform;
begin
inherited;
T := TChartAxisTransformations.Create(FChartFrame);
InvNormDistTransform := TCumulNormDistrAxisTransform.Create(T);
InvNormDistTransform.Transformations := T;
with FChartFrame.Chart.LeftAxis do
begin
Transformations := T;
Marks.Source := PopulateLeftMarks(FChartFrame.Chart);
Marks.Style := smsValue;
Marks.OverlapPolicy := opHideNeighbour;
end;
with TChartAxis(FChartFrame.Chart.AxisList.Add) do
begin
Alignment := calRight;
Transformations := T;
Marks.Source := PopulateRightMarks(FChartFrame.Chart);
Marks.Style := smsLabel;
Marks.TextFormat := tfHTML;
Grid.Color := clSilver;
Grid.Style := psSolid;
end;
end;
procedure TNormalityFrm.AdjustConstraints;
begin
inherited;
ParamsPanel.Constraints.MinWidth := Max(
3*CloseBtn.Width + 2*CloseBtn.BorderSpacing.Left,
Max(Label1.Width, Label2.Width) + VarInBtn.Width + 2*VarList.BorderSpacing.Right);
ParamsPanel.Constraints.MinHeight := VarOutBtn.Top + VarOutBtn.Height +
VarOutBtn.BorderSpacing.Bottom + ButtonBevel.Height + Panel1.Height +
Panel1.BorderSpacing.Top;
Constraints.MinWidth := ParamsPanel.Constraints.MinWidth + 300;
Constraints.MinHeight := ParamsPanel.Constraints.MinHeight + 2*ParamsPanel.BorderSpacing.Top;
end;
procedure TNormalityFrm.Calc_Lilliefors(const AData: DblDyneVec;
out ASkew, AKurtosis, AStat: Double; out AConclusion: String);
var
i, j, n, n1: Integer;
freq: IntDyneVec = nil;
x: DblDyneVec = nil;
z: DblDyneVec = nil;
fval: DblDyneVec = nil;
jval: DblDyneVec = nil;
DP: DblDyneVec = nil;
mean, variance, stddev: Double;
deviation, devSqr: Double;
M2, M3, M4, DPP, t2: Double;
A0, C1, D025, D05, D10, D15: Double;
begin
// Count of data values
n := Length(AData) - 1; // -1 due to ignored element at index 0
SetLength(freq, n+1); // +1 to make the array 1-based
SetLength(x, n+1);
SetLength(z, n+1);
SetLength(fval, n+1);
Setlength(jval, n+1);
SetLength(DP, n+1);
// Now do Lilliefors
// Get unique scores and their frequencies
n1 := 1;
i := 1;
freq[1] := 1;
x[1] := AData[1];
repeat
for j := i + 1 to n do
if AData[j] = x[n1] then freq[n1] := freq[n1] + 1;
i := i + freq[n1];
if i <= n then
begin
n1 := n1 + 1;
x[n1] := AData[i];
freq[n1] := 1;
end;
until i > n;
// Now get skew and kurtosis of scores
mean := 0.0;
variance := 0.0;
for i := 1 to n do
begin
mean := mean + AData[i];
variance := variance + (AData[i] * AData[i]);
end;
variance := variance - sqr(mean) / n;
variance := variance / (n - 1);
stddev := sqrt(variance);
mean := mean / n;
// Obtain skew, kurtosis and z scores
M2 := 0.0;
M3 := 0.0;
M4 := 0.0;
for i := 1 to n do
begin
deviation := AData[i] - mean;
devsqr := deviation * deviation;
M2 := M2 + devsqr;
M3 := M3 + (deviation * devsqr);
M4 := M4 + (devsqr * devsqr);
z[i] := (AData[i] - mean) / stddev;
end;
for i := 1 to n1 do
x[i] := (x[i] - mean) / stddev;
ASkew := (n * M3) / ((n - 1) * (n - 2) * stddev * variance);
AKurtosis := (n * (n + 1) * M4) - (3 * M2 * M2 * (n - 1));
AKurtosis := AKurtosis /( (n - 1) * (n - 2) * (n - 3) * sqr(variance) );
// Obtain the test statistic
for i := 1 to n1 do
fval[i] := NormalDist(x[i]);
// Cumulative proportions
jval[1] := freq[1] / n;
for i := 2 to n1 do jval[i] := jval[i-1] + freq[i] / n;
for i := 1 to n1 do DP[i] := abs(jval[i] - fval[i]);
// Sort DP
for i := 1 to n1-1 do
for j := i+1 to n1 do
if DP[j] < DP[i] then
Exchange(DP[i], DP[j]);
DPP := DP[n1];
AStat := DPP;
//StatEdit.Text := Format('%.3f', [D]);
A0 := sqrt(n);
C1 := A0 - 0.01 + (0.85 / A0);
D15 := 0.775 / C1;
D10 := 0.819 / C1;
D05 := 0.895 / C1;
D025 := 0.995 / C1;
t2 := AStat;
if t2 > D025 then
AConclusion := 'Strong evidence against normality.';
if ((t2 <= D025) and (t2 > D05)) then
AConclusion := 'Sufficient evidence against normality.';
if ((t2 <= D05) and (t2 > D10)) then
AConclusion := 'Suggestive evidence against normality.';
if ((t2 <= D10) and (t2 > D15)) then
AConclusion := 'Little evidence against normality.';
if (t2 <= D15) then
AConclusion := 'No evidence against normality.';
end;
{ Call Shapiro-Wilks function }
function TNormalityFrm.Calc_ShapiroWilks(const AData: DblDyneVec;
var W, Prob: Double): boolean;
var
init: Boolean = false;
n, n1, n2: Integer;
a: DblDyneVec = nil;
ier: Integer; // error code
begin
init := false;
n := Length(AData) - 1; // -1 because of unused element at index 0
n1 := n;
n2 := n div 2;
SetLength(a, n + 1); // again: 1-based vector!
swilk(init, AData, n, n1, n2, a, W, Prob, ier);
Result := (ier = 0);
case ier of
0: ;
1: ErrorMsg('Error encountered: N < 3');
2: ErrorMsg('Error encountered: N > 5000');
3: ErrorMsg('Error encountered: N2 < N/2');
4: ErrorMsg('Error encountered: N1 > N or ((N1 < N) and (N < 20))');
5: ErrorMsg('Error encountered: The proportion censored (N - N1) / N > 0.8');
6: ErrorMsg('Error encountered: Data have zero range.');
7: ErrorMsg('Error encounterde: X values are not sorted in ascending order.');
else ErrorMsg('Error no. ' + IntToStr(ier) + ' encountered');
end;
end;
procedure TNormalityFrm.Compute;
var
w: Double = 0.0;
pw: Double = 0.0;
skew, kurtosis, D: double;
i, j, n: integer;
data: DblDyneVec = nil;
conclusion: String;
lReport: TStrings;
begin
inherited;
data := PrepareData(TestVarEdit.Text);
if data = nil then
exit;
n := Length(data) - 1; // Subtract 1 because of unused element at index 0
// Sort into ascending order
for i := 1 to n - 1 do
for j := i + 1 to n do
if data[i] > data[j] then
Exchange(data[i], data[j]);
if not Calc_ShapiroWilks(data, w, pw) then
exit;
// Now do Lilliefors
Calc_Lilliefors(data, skew, kurtosis, D, conclusion);
// Print results to report frame
lReport := TStringList.Create;
try
lReport.Add('NORMALITY TESTS FOR '+ TestVarEdit.Text);
lReport.Add('');
lReport.Add('Shapiro-Wilkes Test Results');
lReport.Add(' W: %8.5f', [w]); // WEdit.Tex
lReport.Add(' Probability: %8.5f', [pw]); // ProbEdit.Text
lReport.Add('');
lReport.Add('Lilliefors Test Results');
lReport.Add(' Skew: %8.5f', [skew]); // SkewEdit.Text
lReport.Add(' Kurtosis: %8.5f', [kurtosis]); // KurtosisEdit.Text
lReport.Add(' Test Statistic: %8.5f', [D]); // StatEdit.Text
lReport.Add(' Conclusion: %s', [conclusion]);
FReportFrame.DisplayReport(lReport);
finally
lReport.Free;
end;
// Probability plot
PlotData(data);
end;
{ Plots the cumulative probability of the data onto the probability grid
prepared in FormCreate }
procedure TNormalityFrm.PlotData(AData: DblDyneVec);
var
i, n: Integer;
ser: TChartSeries;
begin
FChartFrame.Clear;
n := Length(AData) - 1; // take care of unused value at index 0
ser := FChartFrame.PlotXY(ptSymbols, nil, nil, nil, nil, '', clBlack);
ser.AxisIndexX := 1;
ser.AxisIndexY := 0;
// Add data manually
for i := 1 to n do
ser.AddXY(AData[i], i / (N+1));
FChartFrame.Chart.Legend.Visible := false;
FChartFrame.SetTitle('Probability Plot of ' + TestVarEdit.Text);
FChartFrame.SetXTitle(TestVaredit.Text);
FChartFrame.SetYTitle('Cumulative probability');
end;
{ Extracts the data values from the data grid and stores them in a float array.
Note that, because the Shapiro-Wilks function has been implemented for 1-based
arrays the data array is considered to be 1-based although the 0-index element
is present as well. }
function TNormalityFrm.PrepareData(const VarName: String): DblDyneVec;
var
selCol: Integer;
i, n: Integer;
begin
// Find data column in the grid
selcol := 0;
for i := 1 to NoVariables do
if OS3MainFrm.DataGrid.Cells[i, 0] = VarName then
begin
selcol := i;
break;
end;
if selCol = 0 then
begin
Result := nil;
MessageDlg('No variable selected.', mtError, [mbOK], 0);
exit;
end;
// Place values into the data array
SetLength(Result, NoCases+1);
n := 0;
for i := 1 to NoCases do
begin
if not ValidValue(i, selcol) then
continue;
inc(n);
if not TryStrToFloat(OS3MainFrm.DataGrid.Cells[selcol, i], Result[n]) then
begin
Result := nil;
ErrorMsg('Non-numeric value encountered.');
exit;
end;
end;
SetLength(Result, n + 1); // Take care of unused element at index 0
end;
procedure TNormalityFrm.Reset;
var
i: integer;
begin
TestVarEdit.Text := '';
VarList.Items.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
if Assigned(FReportFrame) then
FReportFrame.Clear;
if Assigned(FChartFrame) then
FChartFrame.Clear;
UpdateBtnStates;
end;
procedure TNormalityFrm.UpdateBtnStates;
begin
VarInBtn.Enabled := (VarList.ItemIndex > -1) and (TestVarEdit.Text = '');
VarOutBtn.Enabled := (TestVarEdit.Text <> '');
if Assigned(FReportFrame) then
FReportFrame.UpdateBtnStates;
if Assigned(FChartFrame) then
FChartFrame.UpdateBtnStates;
end;
procedure TNormalityFrm.VarInBtnClick(Sender: TObject);
var
i: integer;
begin
i := VarList.ItemIndex;
if (i > -1) and (TestVarEdit.Text = '') then
begin
TestVarEdit.Text := VarList.Items[i];
VarList.Items.Delete(i);
end;
UpdateBtnStates;
end;
procedure TNormalityFrm.VarListDblClick(Sender: TObject);
begin
VarInBtnClick(nil);
end;
procedure TNormalityFrm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
procedure TNormalityFrm.VarOutBtnClick(Sender: TObject);
begin
if TestVarEdit.Text <> '' then
begin
VarList.Items.Add(TestVarEdit.Text);
TestVarEdit.Text := '';
end;
UpdateBtnStates;
end;
end.