Lazstats: Massive refactoring of DescriptiveUnits: Move calculation to new TDescriptiveStats class. More user-friendly interface.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7730 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-02 17:14:33 +00:00
parent 0175c44fa5
commit c25230eb89
6 changed files with 682 additions and 338 deletions

View File

@ -1558,11 +1558,6 @@
<DebugInfoType Value="dsDwarf2"/>
</Debugging>
<LinkSmart Value="True"/>
<Options>
<Win32>
<GraphicApplication Value="True"/>
</Win32>
</Options>
</Linking>
</CompilerOptions>
<Debugging>

View File

@ -1,37 +1,45 @@
inherited DescriptiveFrm: TDescriptiveFrm
Left = 526
Height = 384
Height = 432
Top = 202
Width = 900
HelpType = htKeyword
HelpKeyword = 'html/DistributionStatistics.htm'
Caption = 'Descriptive Statistics'
ClientHeight = 384
ClientHeight = 432
ClientWidth = 900
inherited ParamsPanel: TPanel
Left = 16
Height = 352
Height = 400
Top = 16
Width = 322
BorderSpacing.Around = 8
ClientHeight = 352
ClientHeight = 400
ClientWidth = 322
inherited CloseBtn: TButton
Left = 267
Top = 375
TabOrder = 9
end
inherited ComputeBtn: TButton
AnchorSideBottom.Control = ParamsPanel
AnchorSideBottom.Side = asrBottom
Left = 184
Top = 375
TabOrder = 10
end
inherited ResetBtn: TButton
Left = 122
Top = 375
TabOrder = 8
end
inherited HelpBtn: TButton
Left = 63
Top = 375
TabOrder = 7
end
inherited ButtonBevel: TBevel
Top = 311
Top = 359
Width = 322
end
object Label2: TLabel[5]
@ -51,7 +59,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
AnchorSideRight.Control = AllBtn
AnchorSideBottom.Control = OptionsGroup
Left = 0
Height = 183
Height = 202
Top = 17
Width = 130
Anchors = [akTop, akLeft, akRight, akBottom]
@ -84,7 +92,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
AnchorSideBottom.Control = VarList
AnchorSideBottom.Side = asrBottom
Left = 192
Height = 183
Height = 202
Top = 17
Width = 130
Anchors = [akTop, akLeft, akRight, akBottom]
@ -149,7 +157,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
AnchorSideRight.Control = CIEdit
Left = 0
Height = 15
Top = 292
Top = 311
Width = 174
BorderSpacing.Right = 8
Caption = 'Confidence Interval for the Mean'
@ -161,7 +169,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
AnchorSideBottom.Control = CIEdit
Left = 0
Height = 72
Top = 208
Top = 227
Width = 306
Anchors = [akLeft, akBottom]
AutoSize = True
@ -197,7 +205,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
Caption = 'z Scores to Grid'
TabOrder = 1
end
object PcntileChk: TCheckBox
object PercentileChk: TCheckBox
AnchorSideTop.Side = asrBottom
Left = 12
Height = 19
@ -208,7 +216,7 @@ inherited DescriptiveFrm: TDescriptiveFrm
Caption = 'Show Percentile Ranks'
TabOrder = 2
end
object AllQrtilesChk: TCheckBox
object AllQuartilesChk: TCheckBox
AnchorSideTop.Side = asrBottom
Left = 174
Height = 19
@ -224,29 +232,66 @@ inherited DescriptiveFrm: TDescriptiveFrm
AnchorSideLeft.Control = Label1
AnchorSideLeft.Side = asrBottom
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = DecPlacesEdit
Left = 182
Height = 23
Top = 288
Width = 41
Top = 307
Width = 50
Alignment = taRightJustify
Anchors = [akLeft, akBottom]
BorderSpacing.Left = 8
TabOrder = 6
Text = '95.0'
end
object DecPlacesEdit: TSpinEdit[15]
AnchorSideLeft.Control = CIEdit
AnchorSideTop.Side = asrBottom
AnchorSideRight.Control = CIEdit
AnchorSideRight.Side = asrBottom
AnchorSideBottom.Control = ButtonBevel
Left = 182
Height = 23
Top = 334
Width = 50
Alignment = taRightJustify
Anchors = [akLeft, akRight, akBottom]
BorderSpacing.Top = 4
BorderSpacing.Bottom = 2
MaxValue = 10
TabOrder = 11
Value = 3
end
object Label4: TLabel[16]
AnchorSideLeft.Control = Label1
AnchorSideTop.Control = DecPlacesEdit
AnchorSideTop.Side = asrCenter
Left = 0
Height = 15
Top = 338
Width = 79
Caption = 'Decimal places'
ParentColor = False
end
end
inherited ParamsSplitter: TSplitter
Left = 350
Height = 384
Height = 432
end
object ReportPanel: TPanel[2]
Left = 355
Height = 384
Top = 0
Width = 545
object PageControl: TPageControl[2]
Left = 359
Height = 416
Top = 8
Width = 533
ActivePage = ReportPage
Align = alClient
Anchors = [akTop, akLeft, akRight]
BevelOuter = bvNone
BorderSpacing.Left = 4
BorderSpacing.Top = 8
BorderSpacing.Right = 8
BorderSpacing.Bottom = 8
TabIndex = 0
TabOrder = 2
object ReportPage: TTabSheet
Caption = 'Report'
end
end
end

View File

@ -6,23 +6,73 @@ interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, ExtCtrls, Buttons,
MainUnit, Globals, FunctionsLib, BasicStatsReportFormUnit,
StdCtrls, ExtCtrls, Buttons, Spin, ComCtrls, Grids,
MainUnit, Globals, FunctionsLib, ReportFrameUnit, BasicStatsReportFormUnit,
DataProcs, DictionaryUnit;
type
{ TDescriptiveFrm }
{ TDescriptiveStats }
TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion);
TDescriptiveOptions = set of TDescriptiveOption;
TQuartileMethod = 1..8;
TQuartile = 1..3;
TDescriptiveStats = class
private
FDataGrid: TStringGrid;
FColIndex: Integer;
FConfLevel: Double; // usually 0.95
FColsSelected: IntDyneVec;
FMean, FStdErrorMean, FDeltaMean: Double;
FMin, FMax: Double;
FSum: Double;
FVariance, FStdDev: Double;
FSkew, FStdErrorSkew: Double;
FKurtosis, FStdErrorKurtosis: Double;
FFirstQuartile, FMedian, FThirdQuartile: Double;
FCategoryValues, FPercentiles: DblDyneVec;
FFreqValues: IntDyneVec;
FOptions: TDescriptiveOptions;
FNumCases: Integer;
FQuartiles: array[TQuartileMethod, TQuartile] of Double;
procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec);
function Calc_DeltaMean(AStdErrorOfMean: Double): Double;
procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double;
out M2, M3, M4: Double);
procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double);
procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer;
out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
procedure Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
out ASum, ASumOfSquares, AMin, AMax: Double);
procedure CollectValues(out AValues: DblDyneVec);
procedure PercentileRank(const AValues: DblDyneVec;
out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
public
constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double);
procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings);
property Mean: Double read FMean;
property StdDev: Double read FStdDev;
// more can be added...
end;
{ TDescriptiveFrm }
TDescriptiveFrm = class(TBasicStatsReportForm)
CaseChk: TCheckBox;
DecPlacesEdit: TSpinEdit;
Label4: TLabel;
PageControl: TPageControl;
ReportPage: TTabSheet;
ZScoresToGridChk: TCheckBox;
AllQrtilesChk: TCheckBox;
AllQuartilesChk: TCheckBox;
Label2: TLabel;
Label3: TLabel;
ReportPanel: TPanel;
PcntileChk: TCheckBox;
PercentileChk: TCheckBox;
OptionsGroup: TGroupBox;
InBtn: TBitBtn;
OutBtn: TBitBtn;
@ -39,9 +89,9 @@ type
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
private
{ private declarations }
sum, variance, stddev, value, mean, min, max, range, skew, prob, df, CI : double;
kurtosis, z, semean, seskew, sekurtosis, deviation, devsqr, M2, M3, M4 : double;
function GetReportFrame(APageIndex: Integer): TReportFrame;
procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec;
AMean, AStdDev: Double);
protected
procedure AdjustConstraints; override;
@ -49,6 +99,7 @@ type
procedure UpdateBtnStates; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
@ -61,15 +112,381 @@ implementation
{$R *.lfm}
uses
Math;
Math,
Utils;
{===============================================================================
* TDescriptiveStats
*-------------------------------------------------------------------------------
* TDescriptiveStats is a helper class which
* - does all the required calculations (Analyze) and
* - prepares the report for each variable (WriteToReport).
*==============================================================================}
constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
AColsSelected: IntDyneVec; AConfLevel: Double);
begin
inherited Create;
FDataGrid := ADataGrid;
FColsSelected := AColsSelected;
FConfLevel := AConfLevel;
end;
procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
var
SS: Double;
values: DblDyneVec;
M2, M3, M4: Double;
begin
FMean := NaN;
FVariance := NaN;
FStdDev := NaN;
FStdErrorMean := NaN;
FDeltaMean := NaN;
FSkew := NaN;
FStdErrorSkew := NaN;
FColIndex := AColIndex;
FOptions := AOptions;
CollectValues(values);
FNumCases := Length(values);
SortOnX(values);
Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax);
if FNumCases > 0 then begin
FMean := FSum / FNumCases;
if FNumCases > 1 then
begin
FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1);
FStdDev := sqrt(FVariance);
FStdErrorMean := sqrt(FVariance / FNumCases);
FDeltaMean := Calc_DeltaMean(FStdErrorMean);
end;
Calc_Moments(values, FMean, M2, M3, M4);
Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis);
Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile);
if (doAlternativeQuartiles in FOptions) then
Calc_AlternativeQuartiles(values);
if (doPercentileRanks in FOptions) then
PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues);
end;
end;
procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec);
var
nCases: Integer;
q: TQuartile;
m: TQuartileMethod;
begin
nCases := Length(AValues);
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
for q := Low(TQuartile) to High(TQuartile) do
FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues);
end;
// Tolerance around the mean
function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double;
var
alpha: Double;
confLev: Double;
DOF: Integer;
begin
alpha := (1 - FConfLevel) / 2;
confLev := 1 - alpha;
if FNumCases < 120 then
begin
DOF := FNumCases - 1;
Result := AStdErrorOfMean * InverseT(confLev, DOF);
end else
Result := AStdErrorOfMean * InverseZ(confLev);
end;
procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec;
AMean: Double; out M2, M3, M4: Double);
var
i: Integer;
dev, devSqr: Double;
begin
M2 := 0;
M3 := 0;
M4 := 0;
for i := 0 to High(AValues) do
begin
dev := AValues[i] - AMean;
devSqr := Sqr(dev);
M2 := M2 + devSqr;
M3 := M3 + dev * devSqr;
M4 := M4 + sqr(devSqr);
end;
end;
procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec;
out Q1, Median, Q3: Double);
var
n: Integer;
begin
n := Length(AValues);
Q1 := Quartiles(2, 0.25, n, AValues);
Median := Quartiles(2, 0.5, n, AValues);
Q3 := Quartiles(2, 0.75, n, AValues);
end;
procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double;
ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
var
num, denom: Double;
stdDev3, stdDev4: Double;
begin
Skew := NaN;
StdErrorSkew := NaN;
Kurtosis := NaN;
StdErrorKurtosis := NaN;
stdDev3 := StdDev * StdDev * StdDev;
stdDev4 := StdDev3 * StdDev;
if ANumCases > 2 then
begin
Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3);
num := 6.0 * ANumCases * (ANumCases - 1);
denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3);
StdErrorSkew := sqrt(num / denom);
end;
if ANumCases > 3 then
begin
num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1);
denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4;
Kurtosis := num / denom;
num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew);
denom := (ANumCases - 3) * (ANumCases + 5);
StdErrorKurtosis := sqrt(num / denom);
end;
end;
procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
out ASum, ASumOfSquares, AMin, AMax: Double);
var
i: Integer;
begin
ASum := 0.0;
ASumOfSquares := 0;
AMin := Infinity;
AMax := -Infinity;
for i := 0 to High(AValues) do
begin
ASum := ASum + AValues[i];
ASumOfSquares := ASumOfSquares + sqr(AValues[i]);
if AValues[i] < AMin then AMin := AValues[i];
if AValues[i] > AMax then AMax := AValues[i];
end;
end;
procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec);
var
i, n: Integer;
begin
AValues := nil; // silence the compiler
SetLength(AValues, NoCases);
n := 0;
for i := 1 to NoCases do
begin
if (doCasewiseDeletion in FOptions) then
begin
// Do not consider a case when any variable is empty
if not ValidValue(i, FColIndex) then
continue;
end else
begin
// Do not consider a case when the current variable is empty
if not GoodRecord(i, Length(FColsSelected), FColsSelected) then
continue;
end;
if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then
inc(n)
else
raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"',
[FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]);
end;
SetLength(AValues, n);
end;
// Computes the percentile ranks of values stored in the data grid at the
// loaded columns. The values are assumed to be sorted.
procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out
ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
var
i, nCases, iCat, nCategories: Integer;
lastCategoryValue: Double;
cumFreqCentered: Double;
cumFreq: Integer;
begin
// silence the compiler
ACategoryvalues := nil;
AFreq := nil;
APercentiles := nil;
nCases := Length(AValues);
SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later
SetLength(AFreq, nCases);
// Get count of unique values and frequencies of each
lastCategoryValue := AValues[0];
ACategoryValues[0] := lastCategoryValue;
AFreq[0] := 1;
iCat := 0;
for i := 1 to nCases-1 do
begin
if (lastCategoryValue = AValues[i]) then
AFreq[iCat] := AFreq[iCat] + 1
else
begin // new value
inc(iCat);
AFreq[iCat] := 1;
lastCategoryValue := AValues[i];
ACategoryValues[iCat] := lastCategoryValue;
end;
end;
// trim arrays
nCategories := iCat + 1;
SetLength(ACategoryValues, nCategories);
SetLength(AFreq, nCategories);
// Get cumulative frequencies and percentile ranks
SetLength(APercentiles, nCategories);
APercentiles[0] := AFreq[0] * 0.5 / nCases;
cumFreq := AFreq[0];
for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1
begin
cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point
APercentiles[i] := cumFreqCentered / nCases;
cumFreq := cumFreq + AFreq[i];
end;
end;
procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer;
AReport: TStrings);
var
w: Integer;
nCategories: Integer;
i: Integer;
cumFreq: Integer;
m: TQuartileMethod;
begin
w := 10 + ADecPlaces - 3;
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
AReport.Add('');
AReport.Add('Number of cases: %*d', [W, FNumCases]);
AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]);
AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]);
AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]);
AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]);
AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]);
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]);
AReport.Add('');
AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]);
AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]);
AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]);
AReport.Add('');
AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]);
AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]);
AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]);
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]);
AReport.Add('');
AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]);
AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]);
AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]);
AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]);
if (doAlternativeQuartiles in FOptions) then
begin
AReport.Add('');
AReport.Add('');
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
AReport.Add('');
AReport.Add('Method First Quartile Median Third Quartile');
AReport.Add('------ -------------- ---------- --------------');
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]);
AReport.Add('');
AReport.Add('NOTES:');
AReport.Add('Method 1 is the weighted average at X[np] where ');
AReport.Add(' n is no. of cases, p is percentile / 100');
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
AReport.Add('Method 3 is the empirical distribution function.');
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
AReport.Add('Method 6 is the closest observation method.');
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
end;
if (doPercentileRanks in FOptions) then
begin
nCategories := Length(FCategoryValues);
cumFreq := 0;
AReport.Add('');
AReport.Add('');
AReport.Add('PERCENTILE RANKS');
AReport.Add('');
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
AReport.Add('----------- --------- --------- ---------------');
for i := 0 to nCategories-1 do
begin
cumFreq := cumFreq + FFreqValues[i];
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0
]);
end;
end;
end;
{ TDescriptiveFrm }
constructor TDescriptiveFrm.Create(AOwner: TComponent);
begin
inherited;
FReportFrame.Parent := ReportPage;
FReportFrame.BorderSpacing.Left := 0;
FReportFrame.BorderSpacing.Top := 0;
FReportFrame.BorderSpacing.Bottom := 0;
FReportFrame.BorderSpacing.Right := 0;
end;
procedure TDescriptiveFrm.AdjustConstraints;
begin
ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height + OptionsGroup.Height +
CIEdit.Height + ButtonBevel.Height + CloseBtn.Height + VarList.BorderSpacing.Bottom +
OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top;
OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top +
DecPlacesEdit.Height + DecPlacesEdit.BorderSpacing.Top;
ParamsPanel.Constraints.MinWidth := Math.Max(
4*CloseBtn.Width + 3*HelpBtn.BorderSpacing.Right,
OptionsGroup.Width
@ -90,275 +507,100 @@ end;
procedure TDescriptiveFrm.Compute;
var
i, j, k, m: integer;
nCases, noSelected: integer;
Q1, Q2, Q3, Q12, Q13, Q14, Q15, Q16, Q17, Q18, Q22, Q23, Q24, Q25, Q26: double;
Q27, Q28, Q32, Q33, Q34, Q35, Q36, Q37, Q38, IQrange: double;
num, den, cases: double;
values: DblDyneVec = nil;
pcntRank: DblDyneVec = nil;
selected: IntDyneVec = nil;
cellString: String;
i, j: Integer;
noSelected: Integer;
selected: IntDyneVec = nil;
page: TTabSheet;
reportFrame: TReportFrame;
lReport: TStrings;
lDescrStats: TDescriptiveStats;
options: TDescriptiveOptions;
begin
NoSelected := SelList.Items.Count;
noSelected := SelList.Items.Count;
if noSelected = 0 then
begin
MessageDlg('No variables selected.', mtError, [mbOK], 0);
exit;
end;
SetLength(selected, noSelected);
// Get selected variables
for i := 1 to noselected do
// Find column index of selected variables
for i := 0 to noSelected - 1 do
begin
cellstring := SelList.Items.Strings[i-1];
cellstring := SelList.Items[i];
for j := 1 to NoVariables do
if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i-1] := j;
if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i] := j;
end;
lReport := TStringList.Create;
try
lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
lReport.Add('');
SetLength(Values, NoCases);
SetLength(pcntRank, NoCases);
for j := 1 to noSelected do
// Create a tabsheet with ReportFrame for each selected variable (in addition to the built-in one)
if noSelected > PageControl.PageCount then
begin
for i := 1 to noSelected-1 do // we do not create a tab for the first variable - it exists by default
begin
deviation := 0.0;
devsqr := 0.0;
M2 := 0.0;
M3 := 0.0;
M4 := 0.0;
sum := 0.0;
variance := 0.0;
stddev := 0.0;
range := 0.0;
skew := 0.0;
kurtosis := 0.0;
ncases := 0;
df := 0.0;
seskew := 0.0;
kurtosis := 0.0;
sekurtosis := 0.0;
k := selected[j-1];
CI := StrToFloat(CIEdit.Text) / 100.0;
prob := CI;
CI := (1.0 - CI) / 2.0;
CI := 1.0 - CI;
page := TTabSheet.Create(PageControl);
page.Parent := PageControl;
reportFrame := TReportFrame.Create(page);
reportFrame.Parent := page;
reportFrame.Align := alClient;
InitToolBar(reportFrame.ReportToolbar, tpRight);
end;
end;
if ZScoresToGridChk.Checked then // add a new column to the grid
begin
cellstring := OS3MainFrm.DataGrid.Cells[k,0] + 'z';
DictionaryFrm.NewVar(NoVariables + 1);
DictionaryFrm.DictGrid.Cells[1, NoVariables] := cellstring;
OS3MainFrm.DataGrid.Cells[NoVariables, 0] := cellstring;
end;
// Remove excess pages from previous session
while PageControl.PageCount > noSelected do
PageControl.Pages[PageControl.PageCount-1].Free;
// Accumulate sums of squares, sums, etc. for variable j
min := 1.0e308;
max := -1.0e308;
for i := 1 to NoCases do
begin
if not GoodRecord(i, noSelected, selected) then
continue;
// Every tab gets the name of the corresponding variable.
for i := 0 to NoSelected-1 do
PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
if CaseChk.Checked then
begin
if not ValidValue(i, selected[j-1]) then
continue;
end
else if not GoodRecord(i, noselected, selected) then
continue;
// Prepare options
options := [];
if PercentileChk.Checked then Include(options, doPercentileRanks);
if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles);
if CaseChk.Checked then Include(options, doCasewiseDeletion);
value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]);
ncases := ncases + 1;
values[ncases-1] := value;
df := df + 1.0;
sum := sum + value;
variance := variance + (value * value);
if (value < min) then min := value;
if (value > max) then max := value;
end;
lReport := TStringList.Create;
lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100);
try
for i := 0 to noSelected-1 do
begin
// Analyze the data and get descriptive stats
lDescrStats.Analyze(selected[i], options);
if ncases > 0 then
begin
mean := sum / ncases;
range := max - min;
end;
// Store z values, (value - mean) / stdDev, to grid, if needed
zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev);
if ncases > 1 then
begin
variance := variance - (sum * sum) / ncases;
variance := variance / (ncases - 1);
stddev := sqrt(variance);
semean := sqrt(variance / ncases);
if ncases < 120 then
CI := semean * inverset(CI,df)
else
CI := semean * inversez(CI);
end;
if variance = 0.0 then
begin
cellstring := OS3MainFrm.DataGrid.Cells[k,0];
MessageDlg('No Variability in '+ cellstring + ' variable - ending analysis.', mtInformation, [mbOK], 0);
exit;
end;
if ncases > 3 then // obtain skew, kurtosis and z scores
begin
for i := 1 to NoCases do
begin
if CaseChk.Checked then
begin
if not ValidValue(i, selected[j-1]) then continue;
end else
if not GoodRecord(i, noselected, selected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]);
if stddev > 0.0 then
begin
deviation := value - mean;
devsqr := deviation * deviation;
M2 := M2 + devsqr;
M3 := M3 + (deviation * devsqr);
M4 := M4 + (devsqr * devsqr);
z := (value - mean) / stddev;
if ZScoresToGridChk.Checked then
begin
cellstring := format('%8.5f',[z]);
OS3MainFrm.DataGrid.Cells[NoVariables,i] := cellstring;
end;
end;
end;
if ncases > 2 then
begin
skew := (ncases * M3) / ((ncases - 1) * (ncases - 2) * stddev * variance);
cases := ncases;
num := 6.0 * cases * (cases - 1.0);
den := (cases - 2.0) * (cases + 1.0) * (cases + 3.0);
seskew := sqrt(num / den);
end;
if ncases > 3 then
begin
kurtosis := (ncases * (ncases + 1) * M4) - (3 * M2 * M2 * (ncases - 1));
kurtosis := kurtosis / ( (ncases - 1) * (ncases - 2) * (ncases - 3) * (variance * variance) );
sekurtosis := sqrt((4.0 * (ncases * ncases - 1) * (seskew * seskew)) / ((ncases - 3) * (ncases + 5)));
end;
end;
// output results for the kth variable
cellstring := OS3MainFrm.DataGrid.Cells[k,0];
if j > 1 then lReport.Add('');
lReport.Add('VARIABLE: %10s', ['"' + cellString + '"']);
lReport.Add('');
lReport.Add('Number of cases: %10d', [nCases]);
lReport.Add('Sum: %10.3f', [sum]);
lReport.Add('Mean: %10.3f', [mean]);
lReport.Add('Variance: %10.3f', [variance]);
lReport.Add('Std.Dev.: %10.3f', [stddev]);
lReport.Add('Std.Error of Mean %10.3f', [seMean]);
lReport.Add('%.2f%% Conf.Interval Mean: %10.3f to %.3f', [prob*100.0, mean - CI, mean + CI]);
lReport.Add('Range: %10.3f', [range]);
lReport.Add('Minimum: %10.3f', [min]);
lReport.Add('Maximum: %10.3f', [max]);
lReport.Add('Skewness: %10.3f', [skew]);
lReport.Add('Std.Error of Skew: %10.3f', [seSkew]);
lReport.Add('Kurtosis: %10.3f', [kurtosis]);
lReport.Add('Std. Error of Kurtosis: %10.3f', [seKurtosis]);
// Write descriptive stats to report
lReport.Clear;
lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
lReport.Add('');
lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]),
DecPlacesEdit.Value, lReport);
if ncases > 4 then // get percentiles and quartiles
begin
// get percentile ranks
if pcntileChk.Checked then PRank(k, pcntRank, lReport);
// sort values and get quartiles
for i := 0 to ncases - 2 do
begin
for m := i + 1 to ncases -1 do
begin
if values[i] > values[m] then
begin
value := values[i];
values[i] := values[m];
values[m] := value;
end;
end;
end;
Q1 := Quartiles(2,0.25,ncases,values);
Q2 := Quartiles(2,0.5,ncases,values);
Q3 := Quartiles(2,0.75,ncases,values);
IQrange := Q3 - Q1;
lReport.Add('First Quartile: %10.3f', [Q1]);
lReport.Add('Median: %10.3f', [Q2]);
lReport.Add('Third Quartile: %10.3f', [Q3]);
lReport.Add('Interquartile range: %10.3f', [IQrange]);
lReport.Add('');
end;
if (AllQrtilesChk.Checked) then
begin
lReport.Add('Alternative Methods for Obtaining Quartiles');
lReport.Add(' Method 1 2 3 4 5 6 7 8');
lReport.Add('Pcntile');
Q1 := Quartiles(1,0.25,ncases,values);
Q12 := Quartiles(2,0.25,ncases,values);
Q13 := Quartiles(3,0.25,ncases,values);
Q14 := Quartiles(4,0.25,ncases,values);
Q15 := Quartiles(5,0.25,ncases,values);
Q16 := Quartiles(6,0.25,ncases,values);
Q17 := Quartiles(7,0.25,ncases,values);
Q18 := Quartiles(8,0.25,ncases,values);
lReport.Add('Q1 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q1,Q12,Q13,Q14,Q15,Q16,Q17,Q18]);
Q2 := Quartiles(1,0.5,ncases,values);
Q22 := Quartiles(2,0.5,ncases,values);
Q23 := Quartiles(3,0.5,ncases,values);
Q24 := Quartiles(4,0.5,ncases,values);
Q25 := Quartiles(5,0.5,ncases,values);
Q26 := Quartiles(6,0.5,ncases,values);
Q27 := Quartiles(7,0.5,ncases,values);
Q28 := Quartiles(8,0.5,ncases,values);
lReport.Add('Q2 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q2,Q22,Q23,Q24,Q25,Q26,Q27,Q28]);
Q3 := Quartiles(1,0.75,ncases,values);
Q32 := Quartiles(2,0.75,ncases,values);
Q33 := Quartiles(3,0.75,ncases,values);
Q34 := Quartiles(4,0.75,ncases,values);
Q35 := Quartiles(5,0.75,ncases,values);
Q36 := Quartiles(6,0.75,ncases,values);
Q37 := Quartiles(7,0.75,ncases,values);
Q38 := Quartiles(8,0.75,ncases,values);
lReport.Add('Q3 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q3,Q32,Q33,Q34,Q35,Q36,Q37,Q38]);
lReport.Add('');
lReport.Add('NOTES:');
lReport.Add('Method 1 is the weighted average at X[np] where ');
lReport.Add(' n is no. of cases, p is percentile / 100');
lReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
lReport.Add('Method 3 is the empirical distribution function.');
lReport.Add('Method 4 is called the empirical distribution function - averaging.');
lReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
lReport.Add('Method 6 is the closest observation method.');
lReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
lReport.Add('Method 8 was used in an older Microsoft Excel version.');
lReport.Add('See the internet site http://www.xycoon.com/ for the above.');
lReport.Add('');
end; // end of experimental alternatives
lReport.Add(DIVIDER_SMALL_AUTO);
end; // next j variable
FReportFrame.DisplayReport(lReport);
// Display report in the page of the variable
reportFrame := GetReportFrame(i);
reportFrame.DisplayReport(lReport);
end;
finally
// Clean up
lDescrStats.Free;
lReport.Free;
Selected := nil;
Values := nil;
pcntrank := nil;
end;
end;
function TDescriptiveFrm.GetReportFrame(APageIndex: Integer): TReportFrame;
var
page: TTabSheet;
begin
Result := nil;
if (APageIndex >=0) and (APageIndex < PageControl.PageCount) then
begin
page := PageControl.Pages[APageIndex];
if (page.ControlCount > 0) and (page.Controls[0] is TReportFrame) then
Result := TReportFrame(page.Controls[0]);
end;
end;
@ -406,6 +648,12 @@ var
i: integer;
begin
inherited;
for i := PageControl.PageCount-1 downto 1 do
PageControl.Pages[i].Free;
PageControl.Pages[0].Caption := 'Report';
CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT);
VarList.Clear;
SelList.Clear;
@ -447,9 +695,16 @@ procedure TDescriptiveFrm.UpdateBtnStates;
var
lSelected: Boolean;
i: Integer;
F: TReportFrame;
begin
inherited;
for i := 0 to PageControl.PageCount-1 do
begin
F := GetReportFrame(i);
if Assigned(F) then F.Clear;
end;
lSelected := false;
for i := 0 to VarList.Items.Count-1 do
if VarList.Selected[i] then
@ -478,5 +733,43 @@ begin
end;
procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer;
const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
var
i, idx: Integer;
value, zValue: Double;
varName: String;
begin
if AStdDev = 0 then begin
ErrorMsg('Cannot store z values to grid because StdDev is zero.');
exit;
end;
varName := OS3MainFrm.DataGrid.Cells[AColIndex, 0] + '_z';
idx := OS3MainFrm.DataGrid.Rows[0].IndexOf(varName);
if idx = -1 then
begin
DictionaryFrm.NewVar(NoVariables + 1);
DictionaryFrm.DictGrid.Cells[1, NoVariables] := varName;
OS3MainFrm.DataGrid.Cells[NoVariables, 0] := varName;
idx := NoVariables;
end;
for i := 1 to NoCases do
begin
if CaseChk.Checked then
begin
if not ValidValue(i, AColsSelected[AColIndex]) then continue;
end
else
if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]);
zValue := (value - AMean) / AStdDev;
OS3MainFrm.DataGrid.Cells[idx, i] := Format('%8.5f', [zValue]);
// to do: read number of decimal places from Dictionary and use in Format().
end;
end;
end.

View File

@ -1,6 +1,8 @@
inherited BasicStatsParamsForm: TBasicStatsParamsForm
Height = 459
Width = 709
Caption = 'BasicStatsParamsForm'
ClientHeight = 459
ClientWidth = 709
Position = poMainFormCenter
object ParamsPanel: TPanel[0]

View File

@ -77,7 +77,7 @@ begin
exit;
w := MaxValue([HelpBtn.Width, ResetBtn.Width, ComputeBtn.Width, CloseBtn.Width]);
HelpBtn.Constraints.MinWidth := 2;
HelpBtn.Constraints.MinWidth := w;
ResetBtn.Constraints.MinWidth := w;
ComputeBtn.Constraints.MinWidth := w;
CloseBtn.Constraints.MinWidth := w;
@ -89,6 +89,8 @@ begin
Position := poDesigned;
FAutoSized := true;
inherited;
end;

View File

@ -24,7 +24,7 @@ function poly(const c: Array of double; nord: integer; x: double): double; // RE
procedure swilk (var init : boolean; const x: DblDyneVec; n, n1, n2: integer;
const a: DblDyneVec; var w, pw: double; out ifault: integer);
procedure SVDinverse(VAR a : DblDyneMat; N : integer);
function inverset(Probt, DF : double) : double;
function InverseT(Prob, DF: double): double;
function inversechi(p : double; k : integer) : double;
function STUDENT(q,v,r : real) : real;
function realraise(base,power : double ): double;
@ -43,10 +43,12 @@ function UniStats(N : integer; VAR X : DblDyneVec; VAR z : DblDyneVec;
VAR SESkew : double; VAR SEkurtosis : double; VAR min : double;
VAR max : double; VAR Range : double; VAR MissValue : string) :
integer;
function WholeValue(value : double) : double;
function FractionValue(value : double) : double;
function Quartiles(TypeQ : integer; pcntile : double; N : integer;
VAR values : DblDyneVec) : double;
//function WholeValue(value : double) : double;
//function FractionValue(value : double) : double;
function Quartiles(TypeQ: integer; Percentile: double; N: integer;
const Values: DblDyneVec): double;
function KolmogorovProb(z: double): double;
function KolmogorovTest(na: integer; const a: DblDyneVec; nb: integer;
@ -944,15 +946,14 @@ begin
Result := prob;
end;
function inverset(Probt, DF : double) : double;
{ Returns the t value corresponding to a two-tailed t test probability. }
function InverseT(Prob, DF: double): double;
var
z, W, tValue: double;
z, W: double;
begin
// Returns the t value corresponding to a two-tailed t test probability.
z := inversez(Probt);
W := z * ((8.0 * DF + 3.0) / (1.0 + 8.0 * DF));
tValue := sqrt(DF * (exp(W * W / DF) - 1.0));
inverset := tValue;
z := InverseZ(Prob);
W := z * ((8.0 * DF + 3.0) / (1.0 + 8.0 * DF));
Result := sqrt(DF * (exp(W * W / DF) - 1.0));
end;
//---------------------------------------------------------------------
@ -1644,11 +1645,11 @@ begin
end;
function Quartiles(TypeQ : integer; pcntile : double; N : integer;
VAR values : DblDyneVec) : double;
VAR
whole, fraction, Myresult, np, avalue, avalue1 : double;
subscript : integer;
function Quartiles(TypeQ: integer; Percentile: double; N: integer;
const Values: DblDyneVec): double;
var
whole, fraction, np, lValue, lValue1: double;
subscript: integer;
begin
{ for i := 0 to N - 1 do // this is for debugging
begin
@ -1656,46 +1657,52 @@ begin
OutPutFrm.RichEdit.Lines.Add(outline);
end;
OutPutFrm.ShowModal;
OutPutFrm.RichEdit.Clear; }
case TypeQ of
1 : np := pcntile * N;
2 : np := pcntile * (N + 1);
3 : np := pcntile * N;
4 : np := pcntile * N;
5 : np := pcntile * (N - 1);
6 : np := pcntile * N + 0.5;
7 : np := pcntile * (N + 1);
8 : np := pcntile * (N + 1);
OutPutFrm.RichEdit.Clear;
}
case TypeQ of
1 : np := Percentile * N;
2 : np := Percentile * (N + 1);
3 : np := Percentile * N;
4 : np := Percentile * N;
5 : np := Percentile * (N - 1);
6 : np := Percentile * N + 0.5;
7 : np := Percentile * (N + 1);
8 : np := Percentile * (N + 1);
end;
whole := WholeValue(np);
fraction := FractionValue(np);
subscript := Trunc(whole) - 1;
lValue := Values[subscript];
lValue1 := Values[subscript + 1];
case TypeQ of
1 : Result := ((1.0 - fraction) * values[subscript]) + fraction * values[subscript + 1];
2 : Result := ((1.0 - fraction) * lValue) + fraction * lValue1; // values[subscript + 1];
3 : if (fraction = 0.0) then
Result := values[subscript]
else
Result := values[subscript + 1];
4 : if (fraction = 0.0) then
Result := 0.5 * (values[subscript] + values[subscript + 1])
else
Result := values[subscript + 1];
5 : if (fraction = 0.0) then
Result := values[subscript + 1]
else
Result := Values[subscript + 1] + fraction * (Values[subscript + 2] - values[subscript + 1]);
6 : Result := values[subscript];
7 : if (fraction = 0.0) then
Result := values[subscript]
else
Result := fraction * Values[subscript] + (1.0 - fraction) * Values[subscript + 1];
8 : begin
if (fraction = 0.0) then Result := values[subscript];
if (fraction = 0.5) then Result := 0.5 * (values[subscript] + values[subscript + 1]);
if (fraction < 0.5) then Result := values[subscript];
if (fraction > 0.5) then Result := values[subscript + 1];
end;
whole := WholeValue(np);
fraction := FractionValue(np);
subscript := Trunc(whole) - 1;
avalue := values[subscript];
avalue1 := values[subscript + 1];
case TypeQ of
1 : Myresult := ((1.0 - fraction) * values[subscript]) +
fraction * values[subscript + 1];
2 : Myresult := ((1.0 - fraction) * avalue) +
fraction * avalue1; // values[subscript + 1];
3 : if (fraction = 0.0) then Myresult := values[subscript]
else Myresult := values[subscript + 1];
4 : if (fraction = 0.0) then Myresult := 0.5 * (values[subscript] + values[subscript + 1])
else Myresult := values[subscript + 1];
5 : if (fraction = 0.0) then Myresult := values[subscript + 1]
else Myresult := values[subscript + 1] + fraction * (values[subscript + 2] -
values[subscript + 1]);
6 : Myresult := values[subscript];
7 : if (fraction = 0.0) then Myresult := values[subscript]
else Myresult := fraction * values[subscript] +
(1.0 - fraction) * values[subscript + 1];
8 : begin
if (fraction = 0.0) then Myresult := values[subscript];
if (fraction = 0.5) then Myresult := 0.5 * (values[subscript] + values[subscript + 1]);
if (fraction < 0.5) then Myresult := values[subscript];
if (fraction > 0.5) then Myresult := values[subscript + 1];
end;
end;
Result := Myresult;
end;
end;
function KolmogorovProb(z : double) : double;