LazStats: More general usability of the calculations in DescriptiveUnit

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7739 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-03 21:49:09 +00:00
parent a38794b4f8
commit 6f5659920f
5 changed files with 359 additions and 309 deletions

View File

@ -1563,6 +1563,11 @@
<DebugInfoType Value="dsDwarf2"/> <DebugInfoType Value="dsDwarf2"/>
</Debugging> </Debugging>
<LinkSmart Value="True"/> <LinkSmart Value="True"/>
<Options>
<Win32>
<GraphicApplication Value="True"/>
</Win32>
</Options>
</Linking> </Linking>
</CompilerOptions> </CompilerOptions>
<Debugging> <Debugging>

View File

@ -15,48 +15,52 @@ type
{ TDescriptiveStats } { TDescriptiveStats }
TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion);
TDescriptiveOptions = set of TDescriptiveOption;
TQuartileMethod = 1..8; TQuartileMethod = 1..8;
TQuartile = 1..3; TQuartile = 1..3;
TQuartiles = array[TQuartileMethod, TQuartile] of Double;
TDescriptiveStats = class TDescriptiveStats = class
private private
FDataGrid: TStringGrid; FDataGrid: TStringGrid;
FColIndex: Integer; FColIndex: Integer;
FConfLevel: Double; // usually 0.95
FColsSelected: IntDyneVec; FColsSelected: IntDyneVec;
FMean, FStdErrorMean, FDeltaMean: Double; FValues: DblDyneVec;
FMean, FStdErrorMean: Double;
FMin, FMax: Double; FMin, FMax: Double;
FSum: Double; FSum: Double;
FVariance, FStdDev: Double; FVariance, FStdDev: Double;
FSkew, FStdErrorSkew: Double; FSkew, FStdErrorSkew: Double;
FKurtosis, FStdErrorKurtosis: Double; FKurtosis, FStdErrorKurtosis: Double;
FFirstQuartile, FMedian, FThirdQuartile: Double; FFirstQuartile, FMedian, FThirdQuartile: Double;
FCategoryValues, FPercentiles: DblDyneVec;
FFreqValues: IntDyneVec;
FOptions: TDescriptiveOptions;
FNumCases: Integer; FNumCases: Integer;
FQuartiles: array[TQuartileMethod, TQuartile] of Double; procedure Calc_Quartiles;
procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec); procedure Calc_Skew_Kurtosis;
function Calc_DeltaMean(AStdErrorOfMean: Double): Double; // function GetMeanLimits(AIndex: Integer; AConfLevel: Double): Double;
procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double; function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
out M2, M3, M4: Double); procedure Reset;
procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double);
procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer;
out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
procedure Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
out ASum, ASumOfSquares, AMin, AMax: Double);
procedure CollectValues(out AValues: DblDyneVec);
procedure PercentileRank(const AValues: DblDyneVec;
out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
public public
constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double); constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil);
procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions); procedure Analyze(AColIndex: Integer);
procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings); procedure CalcQuartiles(out AQuartiles: TQuartiles);
procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
property FirstQuartile: Double read FFirstQuartile;
property Kurtosis: Double read FKurtosis;
property Max: Double read FMax;
property Median: Double read FMedian;
property Mean: Double read FMean; property Mean: Double read FMean;
property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits;
property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits;
property Min: Double read FMin;
property NumCases: Integer read FNumCases;
property Skew: Double read FSkew;
property StdErrorSkew: Double read FStdErrorSkew;
property StdDev: Double read FStdDev; property StdDev: Double read FStdDev;
// more can be added... property StdErrorKurtosis: Double read FStdErrorKurtosis;
property StdErrorMean: Double read FStdErrorMean;
property StdErrorSkey: Double read FStdErrorSkew;
property ThirdQuartile: Double read FThirdQuartile;
property Variance: Double read FVariance;
end; end;
@ -90,6 +94,7 @@ type
private private
function GetReportFrame(APageIndex: Integer): TReportFrame; function GetReportFrame(APageIndex: Integer): TReportFrame;
procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings);
procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec;
AMean, AStdDev: Double); AMean, AStdDev: Double);
@ -113,7 +118,7 @@ implementation
uses uses
Math, Math,
Utils; Utils, MathUnit, GridProcs;
{=============================================================================== {===============================================================================
@ -125,218 +130,149 @@ uses
*==============================================================================} *==============================================================================}
constructor TDescriptiveStats.Create(ADataGrid: TStringGrid; constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
AColsSelected: IntDyneVec; AConfLevel: Double); AColsSelected: IntDyneVec = nil);
begin begin
inherited Create; inherited Create;
FDataGrid := ADataGrid; FDataGrid := ADataGrid;
FColsSelected := AColsSelected; FColsSelected := AColsSelected;
FConfLevel := AConfLevel; Reset;
end; end;
procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions); procedure TDescriptiveStats.Analyze(AColIndex: Integer);
var
SS: Double;
values: DblDyneVec;
M2, M3, M4: Double;
begin begin
FMean := NaN; Reset;
FVariance := NaN;
FStdDev := NaN;
FStdErrorMean := NaN;
FDeltaMean := NaN;
FSkew := NaN;
FStdErrorSkew := NaN;
FColIndex := AColIndex; FColIndex := AColIndex;
FOptions := AOptions;
CollectValues(values); if Length(FColsSelected) = 0 then
FNumCases := Length(values); FValues := CollectValues(FDataGrid, AColIndex, FColsSelected)
else
FValues := CollectValues(FDataGrid, AColIndex);
FNumCases := Length(FValues);
SortOnX(values); SortOnX(FValues);
MathUnit.Calc_MaxMin(FValues, FMax, FMin);
MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev);
Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax);
if FNumCases > 0 then begin
FMean := FSum / FNumCases;
if FNumCases > 1 then if FNumCases > 1 then
begin
FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1);
FStdDev := sqrt(FVariance);
FStdErrorMean := sqrt(FVariance / FNumCases); FStdErrorMean := sqrt(FVariance / FNumCases);
FDeltaMean := Calc_DeltaMean(FStdErrorMean);
end;
Calc_Moments(values, FMean, M2, M3, M4); Calc_Skew_Kurtosis;
Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis); Calc_Quartiles;
Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile);
if (doAlternativeQuartiles in FOptions) then
Calc_AlternativeQuartiles(values);
if (doPercentileRanks in FOptions) then
PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues);
end;
end; end;
procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec); procedure TDescriptiveStats.Calc_Quartiles;
begin
FFirstQuartile := Quartiles(2, 0.25, FNumCases, FValues);
FMedian := Quartiles(2, 0.5, FNumCases, FValues);
FThirdQuartile := Quartiles(2, 0.75, FNumCases, FValues);
end;
procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles);
var var
nCases: Integer;
q: TQuartile; q: TQuartile;
m: TQuartileMethod; m: TQuartileMethod;
begin begin
nCases := Length(AValues);
for m := Low(TQuartileMethod) to High(TQuartileMethod) do for m := Low(TQuartileMethod) to High(TQuartileMethod) do
for q := Low(TQuartile) to High(TQuartile) do for q := Low(TQuartile) to High(TQuartile) do
FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues); AQuartiles[m, q] := Quartiles(m, 0.25*q, FNumCases, FValues);
end; end;
// Tolerance around the mean procedure TDescriptiveStats.Calc_Skew_Kurtosis;
function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double;
var
alpha: Double;
confLev: Double;
DOF: Integer;
begin
alpha := (1 - FConfLevel) / 2;
confLev := 1 - alpha;
if FNumCases < 120 then procedure Moments(out M2, M3, M4: Double);
begin var
DOF := FNumCases - 1;
Result := AStdErrorOfMean * InverseT(confLev, DOF);
end else
Result := AStdErrorOfMean * InverseZ(confLev);
end;
procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec;
AMean: Double; out M2, M3, M4: Double);
var
i: Integer; i: Integer;
dev, devSqr: Double; dev, devSqr: Double;
begin begin
M2 := 0; M2 := 0;
M3 := 0; M3 := 0;
M4 := 0; M4 := 0;
for i := 0 to High(AValues) do for i := 0 to High(FValues) do
begin begin
dev := AValues[i] - AMean; dev := FValues[i] - FMean;
devSqr := Sqr(dev); devSqr := Sqr(dev);
M2 := M2 + devSqr; M2 := M2 + devSqr;
M3 := M3 + dev * devSqr; M3 := M3 + dev * devSqr;
M4 := M4 + sqr(devSqr); M4 := M4 + sqr(devSqr);
end; end;
end; end;
procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec;
out Q1, Median, Q3: Double);
var
n: Integer;
begin
n := Length(AValues);
Q1 := Quartiles(2, 0.25, n, AValues);
Median := Quartiles(2, 0.5, n, AValues);
Q3 := Quartiles(2, 0.75, n, AValues);
end;
procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double;
ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
var var
num, denom: Double; num, denom: Double;
stdDev3, stdDev4: Double; stdDev3, stdDev4: Double;
M2, M3, M4: Double;
begin begin
Skew := NaN; FSkew := NaN;
StdErrorSkew := NaN; FStdErrorSkew := NaN;
Kurtosis := NaN; FKurtosis := NaN;
StdErrorKurtosis := NaN; FStdErrorKurtosis := NaN;
stdDev3 := StdDev * StdDev * StdDev; if FNumCases < 2 then
stdDev4 := StdDev3 * StdDev; exit;
if ANumCases > 2 then stdDev3 := FStdDev * FStdDev * FStdDev;
stdDev4 := StdDev3 * FStdDev;
Moments(M2, M3, M4);
if FNumCases > 2 then
begin begin
Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3); FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 3) * stdDev3);
num := 6.0 * ANumCases * (ANumCases - 1); num := 6.0 * FNumCases * (FNumCases - 1);
denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3); denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3);
StdErrorSkew := sqrt(num / denom); FStdErrorSkew := sqrt(num / denom);
end; end;
if ANumCases > 3 then if FNumCases > 3 then
begin begin
num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1); num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1);
denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4; denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4;
Kurtosis := num / denom; FKurtosis := num / denom;
num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew); num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew);
denom := (ANumCases - 3) * (ANumCases + 5); denom := (FNumCases - 3) * (FNumCases + 5);
StdErrorKurtosis := sqrt(num / denom); FStdErrorKurtosis := sqrt(num / denom);
end; end;
end; end;
procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec; function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
out ASum, ASumOfSquares, AMin, AMax: Double);
var var
i: Integer; alpha: Double;
confLev: Double;
DOF: Integer;
delta: Double;
begin begin
ASum := 0.0; alpha := (1 - AConfLevel) / 2;
ASumOfSquares := 0; confLev := 1 - alpha;
AMin := Infinity;
AMax := -Infinity;
for i := 0 to High(AValues) do if FNumCases < 120 then
begin begin
ASum := ASum + AValues[i]; DOF := FNumCases - 1;
ASumOfSquares := ASumOfSquares + sqr(AValues[i]); delta := FStdErrorMean * InverseT(confLev, DOF);
if AValues[i] < AMin then AMin := AValues[i];
if AValues[i] > AMax then AMax := AValues[i];
end;
end;
procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec);
var
i, n: Integer;
begin
AValues := nil; // silence the compiler
SetLength(AValues, NoCases);
n := 0;
for i := 1 to NoCases do
begin
if (doCasewiseDeletion in FOptions) then
begin
// Do not consider a case when any variable is empty
if not ValidValue(i, FColIndex) then
continue;
end else end else
begin delta := FStdErrorMean * InverseZ(confLev);
// Do not consider a case when the current variable is empty
if not GoodRecord(i, Length(FColsSelected), FColsSelected) then
continue;
end;
if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then case AIndex of
inc(n) 0: Result := FMean - delta;
else 1: Result := FMean + delta;
raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"',
[FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]);
end; end;
SetLength(AValues, n);
end; end;
// Computes the percentile ranks of values stored in the data grid at the // Computes the percentile ranks of values stored in the data grid at the
// loaded columns. The values are assumed to be sorted. // loaded columns. The values are assumed to be sorted.
procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec;
ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); out AFreq: IntDyneVec);
var var
i, nCases, iCat, nCategories: Integer; i, iCat, nCategories: Integer;
lastCategoryValue: Double; lastCategoryValue: Double;
cumFreqCentered: Double; cumFreqCentered: Double;
cumFreq: Integer; cumFreq: Integer;
@ -346,24 +282,23 @@ begin
AFreq := nil; AFreq := nil;
APercentiles := nil; APercentiles := nil;
nCases := Length(AValues); SetLength(ACategoryValues, FNumCases); // over-dimension; will be trimmed later
SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later SetLength(AFreq, FNumCases);
SetLength(AFreq, nCases);
// Get count of unique values and frequencies of each // Get count of unique values and frequencies of each
lastCategoryValue := AValues[0]; lastCategoryValue := FValues[0];
ACategoryValues[0] := lastCategoryValue; ACategoryValues[0] := lastCategoryValue;
AFreq[0] := 1; AFreq[0] := 1;
iCat := 0; iCat := 0;
for i := 1 to nCases-1 do for i := 1 to FNumCases-1 do
begin begin
if (lastCategoryValue = AValues[i]) then if (lastCategoryValue = FValues[i]) then
AFreq[iCat] := AFreq[iCat] + 1 AFreq[iCat] := AFreq[iCat] + 1
else else
begin // new value begin // new value
inc(iCat); inc(iCat);
AFreq[iCat] := 1; AFreq[iCat] := 1;
lastCategoryValue := AValues[i]; lastCategoryValue := FValues[i];
ACategoryValues[iCat] := lastCategoryValue; ACategoryValues[iCat] := lastCategoryValue;
end; end;
end; end;
@ -375,94 +310,32 @@ begin
// Get cumulative frequencies and percentile ranks // Get cumulative frequencies and percentile ranks
SetLength(APercentiles, nCategories); SetLength(APercentiles, nCategories);
APercentiles[0] := AFreq[0] * 0.5 / nCases; APercentiles[0] := AFreq[0] * 0.5 / FNumCases;
cumFreq := AFreq[0]; cumFreq := AFreq[0];
for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1 for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1
begin begin
cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point
APercentiles[i] := cumFreqCentered / nCases; APercentiles[i] := cumFreqCentered / FNumCases;
cumFreq := cumFreq + AFreq[i]; cumFreq := cumFreq + AFreq[i];
end; end;
end; end;
procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer; procedure TDescriptiveStats.Reset;
AReport: TStrings);
var
w: Integer;
nCategories: Integer;
i: Integer;
cumFreq: Integer;
m: TQuartileMethod;
begin begin
w := 10 + ADecPlaces - 3; FValues := nil;
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']); FMean := NaN;
AReport.Add(''); FStdErrorMean := NaN;
AReport.Add('Number of cases: %*d', [W, FNumCases]); FMin := NaN;
AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]); FMax := NaN;
AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]); FVariance := NaN;
AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]); FStdDev := NaN;
AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]); FStdErrorMean := NaN;
AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]); FSkew := NaN;
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [ FStdErrorSkew := NaN;
FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]); FKurtosis := NaN;
AReport.Add(''); FStdErrorKurtosis := NaN;
AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]); FNumCases := 0;
AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]);
AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]);
AReport.Add('');
AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]);
AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]);
AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]);
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]);
AReport.Add('');
AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]);
AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]);
AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]);
AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]);
if (doAlternativeQuartiles in FOptions) then
begin
AReport.Add('');
AReport.Add('');
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
AReport.Add('');
AReport.Add('Method First Quartile Median Third Quartile');
AReport.Add('------ -------------- ---------- --------------');
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]);
AReport.Add('');
AReport.Add('NOTES:');
AReport.Add('Method 1 is the weighted average at X[np] where ');
AReport.Add(' n is no. of cases, p is percentile / 100');
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
AReport.Add('Method 3 is the empirical distribution function.');
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
AReport.Add('Method 6 is the closest observation method.');
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
end;
if (doPercentileRanks in FOptions) then
begin
nCategories := Length(FCategoryValues);
cumFreq := 0;
AReport.Add('');
AReport.Add('');
AReport.Add('PERCENTILE RANKS');
AReport.Add('');
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
AReport.Add('----------- --------- --------- ---------------');
for i := 0 to nCategories-1 do
begin
cumFreq := cumFreq + FFreqValues[i];
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0
]);
end;
end;
end; end;
@ -514,8 +387,7 @@ var
page: TTabSheet; page: TTabSheet;
reportFrame: TReportFrame; reportFrame: TReportFrame;
lReport: TStrings; lReport: TStrings;
lDescrStats: TDescriptiveStats; lStats: TDescriptiveStats;
options: TDescriptiveOptions;
begin begin
noSelected := SelList.Items.Count; noSelected := SelList.Items.Count;
if noSelected = 0 then if noSelected = 0 then
@ -555,29 +427,23 @@ begin
for i := 0 to NoSelected-1 do for i := 0 to NoSelected-1 do
PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0]; PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
// Prepare options //
options := [];
if PercentileChk.Checked then Include(options, doPercentileRanks);
if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles);
if CaseChk.Checked then Include(options, doCasewiseDeletion);
lReport := TStringList.Create; lReport := TStringList.Create;
lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100); if not CaseChk.Checked then
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid)
else
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected);
try try
for i := 0 to noSelected-1 do for i := 0 to noSelected-1 do
begin begin
// Analyze the data and get descriptive stats // Analyze the data and get descriptive stats
lDescrStats.Analyze(selected[i], options); lStats.Analyze(selected[i]);
// Store z values, (value - mean) / stdDev, to grid, if needed // Store z values, (value - mean) / stdDev, to grid, if needed
zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev); zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev);
// Write descriptive stats to report // Write descriptive stats to report
lReport.Clear; WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport);
lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
lReport.Add('');
lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]),
DecPlacesEdit.Value, lReport);
// Display report in the page of the variable // Display report in the page of the variable
reportFrame := GetReportFrame(i); reportFrame := GetReportFrame(i);
@ -585,7 +451,7 @@ begin
end; end;
finally finally
// Clean up // Clean up
lDescrStats.Free; lStats.Free;
lReport.Free; lReport.Free;
end; end;
end; end;
@ -651,7 +517,6 @@ begin
for i := PageControl.PageCount-1 downto 1 do for i := PageControl.PageCount-1 downto 1 do
PageControl.Pages[i].Free; PageControl.Pages[i].Free;
PageControl.Pages[0].Caption := 'Report'; PageControl.Pages[0].Caption := 'Report';
CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT); CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT);
@ -698,12 +563,16 @@ var
F: TReportFrame; F: TReportFrame;
begin begin
inherited; inherited;
(*
for i := 0 to PageControl.PageCount-1 do for i := PageControl.PageCount-1 downto 0 do
begin begin
if i > 0 then
PageControl.Pages[i].Free
else begin
F := GetReportFrame(i); F := GetReportFrame(i);
if Assigned(F) then F.Clear; if Assigned(F) then F.Clear;
end; end;
end; *)
lSelected := false; lSelected := false;
for i := 0 to VarList.Items.Count-1 do for i := 0 to VarList.Items.Count-1 do
@ -733,6 +602,103 @@ begin
end; end;
procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats;
AVarName: String; AReport: TStrings);
var
w: Integer;
i: Integer;
confLevel: Double;
decPlaces: Integer;
m: TQuartileMethod;
Q: TQuartiles;
cumFreq: Integer;
nCategories: Integer;
categories: DblDyneVec = nil;
freq: IntDyneVec = nil;
percentiles: DblDyneVec = nil;
begin
confLevel := StrToFloat(CIEdit.Text) / 100;
decPlaces := DecPlacesEdit.Value;
w := 10 + decPlaces - 3;
AReport.Clear;
AReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
AReport.Add('');
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
AReport.Add('');
AReport.Add('Number of cases: %*d', [W, Stats.NumCases]);
// AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]);
AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]);
AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]);
AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]);
AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]);
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel]
]);
AReport.Add('');
AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]);
AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]);
AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]);
AReport.Add('');
AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]);
AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]);
AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]);
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]);
AReport.Add('');
AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]);
AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]);
AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]);
AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]);
if AllQuartilesChk.Checked then
begin
Stats.CalcQuartiles(Q);
AReport.Add('');
AReport.Add('');
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
AReport.Add('');
AReport.Add('Method First Quartile Median Third Quartile');
AReport.Add('------ -------------- ---------- --------------');
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]);
AReport.Add('');
AReport.Add('NOTES:');
AReport.Add('Method 1 is the weighted average at X[np] where ');
AReport.Add(' n is no. of cases, p is percentile / 100');
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
AReport.Add('Method 3 is the empirical distribution function.');
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
AReport.Add('Method 6 is the closest observation method.');
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
end;
if PercentileChk.Checked then
begin
Stats.PercentileRank(categories, percentiles, freq);
nCategories := Length(categories);
cumFreq := 0;
AReport.Add('');
AReport.Add('');
AReport.Add('PERCENTILE RANKS');
AReport.Add('');
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
AReport.Add('----------- --------- --------- ---------------');
for i := 0 to nCategories-1 do
begin
cumFreq := cumFreq + freq[i];
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
categories[i], freq[i], cumFreq, percentiles[i]*100.0
]);
end;
end;
end;
procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer; procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer;
const AColsSelected: IntDyneVec; AMean, AStdDev: Double); const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
var var
@ -759,10 +725,10 @@ begin
begin begin
if CaseChk.Checked then if CaseChk.Checked then
begin begin
if not ValidValue(i, AColsSelected[AColIndex]) then continue; if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue;
end end
else else
if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue; if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]); value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]);
zValue := (value - AMean) / AStdDev; zValue := (value - AMean) / AStdDev;

View File

@ -41,6 +41,8 @@ type
LHelpPath: String; LHelpPath: String;
end; end;
ELazStats = class(Exception);
var var
NoCases : integer; NoCases : integer;
NoVariables : integer; NoVariables : integer;

View File

@ -9,7 +9,7 @@ uses
Globals, DictionaryUnit; Globals, DictionaryUnit;
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; function CollectValues(AGrid: TStringGrid; AColIndex: Integer;
AColCheck: IntDyneVec): DblDyneVec; AColCheck: IntDyneVec = nil): DblDyneVec;
procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer; procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer;
const AColCheck: IntDyneVec; out AMin, AMax: Double); const AColCheck: IntDyneVec; out AMin, AMax: Double);
@ -42,6 +42,7 @@ uses
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec; function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
var var
row, n: Integer; row, n: Integer;
val: Double;
begin begin
SetLength(Result, AGrid.RowCount); SetLength(Result, AGrid.RowCount);
n := 0; n := 0;
@ -54,7 +55,11 @@ begin
begin begin
if not GoodRecord(AGrid, row, AColCheck) then continue; if not GoodRecord(AGrid, row, AColCheck) then continue;
end; end;
Result[n] := StrToFloat(trim(AGrid.Cells[AColIndex, row])); if TryStrToFloat(trim(AGrid.Cells[AColIndex, row]), val) then
Result[n] := val
else
raise ELazStats.CreateFmt('Non-numeric string "%s" in column %d, row %d',
[AGrid.Cells[AColIndex, row], AColIndex, row]);
inc(n); inc(n);
end; end;
SetLength(Result, n); SetLength(Result, n);

View File

@ -7,7 +7,8 @@ unit MathUnit;
interface interface
uses uses
Classes, SysUtils; Classes, SysUtils,
Globals;
const const
TWO_PI = 2.0 * PI; TWO_PI = 2.0 * PI;
@ -43,6 +44,13 @@ function FactorialLn(n: Integer): Double;
function PoissonPDF(n: integer; a: double): Double; function PoissonPDF(n: integer; a: double): Double;
function PoissonCDF(n: Integer; a: double): Double; function PoissonCDF(n: Integer; a: double): Double;
procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
implementation implementation
uses uses
@ -474,6 +482,70 @@ begin
end; end;
{===============================================================================
* Vector-based calculations
===============================================================================}
procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
var
i: Integer;
begin
AMin := Infinity;
AMax := -Infinity;
for i := Low(AData) to High(AData) do
begin
if AData[i] < AMin then AMin := AData[i];
if AData[i] > AMax then AMax := AData[i];
end;
end;
procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
var
variance: Double;
begin
Calc_MeanVarStdDev(AData, AMean, variance, AStdDev);
end;
procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
var
sum, ss: Double;
n: Integer;
begin
AMean := NaN;
AVariance := NaN;
AStdDev := NaN;
n := Length(AData);
if n = 0 then
exit;
Calc_SumSS(AData, sum, ss);
AMean := sum / n;
if n = 1 then
exit;
AVariance := ((ss - sqr(AMean)) / n) / (n - 1);
AStdDev := sqrt(AVariance);
end;
procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
var
i: Integer;
begin
Sum := 0;
SS := 0;
for i := Low(AData) to High(AData) do
begin
Sum := Sum + AData[i];
SS := SS + sqr(AData[i]);
end;
end;
initialization initialization
InitFactLn(); InitFactLn();