diff --git a/applications/lazstats/source/LazStats.lpi b/applications/lazstats/source/LazStats.lpi
index f6570482f..48809267d 100644
--- a/applications/lazstats/source/LazStats.lpi
+++ b/applications/lazstats/source/LazStats.lpi
@@ -1563,6 +1563,11 @@
+
+
+
+
+
diff --git a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas
index a4cc868ea..6da4633cc 100644
--- a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas
+++ b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas
@@ -15,48 +15,52 @@ type
{ TDescriptiveStats }
- TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion);
- TDescriptiveOptions = set of TDescriptiveOption;
TQuartileMethod = 1..8;
TQuartile = 1..3;
+ TQuartiles = array[TQuartileMethod, TQuartile] of Double;
TDescriptiveStats = class
private
FDataGrid: TStringGrid;
FColIndex: Integer;
- FConfLevel: Double; // usually 0.95
FColsSelected: IntDyneVec;
- FMean, FStdErrorMean, FDeltaMean: Double;
+ FValues: DblDyneVec;
+ FMean, FStdErrorMean: Double;
FMin, FMax: Double;
FSum: Double;
FVariance, FStdDev: Double;
FSkew, FStdErrorSkew: Double;
FKurtosis, FStdErrorKurtosis: Double;
FFirstQuartile, FMedian, FThirdQuartile: Double;
- FCategoryValues, FPercentiles: DblDyneVec;
- FFreqValues: IntDyneVec;
- FOptions: TDescriptiveOptions;
FNumCases: Integer;
- FQuartiles: array[TQuartileMethod, TQuartile] of Double;
- procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec);
- function Calc_DeltaMean(AStdErrorOfMean: Double): Double;
- procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double;
- out M2, M3, M4: Double);
- procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double);
- procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer;
- out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
- procedure Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
- out ASum, ASumOfSquares, AMin, AMax: Double);
- procedure CollectValues(out AValues: DblDyneVec);
- procedure PercentileRank(const AValues: DblDyneVec;
- out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
+ procedure Calc_Quartiles;
+ procedure Calc_Skew_Kurtosis;
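+    // Shared getter of MeanLowerLimit/MeanUpperLimit: it receives the array argument (AConfLevel) first and the value of the "index" specifier (AIndex: 0 = lower, 1 = upper limit) last.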
+ function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
+ procedure Reset;
public
- constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double);
- procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
- procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings);
+ constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil);
+ procedure Analyze(AColIndex: Integer);
+ procedure CalcQuartiles(out AQuartiles: TQuartiles);
+ procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
+
+ property FirstQuartile: Double read FFirstQuartile;
+ property Kurtosis: Double read FKurtosis;
+ property Max: Double read FMax;
+ property Median: Double read FMedian;
property Mean: Double read FMean;
+ property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits;
+ property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits;
+ property Min: Double read FMin;
+ property NumCases: Integer read FNumCases;
+ property Skew: Double read FSkew;
+ property StdErrorSkew: Double read FStdErrorSkew;
property StdDev: Double read FStdDev;
- // more can be added...
+ property StdErrorKurtosis: Double read FStdErrorKurtosis;
+ property StdErrorMean: Double read FStdErrorMean;
+ // NOTE(review): "StdErrorSkey" looks like a misspelling of StdErrorSkew, which is
+ // already declared in this class and reads the same field (FStdErrorSkew).
+ // Kept so that any code referring to this name still compiles - confirm and remove.
+ property StdErrorSkey: Double read FStdErrorSkew;
+ property ThirdQuartile: Double read FThirdQuartile;
+ property Variance: Double read FVariance;
end;
@@ -90,6 +94,7 @@ type
private
function GetReportFrame(APageIndex: Integer): TReportFrame;
+ procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings);
procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec;
AMean, AStdDev: Double);
@@ -113,7 +118,7 @@ implementation
uses
Math,
- Utils;
+ Utils, MathUnit, GridProcs;
{===============================================================================
@@ -125,218 +130,149 @@ uses
*==============================================================================}
constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
- AColsSelected: IntDyneVec; AConfLevel: Double);
+ AColsSelected: IntDyneVec = nil);
begin
inherited Create;
FDataGrid := ADataGrid;
FColsSelected := AColsSelected;
- FConfLevel := AConfLevel;
+ Reset;
end;
-procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
-var
- SS: Double;
- values: DblDyneVec;
- M2, M3, M4: Double;
+procedure TDescriptiveStats.Analyze(AColIndex: Integer);
begin
- FMean := NaN;
- FVariance := NaN;
- FStdDev := NaN;
- FStdErrorMean := NaN;
- FDeltaMean := NaN;
- FSkew := NaN;
- FStdErrorSkew := NaN;
+ Reset;
FColIndex := AColIndex;
- FOptions := AOptions;
- CollectValues(values);
- FNumCases := Length(values);
+  // Always forward the column-check list. An empty (nil) list is identical to
+  // omitting the argument, because nil is the declared default of AColCheck.
+  // A non-empty list has to be passed on so that CollectValues can apply its
+  // record check against these columns. The former test was inverted: it
+  // handed the list over only when it was empty, so it never had any effect.
+  FValues := CollectValues(FDataGrid, AColIndex, FColsSelected);
+ FNumCases := Length(FValues);
- SortOnX(values);
+ SortOnX(FValues);
- Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax);
- if FNumCases > 0 then begin
- FMean := FSum / FNumCases;
- if FNumCases > 1 then
+ MathUnit.Calc_MaxMin(FValues, FMax, FMin);
+ MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev);
+
+ if FNumCases > 1 then
+ FStdErrorMean := sqrt(FVariance / FNumCases);
+
+ Calc_Skew_Kurtosis;
+ Calc_Quartiles;
+end;
+
+
+{ Stores the first quartile, the median and the third quartile of FValues in
+  the corresponding fields. FValues has been sorted by Analyze beforehand. }
+procedure TDescriptiveStats.Calc_Quartiles;
+const
+  // Quartile method handed to Quartiles() for the three standard values
+  METHOD = 2;
+begin
+  FFirstQuartile := Quartiles(METHOD, 0.25, FNumCases, FValues);
+  FMedian        := Quartiles(METHOD, 0.5,  FNumCases, FValues);
+  FThirdQuartile := Quartiles(METHOD, 0.75, FNumCases, FValues);
+end;
+
+
+{ Fills AQuartiles with the 25 %, 50 % and 75 % quantiles of FValues, one row
+  for each of the available quartile methods. }
+procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles);
+var
+  method: TQuartileMethod;
+  quartile: TQuartile;
+  fraction: Double;
+begin
+  for method := Low(TQuartileMethod) to High(TQuartileMethod) do
+    for quartile := Low(TQuartile) to High(TQuartile) do
+    begin
+      // quartile 1, 2, 3 --> fraction 0.25, 0.5, 0.75
+      fraction := 0.25 * quartile;
+      AQuartiles[method, quartile] := Quartiles(method, fraction, FNumCases, FValues);
+    end;
+end;
+
+
+procedure TDescriptiveStats.Calc_Skew_Kurtosis;
+
+ procedure Moments(out M2, M3, M4: Double);
+ var
+ i: Integer;
+ dev, devSqr: Double;
+ begin
+ M2 := 0;
+ M3 := 0;
+ M4 := 0;
+ for i := 0 to High(FValues) do
begin
- FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1);
- FStdDev := sqrt(FVariance);
- FStdErrorMean := sqrt(FVariance / FNumCases);
- FDeltaMean := Calc_DeltaMean(FStdErrorMean);
+ dev := FValues[i] - FMean;
+ devSqr := Sqr(dev);
+ M2 := M2 + devSqr;
+ M3 := M3 + dev * devSqr;
+ M4 := M4 + sqr(devSqr);
end;
+ end;
- Calc_Moments(values, FMean, M2, M3, M4);
- Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis);
+var
+ num, denom: Double;
+ stdDev3, stdDev4: Double;
+ M2, M3, M4: Double;
+begin
+ FSkew := NaN;
+ FStdErrorSkew := NaN;
+ FKurtosis := NaN;
+ FStdErrorKurtosis := NaN;
- Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile);
+ if FNumCases < 2 then
+ exit;
- if (doAlternativeQuartiles in FOptions) then
- Calc_AlternativeQuartiles(values);
- if (doPercentileRanks in FOptions) then
- PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues);
+ stdDev3 := FStdDev * FStdDev * FStdDev;
+ stdDev4 := StdDev3 * FStdDev;
+
+ Moments(M2, M3, M4);
+
+  if FNumCases > 2 then
+  begin
+    // Adjusted Fisher-Pearson skewness:
+    //   G1 = n / ((n-1)*(n-2)) * sum((x - mean)^3) / s^3
+    // The second factor of the denominator must be (n-2). The former (n-3)
+    // made the denominator zero for n = 3, although this branch explicitly
+    // admits n = 3, and gave a wrong value for every other n.
+    FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 2) * stdDev3);
+
+    // Standard error of skewness: sqrt(6n(n-1) / ((n-2)(n+1)(n+3)))
+    num := 6.0 * FNumCases * (FNumCases - 1);
+    denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3);
+    FStdErrorSkew := sqrt(num / denom);
+  end;
+
+ if FNumCases > 3 then
+ begin
+ num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1);
+ denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4;
+ FKurtosis := num / denom;
+
+ num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew);
+ denom := (FNumCases - 3) * (FNumCases + 5);
+ FStdErrorKurtosis := sqrt(num / denom);
end;
end;
-procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec);
-var
- nCases: Integer;
- q: TQuartile;
- m: TQuartileMethod;
-begin
- nCases := Length(AValues);
- for m := Low(TQuartileMethod) to High(TQuartileMethod) do
- for q := Low(TQuartile) to High(TQuartile) do
- FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues);
-end;
-
-
-// Tolerance around the mean
-function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double;
+function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
var
alpha: Double;
confLev: Double;
DOF: Integer;
+ delta: Double;
begin
- alpha := (1 - FConfLevel) / 2;
+  // Default result. It is also what the caller gets for an AIndex other than
+  // 0 or 1, for which the case statement at the end assigns nothing.
+  Result := NaN;
+
+  // Analyze computes the standard error of the mean only for two or more
+  // cases. With fewer cases there is no interval, and InverseT must not be
+  // called with zero or negative degrees of freedom.
+  if FNumCases < 2 then
+    exit;
+
+  alpha := (1 - AConfLevel) / 2;
confLev := 1 - alpha;
if FNumCases < 120 then
begin
DOF := FNumCases - 1;
- Result := AStdErrorOfMean * InverseT(confLev, DOF);
+ delta := FStdErrorMean * InverseT(confLev, DOF);
end else
- Result := AStdErrorOfMean * InverseZ(confLev);
-end;
+ delta := FStdErrorMean * InverseZ(confLev);
-
-procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec;
- AMean: Double; out M2, M3, M4: Double);
-var
- i: Integer;
- dev, devSqr: Double;
-begin
- M2 := 0;
- M3 := 0;
- M4 := 0;
- for i := 0 to High(AValues) do
- begin
- dev := AValues[i] - AMean;
- devSqr := Sqr(dev);
- M2 := M2 + devSqr;
- M3 := M3 + dev * devSqr;
- M4 := M4 + sqr(devSqr);
+ case AIndex of
+ 0: Result := FMean - delta;
+ 1: Result := FMean + delta;
end;
end;
-procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec;
- out Q1, Median, Q3: Double);
-var
- n: Integer;
-begin
- n := Length(AValues);
- Q1 := Quartiles(2, 0.25, n, AValues);
- Median := Quartiles(2, 0.5, n, AValues);
- Q3 := Quartiles(2, 0.75, n, AValues);
-end;
-
-
-procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double;
- ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double);
-var
- num, denom: Double;
- stdDev3, stdDev4: Double;
-begin
- Skew := NaN;
- StdErrorSkew := NaN;
- Kurtosis := NaN;
- StdErrorKurtosis := NaN;
-
- stdDev3 := StdDev * StdDev * StdDev;
- stdDev4 := StdDev3 * StdDev;
-
- if ANumCases > 2 then
- begin
- Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3);
-
- num := 6.0 * ANumCases * (ANumCases - 1);
- denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3);
- StdErrorSkew := sqrt(num / denom);
- end;
-
- if ANumCases > 3 then
- begin
- num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1);
- denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4;
- Kurtosis := num / denom;
-
- num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew);
- denom := (ANumCases - 3) * (ANumCases + 5);
- StdErrorKurtosis := sqrt(num / denom);
- end;
-end;
-
-
-procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec;
- out ASum, ASumOfSquares, AMin, AMax: Double);
-var
- i: Integer;
-begin
- ASum := 0.0;
- ASumOfSquares := 0;
- AMin := Infinity;
- AMax := -Infinity;
-
- for i := 0 to High(AValues) do
- begin
- ASum := ASum + AValues[i];
- ASumOfSquares := ASumOfSquares + sqr(AValues[i]);
- if AValues[i] < AMin then AMin := AValues[i];
- if AValues[i] > AMax then AMax := AValues[i];
- end;
-end;
-
-
-procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec);
-var
- i, n: Integer;
-begin
- AValues := nil; // silence the compiler
- SetLength(AValues, NoCases);
- n := 0;
- for i := 1 to NoCases do
- begin
- if (doCasewiseDeletion in FOptions) then
- begin
- // Do not consider a case when any variable is empty
- if not ValidValue(i, FColIndex) then
- continue;
- end else
- begin
- // Do not consider a case when the current variable is empty
- if not GoodRecord(i, Length(FColsSelected), FColsSelected) then
- continue;
- end;
-
- if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then
- inc(n)
- else
- raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"',
- [FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]);
- end;
- SetLength(AValues, n);
-end;
-
-
// Computes the percentile ranks of values stored in the data grid at the
// loaded columns. The values are assumed to be sorted.
-procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out
- ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
+procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec;
+ out AFreq: IntDyneVec);
var
- i, nCases, iCat, nCategories: Integer;
+ i, iCat, nCategories: Integer;
lastCategoryValue: Double;
cumFreqCentered: Double;
cumFreq: Integer;
@@ -346,24 +282,23 @@ begin
AFreq := nil;
APercentiles := nil;
- nCases := Length(AValues);
- SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later
- SetLength(AFreq, nCases);
+  // An empty sample has no categories: return empty vectors instead of
+  // reading FValues[0] below, which does not exist in this case.
+  if FNumCases = 0 then
+  begin
+    ACategoryValues := nil;
+    exit;
+  end;
+
+  SetLength(ACategoryValues, FNumCases);  // over-dimension; will be trimmed later
+  SetLength(AFreq, FNumCases);
// Get count of unique values and frequencies of each
- lastCategoryValue := AValues[0];
+ lastCategoryValue := FValues[0];
ACategoryValues[0] := lastCategoryValue;
AFreq[0] := 1;
iCat := 0;
- for i := 1 to nCases-1 do
+ for i := 1 to FNumCases-1 do
begin
- if (lastCategoryValue = AValues[i]) then
+ if (lastCategoryValue = FValues[i]) then
AFreq[iCat] := AFreq[iCat] + 1
else
begin // new value
inc(iCat);
AFreq[iCat] := 1;
- lastCategoryValue := AValues[i];
+ lastCategoryValue := FValues[i];
ACategoryValues[iCat] := lastCategoryValue;
end;
end;
@@ -375,94 +310,32 @@ begin
// Get cumulative frequencies and percentile ranks
SetLength(APercentiles, nCategories);
- APercentiles[0] := AFreq[0] * 0.5 / nCases;
+ APercentiles[0] := AFreq[0] * 0.5 / FNumCases;
cumFreq := AFreq[0];
for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1
begin
cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point
- APercentiles[i] := cumFreqCentered / nCases;
+ APercentiles[i] := cumFreqCentered / FNumCases;
cumFreq := cumFreq + AFreq[i];
end;
end;
-procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer;
- AReport: TStrings);
-var
- w: Integer;
- nCategories: Integer;
- i: Integer;
- cumFreq: Integer;
- m: TQuartileMethod;
+procedure TDescriptiveStats.Reset;
begin
- w := 10 + ADecPlaces - 3;
- AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
- AReport.Add('');
- AReport.Add('Number of cases: %*d', [W, FNumCases]);
- AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]);
- AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]);
- AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]);
- AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]);
- AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]);
- AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
- FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]);
- AReport.Add('');
- AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]);
- AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]);
- AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]);
- AReport.Add('');
- AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]);
- AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]);
- AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]);
- AReport.Add('Std. Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]);
- AReport.Add('');
- AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]);
- AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]);
- AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]);
- AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]);
-
- if (doAlternativeQuartiles in FOptions) then
- begin
- AReport.Add('');
- AReport.Add('');
- AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
- AReport.Add('');
- AReport.Add('Method First Quartile Median Third Quartile');
- AReport.Add('------ -------------- ---------- --------------');
- for m := Low(TQuartileMethod) to High(TQuartileMethod) do
- AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]);
- AReport.Add('');
- AReport.Add('NOTES:');
- AReport.Add('Method 1 is the weighted average at X[np] where ');
- AReport.Add(' n is no. of cases, p is percentile / 100');
- AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
- AReport.Add('Method 3 is the empirical distribution function.');
- AReport.Add('Method 4 is called the empirical distribution function - averaging.');
- AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
- AReport.Add('Method 6 is the closest observation method.');
- AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
- AReport.Add('Method 8 was used in an older Microsoft Excel version.');
- AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
- end;
-
- if (doPercentileRanks in FOptions) then
- begin
- nCategories := Length(FCategoryValues);
- cumFreq := 0;
- AReport.Add('');
- AReport.Add('');
- AReport.Add('PERCENTILE RANKS');
- AReport.Add('');
- AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
- AReport.Add('----------- --------- --------- ---------------');
- for i := 0 to nCategories-1 do
- begin
- cumFreq := cumFreq + FFreqValues[i];
- AReport.Add(' %8.3f %8d %8d %12.2f%%', [
- FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0
- ]);
- end;
- end;
+  // Bring every result field into a defined "not calculated" state so that a
+  // property read before or after an analysis without data never returns a
+  // stale or uninitialized number.
+  FValues := nil;
+  FNumCases := 0;
+  FSum := NaN;
+  FMean := NaN;
+  FStdErrorMean := NaN;
+  FMin := NaN;
+  FMax := NaN;
+  FVariance := NaN;
+  FStdDev := NaN;
+  FSkew := NaN;
+  FStdErrorSkew := NaN;
+  FKurtosis := NaN;
+  FStdErrorKurtosis := NaN;
+  FFirstQuartile := NaN;
+  FMedian := NaN;
+  FThirdQuartile := NaN;
end;
@@ -514,8 +387,7 @@ var
page: TTabSheet;
reportFrame: TReportFrame;
lReport: TStrings;
- lDescrStats: TDescriptiveStats;
- options: TDescriptiveOptions;
+ lStats: TDescriptiveStats;
begin
noSelected := SelList.Items.Count;
if noSelected = 0 then
@@ -555,29 +427,23 @@ begin
for i := 0 to NoSelected-1 do
PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
- // Prepare options
- options := [];
- if PercentileChk.Checked then Include(options, doPercentileRanks);
- if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles);
- if CaseChk.Checked then Include(options, doCasewiseDeletion);
-
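+  // Create the report buffer and the statistics object; the selected columns are handed over only when CaseChk is checked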
lReport := TStringList.Create;
- lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100);
+ if not CaseChk.Checked then
+ lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid)
+ else
+ lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected);
try
for i := 0 to noSelected-1 do
begin
// Analyze the data and get descriptive stats
- lDescrStats.Analyze(selected[i], options);
+ lStats.Analyze(selected[i]);
// Store z values, (value - mean) / stdDev, to grid, if needed
- zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev);
+ zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev);
// Write descriptive stats to report
- lReport.Clear;
- lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
- lReport.Add('');
- lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]),
- DecPlacesEdit.Value, lReport);
+ WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport);
// Display report in the page of the variable
reportFrame := GetReportFrame(i);
@@ -585,7 +451,7 @@ begin
end;
finally
// Clean up
- lDescrStats.Free;
+ lStats.Free;
lReport.Free;
end;
end;
@@ -651,7 +517,6 @@ begin
for i := PageControl.PageCount-1 downto 1 do
PageControl.Pages[i].Free;
-
PageControl.Pages[0].Caption := 'Report';
CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT);
@@ -698,12 +563,16 @@ var
F: TReportFrame;
begin
inherited;
-
- for i := 0 to PageControl.PageCount-1 do
+ (*
+ for i := PageControl.PageCount-1 downto 0 do
begin
- F := GetReportFrame(i);
- if Assigned(F) then F.Clear;
- end;
+ if i > 0 then
+ PageControl.Pages[i].Free
+ else begin
+ F := GetReportFrame(i);
+ if Assigned(F) then F.Clear;
+ end;
+ end; *)
lSelected := false;
for i := 0 to VarList.Items.Count-1 do
@@ -733,6 +602,103 @@ begin
end;
+procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats;
+ AVarName: String; AReport: TStrings);
+var
+ w: Integer;
+ i: Integer;
+ confLevel: Double;
+ decPlaces: Integer;
+ m: TQuartileMethod;
+ Q: TQuartiles;
+ cumFreq: Integer;
+ nCategories: Integer;
+ categories: DblDyneVec = nil;
+ freq: IntDyneVec = nil;
+ percentiles: DblDyneVec = nil;
+begin
+ confLevel := StrToFloat(CIEdit.Text) / 100;
+ decPlaces := DecPlacesEdit.Value;
+ w := 10 + decPlaces - 3;
+
+ AReport.Clear;
+
+ AReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
+ AReport.Add('');
+
+ AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
+ AReport.Add('');
+ AReport.Add('Number of cases: %*d', [W, Stats.NumCases]);
+// AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]);
+ AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]);
+ AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]);
+ AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]);
+ AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]);
+ AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
+ confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel]
+ ]);
+ AReport.Add('');
+ AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]);
+ AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]);
+ AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]);
+ AReport.Add('');
+ AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]);
+ AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]);
+ AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]);
+ AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]);
+ AReport.Add('');
+ AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]);
+ AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]);
+ AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]);
+ AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]);
+
+ if AllQuartilesChk.Checked then
+ begin
+ Stats.CalcQuartiles(Q);
+ AReport.Add('');
+ AReport.Add('');
+ AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
+ AReport.Add('');
+ AReport.Add('Method First Quartile Median Third Quartile');
+ AReport.Add('------ -------------- ---------- --------------');
+ for m := Low(TQuartileMethod) to High(TQuartileMethod) do
+ AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]);
+ AReport.Add('');
+ AReport.Add('NOTES:');
+ AReport.Add('Method 1 is the weighted average at X[np] where ');
+ AReport.Add(' n is no. of cases, p is percentile / 100');
+ AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
+ AReport.Add('Method 3 is the empirical distribution function.');
+ AReport.Add('Method 4 is called the empirical distribution function - averaging.');
+ AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
+ AReport.Add('Method 6 is the closest observation method.');
+ AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
+ AReport.Add('Method 8 was used in an older Microsoft Excel version.');
+ AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
+ end;
+
+ if PercentileChk.Checked then
+ begin
+ Stats.PercentileRank(categories, percentiles, freq);
+ nCategories := Length(categories);
+ cumFreq := 0;
+ AReport.Add('');
+ AReport.Add('');
+ AReport.Add('PERCENTILE RANKS');
+ AReport.Add('');
+ AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
+ AReport.Add('----------- --------- --------- ---------------');
+ for i := 0 to nCategories-1 do
+ begin
+ cumFreq := cumFreq + freq[i];
+ AReport.Add(' %8.3f %8d %8d %12.2f%%', [
+ categories[i], freq[i], cumFreq, percentiles[i]*100.0
+ ]);
+ end;
+ end;
+end;
+
+
procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer;
const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
var
@@ -759,10 +725,10 @@ begin
begin
if CaseChk.Checked then
begin
- if not ValidValue(i, AColsSelected[AColIndex]) then continue;
+ if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue;
end
else
- if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
+ if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]);
zValue := (value - AMean) / AStdDev;
diff --git a/applications/lazstats/source/units/globals.pas b/applications/lazstats/source/units/globals.pas
index cb9ebbb58..3ed9622bb 100644
--- a/applications/lazstats/source/units/globals.pas
+++ b/applications/lazstats/source/units/globals.pas
@@ -41,6 +41,8 @@ type
LHelpPath: String;
end;
+ ELazStats = class(Exception);
+
var
NoCases : integer;
NoVariables : integer;
diff --git a/applications/lazstats/source/units/gridprocs.pas b/applications/lazstats/source/units/gridprocs.pas
index 97a97b2f1..e921af1b1 100644
--- a/applications/lazstats/source/units/gridprocs.pas
+++ b/applications/lazstats/source/units/gridprocs.pas
@@ -9,7 +9,7 @@ uses
Globals, DictionaryUnit;
function CollectValues(AGrid: TStringGrid; AColIndex: Integer;
- AColCheck: IntDyneVec): DblDyneVec;
+ AColCheck: IntDyneVec = nil): DblDyneVec;
procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer;
const AColCheck: IntDyneVec; out AMin, AMax: Double);
@@ -42,6 +42,7 @@ uses
function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec;
var
row, n: Integer;
+ val: Double;
begin
SetLength(Result, AGrid.RowCount);
n := 0;
@@ -54,7 +55,11 @@ begin
begin
if not GoodRecord(AGrid, row, AColCheck) then continue;
end;
- Result[n] := StrToFloat(trim(AGrid.Cells[AColIndex, row]));
+ if TryStrToFloat(trim(AGrid.Cells[AColIndex, row]), val) then
+ Result[n] := val
+ else
+ raise ELazStats.CreateFmt('Non-numeric string "%s" in column %d, row %d',
+ [AGrid.Cells[AColIndex, row], AColIndex, row]);
inc(n);
end;
SetLength(Result, n);
diff --git a/applications/lazstats/source/units/mathunit.pas b/applications/lazstats/source/units/mathunit.pas
index 912e85e9b..d8e97e9a6 100644
--- a/applications/lazstats/source/units/mathunit.pas
+++ b/applications/lazstats/source/units/mathunit.pas
@@ -7,7 +7,8 @@ unit MathUnit;
interface
uses
- Classes, SysUtils;
+ Classes, SysUtils,
+ Globals;
const
TWO_PI = 2.0 * PI;
@@ -43,6 +44,13 @@ function FactorialLn(n: Integer): Double;
function PoissonPDF(n: integer; a: double): Double;
function PoissonCDF(n: Integer; a: double): Double;
+
+procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
+procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
+procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
+procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
+
+
implementation
uses
@@ -474,6 +482,70 @@ begin
end;
+
+{===============================================================================
+* Vector-based calculations
+===============================================================================}
+{ Returns the largest (AMax) and the smallest (AMin) element of AData.
+  For an empty vector AMax is -Infinity and AMin is +Infinity. }
+procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
+var
+  value: Double;
+begin
+  AMax := -Infinity;
+  AMin := Infinity;
+  for value in AData do
+  begin
+    if value < AMin then AMin := value;
+    if value > AMax then AMax := value;
+  end;
+end;
+
+
+{ Convenience wrapper around Calc_MeanVarStdDev for callers which need the
+  mean and the standard deviation, but not the variance. }
+procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
+var
+  discardedVariance: Double;
+begin
+  Calc_MeanVarStdDev(AData, AMean, discardedVariance, AStdDev);
+end;
+
+
+{ Calculates the arithmetic mean, the sample variance (n-1 in the denominator)
+  and the sample standard deviation of the values in AData.
+    - Empty vector: all three results are NaN.
+    - Single element: AMean is this element, AVariance and AStdDev are NaN. }
+procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
+var
+  sum, ss: Double;
+  n: Integer;
+begin
+  AMean := NaN;
+  AVariance := NaN;
+  AStdDev := NaN;
+
+  n := Length(AData);
+  if n = 0 then
+    exit;
+
+  Calc_SumSS(AData, sum, ss);
+
+  AMean := sum / n;
+  if n = 1 then
+    exit;
+
+  // Sum of squared deviations = SS - (sum of values)^2 / n.
+  // The former expression, ((ss - sqr(AMean)) / n) / (n - 1), subtracted the
+  // squared mean and divided by n once too often, giving a wrong variance.
+  AVariance := (ss - sqr(sum) / n) / (n - 1);
+
+  // For (nearly) constant data, rounding can leave a tiny negative difference;
+  // clamp it so that sqrt() stays defined.
+  if AVariance < 0 then
+    AVariance := 0;
+
+  AStdDev := sqrt(AVariance);
+end;
+
+
+{ Returns the sum (Sum) and the sum of squares (SS) of the elements of AData.
+  Both are 0 for an empty vector. }
+procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
+var
+  value: Double;
+begin
+  SS := 0;
+  Sum := 0;
+  for value in AData do
+  begin
+    Sum := Sum + value;
+    SS := SS + sqr(value);
+  end;
+end;
+
+
initialization
InitFactLn();