Files
lazarus-ccr/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas
2020-10-03 21:49:09 +00:00

742 lines
20 KiB
ObjectPascal

unit DescriptiveUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs,
StdCtrls, ExtCtrls, Buttons, Spin, ComCtrls, Grids,
MainUnit, Globals, FunctionsLib, ReportFrameUnit, BasicStatsReportFormUnit,
DataProcs, DictionaryUnit;
type
{ TDescriptiveStats }
TQuartileMethod = 1..8;
TQuartile = 1..3;
TQuartiles = array[TQuartileMethod, TQuartile] of Double;
TDescriptiveStats = class
private
FDataGrid: TStringGrid;
FColIndex: Integer;
FColsSelected: IntDyneVec;
FValues: DblDyneVec;
FMean, FStdErrorMean: Double;
FMin, FMax: Double;
FSum: Double;
FVariance, FStdDev: Double;
FSkew, FStdErrorSkew: Double;
FKurtosis, FStdErrorKurtosis: Double;
FFirstQuartile, FMedian, FThirdQuartile: Double;
FNumCases: Integer;
procedure Calc_Quartiles;
procedure Calc_Skew_Kurtosis;
// function GetMeanLimits(AIndex: Integer; AConfLevel: Double): Double;
function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
procedure Reset;
public
constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil);
procedure Analyze(AColIndex: Integer);
procedure CalcQuartiles(out AQuartiles: TQuartiles);
procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec);
property FirstQuartile: Double read FFirstQuartile;
property Kurtosis: Double read FKurtosis;
property Max: Double read FMax;
property Median: Double read FMedian;
property Mean: Double read FMean;
property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits;
property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits;
property Min: Double read FMin;
property NumCases: Integer read FNumCases;
property Skew: Double read FSkew;
property StdErrorSkew: Double read FStdErrorSkew;
property StdDev: Double read FStdDev;
property StdErrorKurtosis: Double read FStdErrorKurtosis;
property StdErrorMean: Double read FStdErrorMean;
property StdErrorSkey: Double read FStdErrorSkew;
property ThirdQuartile: Double read FThirdQuartile;
property Variance: Double read FVariance;
end;
{ TDescriptiveFrm }
TDescriptiveFrm = class(TBasicStatsReportForm)
CaseChk: TCheckBox;
DecPlacesEdit: TSpinEdit;
Label4: TLabel;
PageControl: TPageControl;
ReportPage: TTabSheet;
ZScoresToGridChk: TCheckBox;
AllQuartilesChk: TCheckBox;
Label2: TLabel;
Label3: TLabel;
PercentileChk: TCheckBox;
OptionsGroup: TGroupBox;
InBtn: TBitBtn;
OutBtn: TBitBtn;
AllBtn: TBitBtn;
CIEdit: TEdit;
Label1: TLabel;
VarList: TListBox;
SelList: TListBox;
procedure AllBtnClick(Sender: TObject);
procedure InBtnClick(Sender: TObject);
procedure OutBtnClick(Sender: TObject);
procedure SelListDblClick(Sender: TObject);
procedure VarListDblClick(Sender: TObject);
procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean);
private
function GetReportFrame(APageIndex: Integer): TReportFrame;
procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings);
procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec;
AMean, AStdDev: Double);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
procedure UpdateBtnStates; override;
public
constructor Create(AOwner: TComponent); override;
procedure Reset; override;
end;
var
DescriptiveFrm: TDescriptiveFrm;
implementation
{$R *.lfm}
uses
Math,
Utils, MathUnit, GridProcs;
{===============================================================================
* TDescriptiveStats
*-------------------------------------------------------------------------------
* TDescriptiveStats is a helper class which
* - does all the required calculations (Analyze) and
* - prepares the report for each variable (WriteToReport).
*==============================================================================}
constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
AColsSelected: IntDyneVec = nil);
begin
inherited Create;
FDataGrid := ADataGrid;
FColsSelected := AColsSelected;
Reset;
end;
procedure TDescriptiveStats.Analyze(AColIndex: Integer);
begin
Reset;
FColIndex := AColIndex;
if Length(FColsSelected) = 0 then
FValues := CollectValues(FDataGrid, AColIndex, FColsSelected)
else
FValues := CollectValues(FDataGrid, AColIndex);
FNumCases := Length(FValues);
SortOnX(FValues);
MathUnit.Calc_MaxMin(FValues, FMax, FMin);
MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev);
if FNumCases > 1 then
FStdErrorMean := sqrt(FVariance / FNumCases);
Calc_Skew_Kurtosis;
Calc_Quartiles;
end;
procedure TDescriptiveStats.Calc_Quartiles;
begin
FFirstQuartile := Quartiles(2, 0.25, FNumCases, FValues);
FMedian := Quartiles(2, 0.5, FNumCases, FValues);
FThirdQuartile := Quartiles(2, 0.75, FNumCases, FValues);
end;
procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles);
var
q: TQuartile;
m: TQuartileMethod;
begin
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
for q := Low(TQuartile) to High(TQuartile) do
AQuartiles[m, q] := Quartiles(m, 0.25*q, FNumCases, FValues);
end;
procedure TDescriptiveStats.Calc_Skew_Kurtosis;
procedure Moments(out M2, M3, M4: Double);
var
i: Integer;
dev, devSqr: Double;
begin
M2 := 0;
M3 := 0;
M4 := 0;
for i := 0 to High(FValues) do
begin
dev := FValues[i] - FMean;
devSqr := Sqr(dev);
M2 := M2 + devSqr;
M3 := M3 + dev * devSqr;
M4 := M4 + sqr(devSqr);
end;
end;
var
num, denom: Double;
stdDev3, stdDev4: Double;
M2, M3, M4: Double;
begin
FSkew := NaN;
FStdErrorSkew := NaN;
FKurtosis := NaN;
FStdErrorKurtosis := NaN;
if FNumCases < 2 then
exit;
stdDev3 := FStdDev * FStdDev * FStdDev;
stdDev4 := StdDev3 * FStdDev;
Moments(M2, M3, M4);
if FNumCases > 2 then
begin
FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 3) * stdDev3);
num := 6.0 * FNumCases * (FNumCases - 1);
denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3);
FStdErrorSkew := sqrt(num / denom);
end;
if FNumCases > 3 then
begin
num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1);
denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4;
FKurtosis := num / denom;
num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew);
denom := (FNumCases - 3) * (FNumCases + 5);
FStdErrorKurtosis := sqrt(num / denom);
end;
end;
function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
var
alpha: Double;
confLev: Double;
DOF: Integer;
delta: Double;
begin
alpha := (1 - AConfLevel) / 2;
confLev := 1 - alpha;
if FNumCases < 120 then
begin
DOF := FNumCases - 1;
delta := FStdErrorMean * InverseT(confLev, DOF);
end else
delta := FStdErrorMean * InverseZ(confLev);
case AIndex of
0: Result := FMean - delta;
1: Result := FMean + delta;
end;
end;
// Computes the percentile ranks of values stored in the data grid at the
// loaded columns. The values are assumed to be sorted.
procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec;
out AFreq: IntDyneVec);
var
i, iCat, nCategories: Integer;
lastCategoryValue: Double;
cumFreqCentered: Double;
cumFreq: Integer;
begin
// silence the compiler
ACategoryvalues := nil;
AFreq := nil;
APercentiles := nil;
SetLength(ACategoryValues, FNumCases); // over-dimension; will be trimmed later
SetLength(AFreq, FNumCases);
// Get count of unique values and frequencies of each
lastCategoryValue := FValues[0];
ACategoryValues[0] := lastCategoryValue;
AFreq[0] := 1;
iCat := 0;
for i := 1 to FNumCases-1 do
begin
if (lastCategoryValue = FValues[i]) then
AFreq[iCat] := AFreq[iCat] + 1
else
begin // new value
inc(iCat);
AFreq[iCat] := 1;
lastCategoryValue := FValues[i];
ACategoryValues[iCat] := lastCategoryValue;
end;
end;
// trim arrays
nCategories := iCat + 1;
SetLength(ACategoryValues, nCategories);
SetLength(AFreq, nCategories);
// Get cumulative frequencies and percentile ranks
SetLength(APercentiles, nCategories);
APercentiles[0] := AFreq[0] * 0.5 / FNumCases;
cumFreq := AFreq[0];
for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1
begin
cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point
APercentiles[i] := cumFreqCentered / FNumCases;
cumFreq := cumFreq + AFreq[i];
end;
end;
procedure TDescriptiveStats.Reset;
begin
FValues := nil;
FMean := NaN;
FStdErrorMean := NaN;
FMin := NaN;
FMax := NaN;
FVariance := NaN;
FStdDev := NaN;
FStdErrorMean := NaN;
FSkew := NaN;
FStdErrorSkew := NaN;
FKurtosis := NaN;
FStdErrorKurtosis := NaN;
FNumCases := 0;
end;
{ TDescriptiveFrm }
constructor TDescriptiveFrm.Create(AOwner: TComponent);
begin
inherited;
FReportFrame.Parent := ReportPage;
FReportFrame.BorderSpacing.Left := 0;
FReportFrame.BorderSpacing.Top := 0;
FReportFrame.BorderSpacing.Bottom := 0;
FReportFrame.BorderSpacing.Right := 0;
end;
procedure TDescriptiveFrm.AdjustConstraints;
begin
ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height + OptionsGroup.Height +
CIEdit.Height + ButtonBevel.Height + CloseBtn.Height + VarList.BorderSpacing.Bottom +
OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top +
DecPlacesEdit.Height + DecPlacesEdit.BorderSpacing.Top;
ParamsPanel.Constraints.MinWidth := Math.Max(
4*CloseBtn.Width + 3*HelpBtn.BorderSpacing.Right,
OptionsGroup.Width
);
end;
procedure TDescriptiveFrm.AllBtnClick(Sender: TObject);
var
i : integer;
begin
for i := 0 to VarList.Items.Count-1 do
SelList.Items.Add(VarList.Items.Strings[i]);
VarList.Clear;
UpdateBtnStates;
end;
procedure TDescriptiveFrm.Compute;
var
cellString: String;
i, j: Integer;
noSelected: Integer;
selected: IntDyneVec = nil;
page: TTabSheet;
reportFrame: TReportFrame;
lReport: TStrings;
lStats: TDescriptiveStats;
begin
noSelected := SelList.Items.Count;
if noSelected = 0 then
begin
MessageDlg('No variables selected.', mtError, [mbOK], 0);
exit;
end;
SetLength(selected, noSelected);
// Find column index of selected variables
for i := 0 to noSelected - 1 do
begin
cellstring := SelList.Items[i];
for j := 1 to NoVariables do
if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i] := j;
end;
// Create a tabsheet with ReportFrame for each selected variable (in addition to the built-in one)
if noSelected > PageControl.PageCount then
begin
for i := 1 to noSelected-1 do // we do not create a tab for the first variable - it exists by default
begin
page := TTabSheet.Create(PageControl);
page.Parent := PageControl;
reportFrame := TReportFrame.Create(page);
reportFrame.Parent := page;
reportFrame.Align := alClient;
InitToolBar(reportFrame.ReportToolbar, tpRight);
end;
end;
// Remove excess pages from previous session
while PageControl.PageCount > noSelected do
PageControl.Pages[PageControl.PageCount-1].Free;
// Every tab gets the name of the corresponding variable.
for i := 0 to NoSelected-1 do
PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
//
lReport := TStringList.Create;
if not CaseChk.Checked then
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid)
else
lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected);
try
for i := 0 to noSelected-1 do
begin
// Analyze the data and get descriptive stats
lStats.Analyze(selected[i]);
// Store z values, (value - mean) / stdDev, to grid, if needed
zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev);
// Write descriptive stats to report
WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport);
// Display report in the page of the variable
reportFrame := GetReportFrame(i);
reportFrame.DisplayReport(lReport);
end;
finally
// Clean up
lStats.Free;
lReport.Free;
end;
end;
function TDescriptiveFrm.GetReportFrame(APageIndex: Integer): TReportFrame;
var
page: TTabSheet;
begin
Result := nil;
if (APageIndex >=0) and (APageIndex < PageControl.PageCount) then
begin
page := PageControl.Pages[APageIndex];
if (page.ControlCount > 0) and (page.Controls[0] is TReportFrame) then
Result := TReportFrame(page.Controls[0]);
end;
end;
procedure TDescriptiveFrm.InBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < VarList.Items.Count do
begin
if VarList.Selected[i] then
begin
SelList.Items.Add(VarList.Items[i]);
VarList.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TDescriptiveFrm.OutBtnClick(Sender: TObject);
var
i: integer;
begin
i := 0;
while i < SelList.Items.Count do
begin
if SelList.Selected[i] then
begin
VarList.Items.Add(SelList.Items[i]);
SelList.Items.Delete(i);
i := 0;
end else
inc(i);
end;
UpdateBtnStates;
end;
procedure TDescriptiveFrm.Reset;
var
i: integer;
begin
inherited;
for i := PageControl.PageCount-1 downto 1 do
PageControl.Pages[i].Free;
PageControl.Pages[0].Caption := 'Report';
CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT);
VarList.Clear;
SelList.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
UpdateBtnStates;
end;
procedure TDescriptiveFrm.SelListDblClick(Sender: TObject);
var
index: integer;
begin
index := SelList.ItemIndex;
if index > -1 then
begin
VarList.Items.Add(SelList.Items[index]);
SelList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TDescriptiveFrm.VarListDblClick(Sender: TObject);
var
index: integer;
begin
index := VarList.ItemIndex;
if index > -1 then
begin
SelList.Items.Add(VarList.Items[index]);
VarList.Items.Delete(index);
UpdateBtnStates;
end;
end;
procedure TDescriptiveFrm.UpdateBtnStates;
var
lSelected: Boolean;
i: Integer;
F: TReportFrame;
begin
inherited;
(*
for i := PageControl.PageCount-1 downto 0 do
begin
if i > 0 then
PageControl.Pages[i].Free
else begin
F := GetReportFrame(i);
if Assigned(F) then F.Clear;
end;
end; *)
lSelected := false;
for i := 0 to VarList.Items.Count-1 do
if VarList.Selected[i] then
begin
lSelected := true;
break;
end;
InBtn.Enabled := lSelected;
lSelected := false;
for i := 0 to SelList.Items.Count-1 do
if SelList.Selected[i] then
begin
lSelected := true;
break;
end;
OutBtn.Enabled := lSelected;
AllBtn.Enabled := VarList.Count > 0;
end;
procedure TDescriptiveFrm.VarListSelectionChange(Sender: TObject; User: boolean);
begin
UpdateBtnStates;
end;
procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats;
AVarName: String; AReport: TStrings);
var
w: Integer;
i: Integer;
confLevel: Double;
decPlaces: Integer;
m: TQuartileMethod;
Q: TQuartiles;
cumFreq: Integer;
nCategories: Integer;
categories: DblDyneVec = nil;
freq: IntDyneVec = nil;
percentiles: DblDyneVec = nil;
begin
confLevel := StrToFloat(CIEdit.Text) / 100;
decPlaces := DecPlacesEdit.Value;
w := 10 + decPlaces - 3;
AReport.Clear;
AReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
AReport.Add('');
AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']);
AReport.Add('');
AReport.Add('Number of cases: %*d', [W, Stats.NumCases]);
// AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]);
AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]);
AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]);
AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]);
AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]);
AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [
confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel]
]);
AReport.Add('');
AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]);
AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]);
AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]);
AReport.Add('');
AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]);
AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]);
AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]);
AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]);
AReport.Add('');
AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]);
AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]);
AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]);
AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]);
if AllQuartilesChk.Checked then
begin
Stats.CalcQuartiles(Q);
AReport.Add('');
AReport.Add('');
AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES');
AReport.Add('');
AReport.Add('Method First Quartile Median Third Quartile');
AReport.Add('------ -------------- ---------- --------------');
for m := Low(TQuartileMethod) to High(TQuartileMethod) do
AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]);
AReport.Add('');
AReport.Add('NOTES:');
AReport.Add('Method 1 is the weighted average at X[np] where ');
AReport.Add(' n is no. of cases, p is percentile / 100');
AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
AReport.Add('Method 3 is the empirical distribution function.');
AReport.Add('Method 4 is called the empirical distribution function - averaging.');
AReport.Add('Method 5 is called the empirical distribution function = Interpolation.');
AReport.Add('Method 6 is the closest observation method.');
AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
AReport.Add('Method 8 was used in an older Microsoft Excel version.');
AReport.Add('See the internet site http://www.xycoon.com/ for the above.');
end;
if PercentileChk.Checked then
begin
Stats.PercentileRank(categories, percentiles, freq);
nCategories := Length(categories);
cumFreq := 0;
AReport.Add('');
AReport.Add('');
AReport.Add('PERCENTILE RANKS');
AReport.Add('');
AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank');
AReport.Add('----------- --------- --------- ---------------');
for i := 0 to nCategories-1 do
begin
cumFreq := cumFreq + freq[i];
AReport.Add(' %8.3f %8d %8d %12.2f%%', [
categories[i], freq[i], cumFreq, percentiles[i]*100.0
]);
end;
end;
end;
procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer;
const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
var
i, idx: Integer;
value, zValue: Double;
varName: String;
begin
if AStdDev = 0 then begin
ErrorMsg('Cannot store z values to grid because StdDev is zero.');
exit;
end;
varName := OS3MainFrm.DataGrid.Cells[AColIndex, 0] + '_z';
idx := OS3MainFrm.DataGrid.Rows[0].IndexOf(varName);
if idx = -1 then
begin
DictionaryFrm.NewVar(NoVariables + 1);
DictionaryFrm.DictGrid.Cells[1, NoVariables] := varName;
OS3MainFrm.DataGrid.Cells[NoVariables, 0] := varName;
idx := NoVariables;
end;
for i := 1 to NoCases do
begin
if CaseChk.Checked then
begin
if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue;
end
else
if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]);
zValue := (value - AMean) / AStdDev;
OS3MainFrm.DataGrid.Cells[idx, i] := Format('%8.5f', [zValue]);
// to do: read number of decimal places from Dictionary and use in Format().
end;
end;
end.