diff --git a/applications/lazstats/source/LazStats.lpi b/applications/lazstats/source/LazStats.lpi index 5c53d9e47..f262e789d 100644 --- a/applications/lazstats/source/LazStats.lpi +++ b/applications/lazstats/source/LazStats.lpi @@ -1558,11 +1558,6 @@ - - - - - diff --git a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.lfm b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.lfm index 14b64e8a4..197ce97c6 100644 --- a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.lfm +++ b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.lfm @@ -1,37 +1,45 @@ inherited DescriptiveFrm: TDescriptiveFrm Left = 526 - Height = 384 + Height = 432 Top = 202 Width = 900 HelpType = htKeyword HelpKeyword = 'html/DistributionStatistics.htm' Caption = 'Descriptive Statistics' - ClientHeight = 384 + ClientHeight = 432 ClientWidth = 900 inherited ParamsPanel: TPanel Left = 16 - Height = 352 + Height = 400 Top = 16 Width = 322 BorderSpacing.Around = 8 - ClientHeight = 352 + ClientHeight = 400 ClientWidth = 322 inherited CloseBtn: TButton + Left = 267 + Top = 375 TabOrder = 9 end inherited ComputeBtn: TButton AnchorSideBottom.Control = ParamsPanel AnchorSideBottom.Side = asrBottom + Left = 184 + Top = 375 TabOrder = 10 end inherited ResetBtn: TButton + Left = 122 + Top = 375 TabOrder = 8 end inherited HelpBtn: TButton + Left = 63 + Top = 375 TabOrder = 7 end inherited ButtonBevel: TBevel - Top = 311 + Top = 359 Width = 322 end object Label2: TLabel[5] @@ -51,7 +59,7 @@ inherited DescriptiveFrm: TDescriptiveFrm AnchorSideRight.Control = AllBtn AnchorSideBottom.Control = OptionsGroup Left = 0 - Height = 183 + Height = 202 Top = 17 Width = 130 Anchors = [akTop, akLeft, akRight, akBottom] @@ -84,7 +92,7 @@ inherited DescriptiveFrm: TDescriptiveFrm AnchorSideBottom.Control = VarList AnchorSideBottom.Side = asrBottom Left = 192 - Height = 183 + Height = 202 Top = 17 Width = 130 Anchors = [akTop, akLeft, akRight, akBottom] @@ -149,7 +157,7 @@ inherited DescriptiveFrm: TDescriptiveFrm AnchorSideRight.Control = CIEdit Left = 0 Height = 15 - Top = 292 + Top = 311 Width = 174 BorderSpacing.Right = 8 Caption = 'Confidence Interval for the Mean' @@ -161,7 +169,7 @@ inherited DescriptiveFrm: TDescriptiveFrm AnchorSideBottom.Control = CIEdit Left = 0 Height = 72 - Top = 208 + Top = 227 Width = 306 Anchors = [akLeft, akBottom] AutoSize = True @@ -197,7 +205,7 @@ inherited DescriptiveFrm: TDescriptiveFrm Caption = 'z Scores to Grid' TabOrder = 1 end - object PcntileChk: TCheckBox + object PercentileChk: TCheckBox AnchorSideTop.Side = asrBottom Left = 12 Height = 19 @@ -208,7 +216,7 @@ inherited DescriptiveFrm: TDescriptiveFrm Caption = 'Show Percentile Ranks' TabOrder = 2 end - object AllQrtilesChk: TCheckBox + object AllQuartilesChk: TCheckBox AnchorSideTop.Side = asrBottom Left = 174 Height = 19 @@ -224,29 +232,66 @@ inherited DescriptiveFrm: TDescriptiveFrm AnchorSideLeft.Control = Label1 AnchorSideLeft.Side = asrBottom AnchorSideRight.Side = asrBottom + AnchorSideBottom.Control = DecPlacesEdit Left = 182 Height = 23 - Top = 288 - Width = 41 + Top = 307 + Width = 50 Alignment = taRightJustify Anchors = [akLeft, akBottom] BorderSpacing.Left = 8 TabOrder = 6 Text = '95.0' end + object DecPlacesEdit: TSpinEdit[15] + AnchorSideLeft.Control = CIEdit + AnchorSideTop.Side = asrBottom + AnchorSideRight.Control = CIEdit + AnchorSideRight.Side = asrBottom + AnchorSideBottom.Control = ButtonBevel + Left = 182 + Height = 23 + Top = 334 + Width = 50 + Alignment = taRightJustify + Anchors = [akLeft, akRight, akBottom] + BorderSpacing.Top = 4 + BorderSpacing.Bottom = 2 + MaxValue = 10 + TabOrder = 11 + Value = 3 + end + object Label4: TLabel[16] + AnchorSideLeft.Control = Label1 + AnchorSideTop.Control = DecPlacesEdit + AnchorSideTop.Side = asrCenter + Left = 0 + Height = 15 + Top = 338 + Width = 79 + Caption = 'Decimal places' + ParentColor = False + end end inherited ParamsSplitter: TSplitter Left = 350 - Height = 384 + Height = 432 end - object ReportPanel: TPanel[2] - Left = 355 - Height = 384 - Top = 0 - Width = 545 + object PageControl: TPageControl[2] + Left = 359 + Height = 416 + Top = 8 + Width = 533 + ActivePage = ReportPage Align = alClient - Anchors = [akTop, akLeft, akRight] - BevelOuter = bvNone + BorderSpacing.Left = 4 + BorderSpacing.Top = 8 + BorderSpacing.Right = 8 + BorderSpacing.Bottom = 8 + TabIndex = 0 TabOrder = 2 + object ReportPage: TTabSheet + Caption = 'Report' + end end end diff --git a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas index c43f19697..a4cc868ea 100644 --- a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas +++ b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas @@ -6,23 +6,73 @@ interface uses Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, - StdCtrls, ExtCtrls, Buttons, - MainUnit, Globals, FunctionsLib, BasicStatsReportFormUnit, + StdCtrls, ExtCtrls, Buttons, Spin, ComCtrls, Grids, + MainUnit, Globals, FunctionsLib, ReportFrameUnit, BasicStatsReportFormUnit, DataProcs, DictionaryUnit; type - { TDescriptiveFrm } +{ TDescriptiveStats } + + TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion); + TDescriptiveOptions = set of TDescriptiveOption; + TQuartileMethod = 1..8; + TQuartile = 1..3; + + TDescriptiveStats = class + private + FDataGrid: TStringGrid; + FColIndex: Integer; + FConfLevel: Double; // usually 0.95 + FColsSelected: IntDyneVec; + FMean, FStdErrorMean, FDeltaMean: Double; + FMin, FMax: Double; + FSum: Double; + FVariance, FStdDev: Double; + FSkew, FStdErrorSkew: Double; + FKurtosis, FStdErrorKurtosis: Double; + FFirstQuartile, FMedian, FThirdQuartile: Double; + FCategoryValues, FPercentiles: DblDyneVec; + FFreqValues: IntDyneVec; + FOptions: TDescriptiveOptions; + FNumCases: Integer; + FQuartiles: array[TQuartileMethod, TQuartile] of Double; + procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec); + function Calc_DeltaMean(AStdErrorOfMean: Double): Double; + procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double; + out M2, M3, M4: Double); + procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double); + procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer; + out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double); + procedure Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec; + out ASum, ASumOfSquares, AMin, AMax: Double); + procedure CollectValues(out AValues: DblDyneVec); + procedure PercentileRank(const AValues: DblDyneVec; + out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); + public + constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double); + procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions); + procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings); + property Mean: Double read FMean; + property StdDev: Double read FStdDev; + // more can be added... + end; + + +{ TDescriptiveFrm } TDescriptiveFrm = class(TBasicStatsReportForm) CaseChk: TCheckBox; + DecPlacesEdit: TSpinEdit; + Label4: TLabel; + PageControl: TPageControl; + ReportPage: TTabSheet; ZScoresToGridChk: TCheckBox; - AllQrtilesChk: TCheckBox; + AllQuartilesChk: TCheckBox; Label2: TLabel; Label3: TLabel; - ReportPanel: TPanel; - PcntileChk: TCheckBox; + PercentileChk: TCheckBox; OptionsGroup: TGroupBox; InBtn: TBitBtn; OutBtn: TBitBtn; @@ -39,9 +89,9 @@ type procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); private - { private declarations } - sum, variance, stddev, value, mean, min, max, range, skew, prob, df, CI : double; - kurtosis, z, semean, seskew, sekurtosis, deviation, devsqr, M2, M3, M4 : double; + function GetReportFrame(APageIndex: Integer): TReportFrame; + procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; + AMean, AStdDev: Double); protected procedure AdjustConstraints; override; @@ -49,6 +99,7 @@ type procedure UpdateBtnStates; override; public + constructor Create(AOwner: TComponent); override; procedure Reset; override; end; @@ -61,15 +112,381 @@ implementation {$R *.lfm} uses - Math; + Math, + Utils; + + +{=============================================================================== +* TDescriptiveStats +*------------------------------------------------------------------------------- +* TDescriptiveStats is a helper class which +* - does all the required calculations (Analyze) and +* - prepares the report for each variable (WriteToReport). +*==============================================================================} + +constructor TDescriptiveStats.Create(ADataGrid: TStringGrid; + AColsSelected: IntDyneVec; AConfLevel: Double); +begin + inherited Create; + FDataGrid := ADataGrid; + FColsSelected := AColsSelected; + FConfLevel := AConfLevel; +end; + + +procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions); +var + SS: Double; + values: DblDyneVec; + M2, M3, M4: Double; +begin + FMean := NaN; + FVariance := NaN; + FStdDev := NaN; + FStdErrorMean := NaN; + FDeltaMean := NaN; + FSkew := NaN; + FStdErrorSkew := NaN; + + FColIndex := AColIndex; + FOptions := AOptions; + + CollectValues(values); + FNumCases := Length(values); + + SortOnX(values); + + Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax); + if FNumCases > 0 then begin + FMean := FSum / FNumCases; + if FNumCases > 1 then + begin + FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1); + FStdDev := sqrt(FVariance); + FStdErrorMean := sqrt(FVariance / FNumCases); + FDeltaMean := Calc_DeltaMean(FStdErrorMean); + end; + + Calc_Moments(values, FMean, M2, M3, M4); + Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis); + + Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile); + + if (doAlternativeQuartiles in FOptions) then + Calc_AlternativeQuartiles(values); + if (doPercentileRanks in FOptions) then + PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues); + end; +end; + + +procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec); +var + nCases: Integer; + q: TQuartile; + m: TQuartileMethod; +begin + nCases := Length(AValues); + for m := Low(TQuartileMethod) to High(TQuartileMethod) do + for q := Low(TQuartile) to High(TQuartile) do + FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues); +end; + + +// Tolerance around the mean +function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double; +var + alpha: Double; + confLev: Double; + DOF: Integer; +begin + alpha := (1 - FConfLevel) / 2; + confLev := 1 - alpha; + + if FNumCases < 120 then + begin + DOF := FNumCases - 1; + Result := AStdErrorOfMean * InverseT(confLev, DOF); + end else + Result := AStdErrorOfMean * InverseZ(confLev); +end; + + +procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec; + AMean: Double; out M2, M3, M4: Double); +var + i: Integer; + dev, devSqr: Double; +begin + M2 := 0; + M3 := 0; + M4 := 0; + for i := 0 to High(AValues) do + begin + dev := AValues[i] - AMean; + devSqr := Sqr(dev); + M2 := M2 + devSqr; + M3 := M3 + dev * devSqr; + M4 := M4 + sqr(devSqr); + end; +end; + + +procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec; + out Q1, Median, Q3: Double); +var + n: Integer; +begin + n := Length(AValues); + Q1 := Quartiles(2, 0.25, n, AValues); + Median := Quartiles(2, 0.5, n, AValues); + Q3 := Quartiles(2, 0.75, n, AValues); +end; + + +procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; + ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double); +var + num, denom: Double; + stdDev3, stdDev4: Double; +begin + Skew := NaN; + StdErrorSkew := NaN; + Kurtosis := NaN; + StdErrorKurtosis := NaN; + + stdDev3 := StdDev * StdDev * StdDev; + stdDev4 := StdDev3 * StdDev; + + if ANumCases > 2 then + begin + Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3); + + num := 6.0 * ANumCases * (ANumCases - 1); + denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3); + StdErrorSkew := sqrt(num / denom); + end; + + if ANumCases > 3 then + begin + num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1); + denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4; + Kurtosis := num / denom; + + num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew); + denom := (ANumCases - 3) * (ANumCases + 5); + StdErrorKurtosis := sqrt(num / denom); + end; +end; + + +procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec; + out ASum, ASumOfSquares, AMin, AMax: Double); +var + i: Integer; +begin + ASum := 0.0; + ASumOfSquares := 0; + AMin := Infinity; + AMax := -Infinity; + + for i := 0 to High(AValues) do + begin + ASum := ASum + AValues[i]; + ASumOfSquares := ASumOfSquares + sqr(AValues[i]); + if AValues[i] < AMin then AMin := AValues[i]; + if AValues[i] > AMax then AMax := AValues[i]; + end; +end; + + +procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec); +var + i, n: Integer; +begin + AValues := nil; // silence the compiler + SetLength(AValues, NoCases); + n := 0; + for i := 1 to NoCases do + begin + if (doCasewiseDeletion in FOptions) then + begin + // Do not consider a case when any variable is empty + if not ValidValue(i, FColIndex) then + continue; + end else + begin + // Do not consider a case when the current variable is empty + if not GoodRecord(i, Length(FColsSelected), FColsSelected) then + continue; + end; + + if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then + inc(n) + else + raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"', + [FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]); + end; + SetLength(AValues, n); +end; + + +// Computes the percentile ranks of values stored in the data grid at the +// loaded columns. The values are assumed to be sorted. +procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out + ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); +var + i, nCases, iCat, nCategories: Integer; + lastCategoryValue: Double; + cumFreqCentered: Double; + cumFreq: Integer; +begin + // silence the compiler + ACategoryvalues := nil; + AFreq := nil; + APercentiles := nil; + + nCases := Length(AValues); + SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later + SetLength(AFreq, nCases); + + // Get count of unique values and frequencies of each + lastCategoryValue := AValues[0]; + ACategoryValues[0] := lastCategoryValue; + AFreq[0] := 1; + iCat := 0; + for i := 1 to nCases-1 do + begin + if (lastCategoryValue = AValues[i]) then + AFreq[iCat] := AFreq[iCat] + 1 + else + begin // new value + inc(iCat); + AFreq[iCat] := 1; + lastCategoryValue := AValues[i]; + ACategoryValues[iCat] := lastCategoryValue; + end; + end; + + // trim arrays + nCategories := iCat + 1; + SetLength(ACategoryValues, nCategories); + SetLength(AFreq, nCategories); + + // Get cumulative frequencies and percentile ranks + SetLength(APercentiles, nCategories); + APercentiles[0] := AFreq[0] * 0.5 / nCases; + cumFreq := AFreq[0]; + for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1 + begin + cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point + APercentiles[i] := cumFreqCentered / nCases; + cumFreq := cumFreq + AFreq[i]; + end; +end; + + +procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer; + AReport: TStrings); +var + w: Integer; + nCategories: Integer; + i: Integer; + cumFreq: Integer; + m: TQuartileMethod; +begin + w := 10 + ADecPlaces - 3; + AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']); + AReport.Add(''); + AReport.Add('Number of cases: %*d', [W, FNumCases]); + AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]); + AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]); + AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]); + AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]); + AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]); + AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [ + FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]); + AReport.Add(''); + AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]); + AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]); + AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]); + AReport.Add(''); + AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]); + AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]); + AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]); + AReport.Add('Std. Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]); + AReport.Add(''); + AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]); + AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]); + AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]); + AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]); + + if (doAlternativeQuartiles in FOptions) then + begin + AReport.Add(''); + AReport.Add(''); + AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES'); + AReport.Add(''); + AReport.Add('Method First Quartile Median Third Quartile'); + AReport.Add('------ -------------- ---------- --------------'); + for m := Low(TQuartileMethod) to High(TQuartileMethod) do + AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]); + AReport.Add(''); + AReport.Add('NOTES:'); + AReport.Add('Method 1 is the weighted average at X[np] where '); + AReport.Add(' n is no. of cases, p is percentile / 100'); + AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.'); + AReport.Add('Method 3 is the empirical distribution function.'); + AReport.Add('Method 4 is called the empirical distribution function - averaging.'); + AReport.Add('Method 5 is called the empirical distribution function = Interpolation.'); + AReport.Add('Method 6 is the closest observation method.'); + AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.'); + AReport.Add('Method 8 was used in an older Microsoft Excel version.'); + AReport.Add('See the internet site http://www.xycoon.com/ for the above.'); + end; + + if (doPercentileRanks in FOptions) then + begin + nCategories := Length(FCategoryValues); + cumFreq := 0; + AReport.Add(''); + AReport.Add(''); + AReport.Add('PERCENTILE RANKS'); + AReport.Add(''); + AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank'); + AReport.Add('----------- --------- --------- ---------------'); + for i := 0 to nCategories-1 do + begin + cumFreq := cumFreq + FFreqValues[i]; + AReport.Add(' %8.3f %8d %8d %12.2f%%', [ + FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0 + ]); + end; + end; +end; + + { TDescriptiveFrm } +constructor TDescriptiveFrm.Create(AOwner: TComponent); +begin + inherited; + FReportFrame.Parent := ReportPage; + FReportFrame.BorderSpacing.Left := 0; + FReportFrame.BorderSpacing.Top := 0; + FReportFrame.BorderSpacing.Bottom := 0; + FReportFrame.BorderSpacing.Right := 0; +end; + + procedure TDescriptiveFrm.AdjustConstraints; begin ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height + OptionsGroup.Height + CIEdit.Height + ButtonBevel.Height + CloseBtn.Height + VarList.BorderSpacing.Bottom + - OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top; + OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top + + DecPlacesEdit.Height + DecPlacesEdit.BorderSpacing.Top; + ParamsPanel.Constraints.MinWidth := Math.Max( 4*CloseBtn.Width + 3*HelpBtn.BorderSpacing.Right, OptionsGroup.Width @@ -90,275 +507,100 @@ end; procedure TDescriptiveFrm.Compute; var - i, j, k, m: integer; - nCases, noSelected: integer; - Q1, Q2, Q3, Q12, Q13, Q14, Q15, Q16, Q17, Q18, Q22, Q23, Q24, Q25, Q26: double; - Q27, Q28, Q32, Q33, Q34, Q35, Q36, Q37, Q38, IQrange: double; - num, den, cases: double; - values: DblDyneVec = nil; - pcntRank: DblDyneVec = nil; - selected: IntDyneVec = nil; cellString: String; + i, j: Integer; + noSelected: Integer; + selected: IntDyneVec = nil; + page: TTabSheet; + reportFrame: TReportFrame; lReport: TStrings; + lDescrStats: TDescriptiveStats; + options: TDescriptiveOptions; begin - NoSelected := SelList.Items.Count; + noSelected := SelList.Items.Count; if noSelected = 0 then begin MessageDlg('No variables selected.', mtError, [mbOK], 0); exit; end; - SetLength(selected, noSelected); - // Get selected variables - for i := 1 to noselected do + // Find column index of selected variables + for i := 0 to noSelected - 1 do begin - cellstring := SelList.Items.Strings[i-1]; + cellstring := SelList.Items[i]; for j := 1 to NoVariables do - if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i-1] := j; + if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i] := j; end; - lReport := TStringList.Create; - try - lReport.Add('DISTRIBUTION PARAMETER ESTIMATES'); - lReport.Add(''); - - SetLength(Values, NoCases); - SetLength(pcntRank, NoCases); - - for j := 1 to noSelected do + // Create a tabsheet with ReportFrame for each selected variable (in addition to the built-in one) + if noSelected > PageControl.PageCount then + begin + for i := 1 to noSelected-1 do // we do not create a tab for the first variable - it exists by default begin - deviation := 0.0; - devsqr := 0.0; - M2 := 0.0; - M3 := 0.0; - M4 := 0.0; - sum := 0.0; - variance := 0.0; - stddev := 0.0; - range := 0.0; - skew := 0.0; - kurtosis := 0.0; - ncases := 0; - df := 0.0; - seskew := 0.0; - kurtosis := 0.0; - sekurtosis := 0.0; - k := selected[j-1]; - CI := StrToFloat(CIEdit.Text) / 100.0; - prob := CI; - CI := (1.0 - CI) / 2.0; - CI := 1.0 - CI; + page := TTabSheet.Create(PageControl); + page.Parent := PageControl; + reportFrame := TReportFrame.Create(page); + reportFrame.Parent := page; + reportFrame.Align := alClient; + InitToolBar(reportFrame.ReportToolbar, tpRight); + end; + end; - if ZScoresToGridChk.Checked then // add a new column to the grid - begin - cellstring := OS3MainFrm.DataGrid.Cells[k,0] + 'z'; - DictionaryFrm.NewVar(NoVariables + 1); - DictionaryFrm.DictGrid.Cells[1, NoVariables] := cellstring; - OS3MainFrm.DataGrid.Cells[NoVariables, 0] := cellstring; - end; + // Remove excess pages from previous session + while PageControl.PageCount > noSelected do + PageControl.Pages[PageControl.PageCount-1].Free; - // Accumulate sums of squares, sums, etc. for variable j - min := 1.0e308; - max := -1.0e308; - for i := 1 to NoCases do - begin - if not GoodRecord(i, noSelected, selected) then - continue; + // Every tab gets the name of the corresponding variable. + for i := 0 to NoSelected-1 do + PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0]; - if CaseChk.Checked then - begin - if not ValidValue(i, selected[j-1]) then - continue; - end - else if not GoodRecord(i, noselected, selected) then - continue; + // Prepare options + options := []; + if PercentileChk.Checked then Include(options, doPercentileRanks); + if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles); + if CaseChk.Checked then Include(options, doCasewiseDeletion); - value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]); - ncases := ncases + 1; - values[ncases-1] := value; - df := df + 1.0; - sum := sum + value; - variance := variance + (value * value); - if (value < min) then min := value; - if (value > max) then max := value; - end; + lReport := TStringList.Create; + lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100); + try + for i := 0 to noSelected-1 do + begin + // Analyze the data and get descriptive stats + lDescrStats.Analyze(selected[i], options); - if ncases > 0 then - begin - mean := sum / ncases; - range := max - min; - end; + // Store z values, (value - mean) / stdDev, to grid, if needed + zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev); - if ncases > 1 then - begin - variance := variance - (sum * sum) / ncases; - variance := variance / (ncases - 1); - stddev := sqrt(variance); - semean := sqrt(variance / ncases); - if ncases < 120 then - CI := semean * inverset(CI,df) - else - CI := semean * inversez(CI); - end; - - if variance = 0.0 then - begin - cellstring := OS3MainFrm.DataGrid.Cells[k,0]; - MessageDlg('No Variability in '+ cellstring + ' variable - ending analysis.', mtInformation, [mbOK], 0); - exit; - end; - - if ncases > 3 then // obtain skew, kurtosis and z scores - begin - for i := 1 to NoCases do - begin - if CaseChk.Checked then - begin - if not ValidValue(i, selected[j-1]) then continue; - end else - if not GoodRecord(i, noselected, selected) then continue; - - value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]); - if stddev > 0.0 then - begin - deviation := value - mean; - devsqr := deviation * deviation; - M2 := M2 + devsqr; - M3 := M3 + (deviation * devsqr); - M4 := M4 + (devsqr * devsqr); - z := (value - mean) / stddev; - if ZScoresToGridChk.Checked then - begin - cellstring := format('%8.5f',[z]); - OS3MainFrm.DataGrid.Cells[NoVariables,i] := cellstring; - end; - end; - end; - - if ncases > 2 then - begin - skew := (ncases * M3) / ((ncases - 1) * (ncases - 2) * stddev * variance); - cases := ncases; - num := 6.0 * cases * (cases - 1.0); - den := (cases - 2.0) * (cases + 1.0) * (cases + 3.0); - seskew := sqrt(num / den); - end; - - if ncases > 3 then - begin - kurtosis := (ncases * (ncases + 1) * M4) - (3 * M2 * M2 * (ncases - 1)); - kurtosis := kurtosis / ( (ncases - 1) * (ncases - 2) * (ncases - 3) * (variance * variance) ); - sekurtosis := sqrt((4.0 * (ncases * ncases - 1) * (seskew * seskew)) / ((ncases - 3) * (ncases + 5))); - end; - end; - - // output results for the kth variable - cellstring := OS3MainFrm.DataGrid.Cells[k,0]; - if j > 1 then lReport.Add(''); - lReport.Add('VARIABLE: %10s', ['"' + cellString + '"']); - lReport.Add(''); - lReport.Add('Number of cases: %10d', [nCases]); - lReport.Add('Sum: %10.3f', [sum]); - lReport.Add('Mean: %10.3f', [mean]); - lReport.Add('Variance: %10.3f', [variance]); - lReport.Add('Std.Dev.: %10.3f', [stddev]); - lReport.Add('Std.Error of Mean %10.3f', [seMean]); - lReport.Add('%.2f%% Conf.Interval Mean: %10.3f to %.3f', [prob*100.0, mean - CI, mean + CI]); - lReport.Add('Range: %10.3f', [range]); - lReport.Add('Minimum: %10.3f', [min]); - lReport.Add('Maximum: %10.3f', [max]); - lReport.Add('Skewness: %10.3f', [skew]); - lReport.Add('Std.Error of Skew: %10.3f', [seSkew]); - lReport.Add('Kurtosis: %10.3f', [kurtosis]); - lReport.Add('Std. Error of Kurtosis: %10.3f', [seKurtosis]); + // Write descriptive stats to report + lReport.Clear; + lReport.Add('DISTRIBUTION PARAMETER ESTIMATES'); lReport.Add(''); + lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), + DecPlacesEdit.Value, lReport); - if ncases > 4 then // get percentiles and quartiles - begin - // get percentile ranks - if pcntileChk.Checked then PRank(k, pcntRank, lReport); - - // sort values and get quartiles - for i := 0 to ncases - 2 do - begin - for m := i + 1 to ncases -1 do - begin - if values[i] > values[m] then - begin - value := values[i]; - values[i] := values[m]; - values[m] := value; - end; - end; - end; - Q1 := Quartiles(2,0.25,ncases,values); - Q2 := Quartiles(2,0.5,ncases,values); - Q3 := Quartiles(2,0.75,ncases,values); - IQrange := Q3 - Q1; - lReport.Add('First Quartile: %10.3f', [Q1]); - lReport.Add('Median: %10.3f', [Q2]); - lReport.Add('Third Quartile: %10.3f', [Q3]); - lReport.Add('Interquartile range: %10.3f', [IQrange]); - lReport.Add(''); - end; - - if (AllQrtilesChk.Checked) then - begin - lReport.Add('Alternative Methods for Obtaining Quartiles'); - lReport.Add(' Method 1 2 3 4 5 6 7 8'); - lReport.Add('Pcntile'); - Q1 := Quartiles(1,0.25,ncases,values); - Q12 := Quartiles(2,0.25,ncases,values); - Q13 := Quartiles(3,0.25,ncases,values); - Q14 := Quartiles(4,0.25,ncases,values); - Q15 := Quartiles(5,0.25,ncases,values); - Q16 := Quartiles(6,0.25,ncases,values); - Q17 := Quartiles(7,0.25,ncases,values); - Q18 := Quartiles(8,0.25,ncases,values); - lReport.Add('Q1 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q1,Q12,Q13,Q14,Q15,Q16,Q17,Q18]); - Q2 := Quartiles(1,0.5,ncases,values); - Q22 := Quartiles(2,0.5,ncases,values); - Q23 := Quartiles(3,0.5,ncases,values); - Q24 := Quartiles(4,0.5,ncases,values); - Q25 := Quartiles(5,0.5,ncases,values); - Q26 := Quartiles(6,0.5,ncases,values); - Q27 := Quartiles(7,0.5,ncases,values); - Q28 := Quartiles(8,0.5,ncases,values); - lReport.Add('Q2 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q2,Q22,Q23,Q24,Q25,Q26,Q27,Q28]); - Q3 := Quartiles(1,0.75,ncases,values); - Q32 := Quartiles(2,0.75,ncases,values); - Q33 := Quartiles(3,0.75,ncases,values); - Q34 := Quartiles(4,0.75,ncases,values); - Q35 := Quartiles(5,0.75,ncases,values); - Q36 := Quartiles(6,0.75,ncases,values); - Q37 := Quartiles(7,0.75,ncases,values); - Q38 := Quartiles(8,0.75,ncases,values); - lReport.Add('Q3 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f', [Q3,Q32,Q33,Q34,Q35,Q36,Q37,Q38]); - lReport.Add(''); - lReport.Add('NOTES:'); - lReport.Add('Method 1 is the weighted average at X[np] where '); - lReport.Add(' n is no. of cases, p is percentile / 100'); - lReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.'); - lReport.Add('Method 3 is the empirical distribution function.'); - lReport.Add('Method 4 is called the empirical distribution function - averaging.'); - lReport.Add('Method 5 is called the empirical distribution function = Interpolation.'); - lReport.Add('Method 6 is the closest observation method.'); - lReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.'); - lReport.Add('Method 8 was used in an older Microsoft Excel version.'); - lReport.Add('See the internet site http://www.xycoon.com/ for the above.'); - lReport.Add(''); - end; // end of experimental alternatives - lReport.Add(DIVIDER_SMALL_AUTO); - end; // next j variable - - FReportFrame.DisplayReport(lReport); - + // Display report in the page of the variable + reportFrame := GetReportFrame(i); + reportFrame.DisplayReport(lReport); + end; finally + // Clean up + lDescrStats.Free; lReport.Free; - Selected := nil; - Values := nil; - pcntrank := nil; + end; +end; + + +function TDescriptiveFrm.GetReportFrame(APageIndex: Integer): TReportFrame; +var + page: TTabSheet; +begin + Result := nil; + if (APageIndex >=0) and (APageIndex < PageControl.PageCount) then + begin + page := PageControl.Pages[APageIndex]; + if (page.ControlCount > 0) and (page.Controls[0] is TReportFrame) then + Result := TReportFrame(page.Controls[0]); end; end; @@ -406,6 +648,12 @@ var i: integer; begin inherited; + + for i := PageControl.PageCount-1 downto 1 do + PageControl.Pages[i].Free; + + PageControl.Pages[0].Caption := 'Report'; + CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT); VarList.Clear; SelList.Clear; @@ -447,9 +695,16 @@ procedure TDescriptiveFrm.UpdateBtnStates; var lSelected: Boolean; i: Integer; + F: TReportFrame; begin inherited; + for i := 0 to PageControl.PageCount-1 do + begin + F := GetReportFrame(i); + if Assigned(F) then F.Clear; + end; + lSelected := false; for i := 0 to VarList.Items.Count-1 do if VarList.Selected[i] then @@ -478,5 +733,43 @@ begin end; +procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer; + const AColsSelected: IntDyneVec; AMean, AStdDev: Double); +var + i, idx: Integer; + value, zValue: Double; + varName: String; +begin + if AStdDev = 0 then begin + ErrorMsg('Cannot store z values to grid because StdDev is zero.'); + exit; + end; + + varName := OS3MainFrm.DataGrid.Cells[AColIndex, 0] + '_z'; + idx := OS3MainFrm.DataGrid.Rows[0].IndexOf(varName); + if idx = -1 then + begin + DictionaryFrm.NewVar(NoVariables + 1); + DictionaryFrm.DictGrid.Cells[1, NoVariables] := varName; + OS3MainFrm.DataGrid.Cells[NoVariables, 0] := varName; + idx := NoVariables; + end; + + for i := 1 to NoCases do + begin + if CaseChk.Checked then + begin + if not ValidValue(i, AColsSelected[AColIndex]) then continue; + end + else + if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue; + + value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]); + zValue := (value - AMean) / AStdDev; + OS3MainFrm.DataGrid.Cells[idx, i] := Format('%8.5f', [zValue]); + // to do: read number of decimal places from Dictionary and use in Format(). + end; +end; + end. diff --git a/applications/lazstats/source/forms/misc/basicstatsparamsformunit.lfm b/applications/lazstats/source/forms/misc/basicstatsparamsformunit.lfm index 3b27e58cd..657084798 100644 --- a/applications/lazstats/source/forms/misc/basicstatsparamsformunit.lfm +++ b/applications/lazstats/source/forms/misc/basicstatsparamsformunit.lfm @@ -1,6 +1,8 @@ inherited BasicStatsParamsForm: TBasicStatsParamsForm + Height = 459 Width = 709 Caption = 'BasicStatsParamsForm' + ClientHeight = 459 ClientWidth = 709 Position = poMainFormCenter object ParamsPanel: TPanel[0] diff --git a/applications/lazstats/source/forms/misc/basicstatsparamsformunit.pas b/applications/lazstats/source/forms/misc/basicstatsparamsformunit.pas index b298c0195..ca4d4727d 100644 --- a/applications/lazstats/source/forms/misc/basicstatsparamsformunit.pas +++ b/applications/lazstats/source/forms/misc/basicstatsparamsformunit.pas @@ -77,7 +77,7 @@ begin exit; w := MaxValue([HelpBtn.Width, ResetBtn.Width, ComputeBtn.Width, CloseBtn.Width]); - HelpBtn.Constraints.MinWidth := 2; + HelpBtn.Constraints.MinWidth := w; ResetBtn.Constraints.MinWidth := w; ComputeBtn.Constraints.MinWidth := w; CloseBtn.Constraints.MinWidth := w; @@ -89,6 +89,8 @@ begin Position := poDesigned; FAutoSized := true; + + inherited; end; diff --git a/applications/lazstats/source/units/functionslib.pas b/applications/lazstats/source/units/functionslib.pas index 4e512ba60..63562fbec 100644 --- a/applications/lazstats/source/units/functionslib.pas +++ b/applications/lazstats/source/units/functionslib.pas @@ -24,7 +24,7 @@ function poly(const c: Array of double; nord: integer; x: double): double; // RE procedure swilk (var init : boolean; const x: DblDyneVec; n, n1, n2: integer; const a: DblDyneVec; var w, pw: double; out ifault: integer); procedure SVDinverse(VAR a : DblDyneMat; N : integer); -function inverset(Probt, DF : double) : double; +function InverseT(Prob, DF: double): double; function inversechi(p : double; k : integer) : double; function STUDENT(q,v,r : real) : real; function realraise(base,power : double ): double; @@ -43,10 +43,12 @@ function UniStats(N : integer; VAR X : DblDyneVec; VAR z : DblDyneVec; VAR SESkew : double; VAR SEkurtosis : double; VAR min : double; VAR max : double; VAR Range : double; VAR MissValue : string) : integer; -function WholeValue(value : double) : double; -function FractionValue(value : double) : double; -function Quartiles(TypeQ : integer; pcntile : double; N : integer; - VAR values : DblDyneVec) : double; + +//function WholeValue(value : double) : double; +//function FractionValue(value : double) : double; + +function Quartiles(TypeQ: integer; Percentile: double; N: integer; + const Values: DblDyneVec): double; function KolmogorovProb(z: double): double; function KolmogorovTest(na: integer; const a: DblDyneVec; nb: integer; @@ -944,15 +946,14 @@ begin Result := prob; end; -function inverset(Probt, DF : double) : double; +{ Returns the t value corresponding to a two-tailed t test probability. } +function InverseT(Prob, DF: double): double; var - z, W, tValue: double; + z, W: double; begin - // Returns the t value corresponding to a two-tailed t test probability. - z := inversez(Probt); - W := z * ((8.0 * DF + 3.0) / (1.0 + 8.0 * DF)); - tValue := sqrt(DF * (exp(W * W / DF) - 1.0)); - inverset := tValue; + z := InverseZ(Prob); + W := z * ((8.0 * DF + 3.0) / (1.0 + 8.0 * DF)); + Result := sqrt(DF * (exp(W * W / DF) - 1.0)); end; //--------------------------------------------------------------------- @@ -1644,11 +1645,11 @@ begin end; -function Quartiles(TypeQ : integer; pcntile : double; N : integer; - VAR values : DblDyneVec) : double; -VAR - whole, fraction, Myresult, np, avalue, avalue1 : double; - subscript : integer; +function Quartiles(TypeQ: integer; Percentile: double; N: integer; + const Values: DblDyneVec): double; +var + whole, fraction, np, lValue, lValue1: double; + subscript: integer; begin { for i := 0 to N - 1 do // this is for debugging begin @@ -1656,46 +1657,52 @@ begin OutPutFrm.RichEdit.Lines.Add(outline); end; OutPutFrm.ShowModal; - OutPutFrm.RichEdit.Clear; } - case TypeQ of - 1 : np := pcntile * N; - 2 : np := pcntile * (N + 1); - 3 : np := pcntile * N; - 4 : np := pcntile * N; - 5 : np := pcntile * (N - 1); - 6 : np := pcntile * N + 0.5; - 7 : np := pcntile * (N + 1); - 8 : np := pcntile * (N + 1); + OutPutFrm.RichEdit.Clear; +} + case TypeQ of + 1 : np := Percentile * N; + 2 : np := Percentile * (N + 1); + 3 : np := Percentile * N; + 4 : np := Percentile * N; + 5 : np := Percentile * (N - 1); + 6 : np := Percentile * N + 0.5; + 7 : np := Percentile * (N + 1); + 8 : np := Percentile * (N + 1); + end; + + whole := WholeValue(np); + fraction := FractionValue(np); + subscript := Trunc(whole) - 1; + lValue := Values[subscript]; + lValue1 := Values[subscript + 1]; + + case TypeQ of + 1 : Result := ((1.0 - fraction) * values[subscript]) + fraction * values[subscript + 1]; + 2 : Result := ((1.0 - fraction) * lValue) + fraction * lValue1; // values[subscript + 1]; + 3 : if (fraction = 0.0) then + Result := values[subscript] + else + Result := values[subscript + 1]; + 4 : if (fraction = 0.0) then + Result := 0.5 * (values[subscript] + values[subscript + 1]) + else + Result := values[subscript + 1]; + 5 : if (fraction = 0.0) then + Result := values[subscript + 1] + else + Result := Values[subscript + 1] + fraction * (Values[subscript + 2] - values[subscript + 1]); + 6 : Result := values[subscript]; + 7 : if (fraction = 0.0) then + Result := values[subscript] + else + Result := fraction * Values[subscript] + (1.0 - fraction) * Values[subscript + 1]; + 8 : begin + if (fraction = 0.0) then Result := values[subscript]; + if (fraction = 0.5) then Result := 0.5 * (values[subscript] + values[subscript + 1]); + if (fraction < 0.5) then Result := values[subscript]; + if (fraction > 0.5) then Result := values[subscript + 1]; end; - whole := WholeValue(np); - fraction := FractionValue(np); - subscript := Trunc(whole) - 1; - avalue := values[subscript]; - avalue1 := values[subscript + 1]; - case TypeQ of - 1 : Myresult := ((1.0 - fraction) * values[subscript]) + - fraction * values[subscript + 1]; - 2 : Myresult := ((1.0 - fraction) * avalue) + - fraction * avalue1; // values[subscript + 1]; - 3 : if (fraction = 0.0) then Myresult := values[subscript] - else Myresult := values[subscript + 1]; - 4 : if (fraction = 0.0) then Myresult := 0.5 * (values[subscript] + values[subscript + 1]) - else Myresult := values[subscript + 1]; - 5 : if (fraction = 0.0) then Myresult := values[subscript + 1] - else Myresult := values[subscript + 1] + fraction * (values[subscript + 2] - - values[subscript + 1]); - 6 : Myresult := values[subscript]; - 7 : if (fraction = 0.0) then Myresult := values[subscript] - else Myresult := fraction * values[subscript] + - (1.0 - fraction) * values[subscript + 1]; - 8 : begin - if (fraction = 0.0) then Myresult := values[subscript]; - if (fraction = 0.5) then Myresult := 0.5 * (values[subscript] + values[subscript + 1]); - if (fraction < 0.5) then Myresult := values[subscript]; - if (fraction > 0.5) then Myresult := values[subscript + 1]; - end; - end; - Result := Myresult; + end; end; function KolmogorovProb(z : double) : double;