unit DescriptiveUnit; {$mode objfpc}{$H+} interface uses Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls, ExtCtrls, Buttons, Spin, ComCtrls, Grids, MainUnit, Globals, FunctionsLib, ReportFrameUnit, BasicStatsReportFormUnit, DataProcs, DictionaryUnit; type { TDescriptiveStats } TQuartileMethod = 1..8; TQuartile = 1..3; TQuartiles = array[TQuartileMethod, TQuartile] of Double; TDescriptiveStats = class private FDataGrid: TStringGrid; FColIndex: Integer; FColsSelected: IntDyneVec; FValues: DblDyneVec; FMean, FStdErrorMean: Double; FMin, FMax: Double; FSum: Double; FVariance, FStdDev: Double; FSkew, FStdErrorSkew: Double; FKurtosis, FStdErrorKurtosis: Double; FFirstQuartile, FMedian, FThirdQuartile: Double; FNumCases: Integer; procedure Calc_Quartiles; procedure Calc_Skew_Kurtosis; // function GetMeanLimits(AIndex: Integer; AConfLevel: Double): Double; function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double; procedure Reset; public constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil); procedure Analyze(AColIndex: Integer); procedure CalcQuartiles(out AQuartiles: TQuartiles); procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); property FirstQuartile: Double read FFirstQuartile; property Kurtosis: Double read FKurtosis; property Max: Double read FMax; property Median: Double read FMedian; property Mean: Double read FMean; property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits; property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits; property Min: Double read FMin; property NumCases: Integer read FNumCases; property Skew: Double read FSkew; property StdErrorSkew: Double read FStdErrorSkew; property StdDev: Double read FStdDev; property StdErrorKurtosis: Double read FStdErrorKurtosis; property StdErrorMean: Double read FStdErrorMean; property StdErrorSkey: Double read FStdErrorSkew; property ThirdQuartile: Double read FThirdQuartile; property Variance: Double read FVariance; end; { TDescriptiveFrm } TDescriptiveFrm = class(TBasicStatsReportForm) CaseChk: TCheckBox; DecPlacesEdit: TSpinEdit; Label4: TLabel; PageControl: TPageControl; ReportPage: TTabSheet; ZScoresToGridChk: TCheckBox; AllQuartilesChk: TCheckBox; Label2: TLabel; Label3: TLabel; PercentileChk: TCheckBox; OptionsGroup: TGroupBox; InBtn: TBitBtn; OutBtn: TBitBtn; AllBtn: TBitBtn; CIEdit: TEdit; Label1: TLabel; VarList: TListBox; SelList: TListBox; procedure AllBtnClick(Sender: TObject); procedure InBtnClick(Sender: TObject); procedure OutBtnClick(Sender: TObject); procedure SelListDblClick(Sender: TObject); procedure VarListDblClick(Sender: TObject); procedure VarListSelectionChange(Sender: TObject; {%H-}User: boolean); private function GetReportFrame(APageIndex: Integer): TReportFrame; procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings); procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; AMean, AStdDev: Double); protected procedure AdjustConstraints; override; procedure Compute; override; procedure UpdateBtnStates; override; public constructor Create(AOwner: TComponent); override; procedure Reset; override; end; var DescriptiveFrm: TDescriptiveFrm; implementation {$R *.lfm} uses Math, Utils, MathUnit, GridProcs; {=============================================================================== * TDescriptiveStats *------------------------------------------------------------------------------- * TDescriptiveStats is a helper class which * - does all the required calculations (Analyze) and * - prepares the report for each variable (WriteToReport). *==============================================================================} constructor TDescriptiveStats.Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil); begin inherited Create; FDataGrid := ADataGrid; FColsSelected := AColsSelected; Reset; end; procedure TDescriptiveStats.Analyze(AColIndex: Integer); begin Reset; FColIndex := AColIndex; if Length(FColsSelected) = 0 then FValues := CollectValues(FDataGrid, AColIndex, FColsSelected) else FValues := CollectValues(FDataGrid, AColIndex); FNumCases := Length(FValues); SortOnX(FValues); MathUnit.Calc_MaxMin(FValues, FMax, FMin); MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev); if FNumCases > 1 then FStdErrorMean := sqrt(FVariance / FNumCases); Calc_Skew_Kurtosis; Calc_Quartiles; end; procedure TDescriptiveStats.Calc_Quartiles; begin FFirstQuartile := Quartiles(2, 0.25, FNumCases, FValues); FMedian := Quartiles(2, 0.5, FNumCases, FValues); FThirdQuartile := Quartiles(2, 0.75, FNumCases, FValues); end; procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles); var q: TQuartile; m: TQuartileMethod; begin for m := Low(TQuartileMethod) to High(TQuartileMethod) do for q := Low(TQuartile) to High(TQuartile) do AQuartiles[m, q] := Quartiles(m, 0.25*q, FNumCases, FValues); end; procedure TDescriptiveStats.Calc_Skew_Kurtosis; procedure Moments(out M2, M3, M4: Double); var i: Integer; dev, devSqr: Double; begin M2 := 0; M3 := 0; M4 := 0; for i := 0 to High(FValues) do begin dev := FValues[i] - FMean; devSqr := Sqr(dev); M2 := M2 + devSqr; M3 := M3 + dev * devSqr; M4 := M4 + sqr(devSqr); end; end; var num, denom: Double; stdDev3, stdDev4: Double; M2, M3, M4: Double; begin FSkew := NaN; FStdErrorSkew := NaN; FKurtosis := NaN; FStdErrorKurtosis := NaN; if FNumCases < 2 then exit; stdDev3 := FStdDev * FStdDev * FStdDev; stdDev4 := StdDev3 * FStdDev; Moments(M2, M3, M4); if FNumCases > 2 then begin FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 3) * stdDev3); num := 6.0 * FNumCases * (FNumCases - 1); denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3); FStdErrorSkew := sqrt(num / denom); end; if FNumCases > 3 then begin num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1); denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4; FKurtosis := num / denom; num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew); denom := (FNumCases - 3) * (FNumCases + 5); FStdErrorKurtosis := sqrt(num / denom); end; end; function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double; var alpha: Double; confLev: Double; DOF: Integer; delta: Double; begin alpha := (1 - AConfLevel) / 2; confLev := 1 - alpha; if FNumCases < 120 then begin DOF := FNumCases - 1; delta := FStdErrorMean * InverseT(confLev, DOF); end else delta := FStdErrorMean * InverseZ(confLev); case AIndex of 0: Result := FMean - delta; 1: Result := FMean + delta; end; end; // Computes the percentile ranks of values stored in the data grid at the // loaded columns. The values are assumed to be sorted. procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); var i, iCat, nCategories: Integer; lastCategoryValue: Double; cumFreqCentered: Double; cumFreq: Integer; begin // silence the compiler ACategoryvalues := nil; AFreq := nil; APercentiles := nil; SetLength(ACategoryValues, FNumCases); // over-dimension; will be trimmed later SetLength(AFreq, FNumCases); // Get count of unique values and frequencies of each lastCategoryValue := FValues[0]; ACategoryValues[0] := lastCategoryValue; AFreq[0] := 1; iCat := 0; for i := 1 to FNumCases-1 do begin if (lastCategoryValue = FValues[i]) then AFreq[iCat] := AFreq[iCat] + 1 else begin // new value inc(iCat); AFreq[iCat] := 1; lastCategoryValue := FValues[i]; ACategoryValues[iCat] := lastCategoryValue; end; end; // trim arrays nCategories := iCat + 1; SetLength(ACategoryValues, nCategories); SetLength(AFreq, nCategories); // Get cumulative frequencies and percentile ranks SetLength(APercentiles, nCategories); APercentiles[0] := AFreq[0] * 0.5 / FNumCases; cumFreq := AFreq[0]; for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1 begin cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point APercentiles[i] := cumFreqCentered / FNumCases; cumFreq := cumFreq + AFreq[i]; end; end; procedure TDescriptiveStats.Reset; begin FValues := nil; FMean := NaN; FStdErrorMean := NaN; FMin := NaN; FMax := NaN; FVariance := NaN; FStdDev := NaN; FStdErrorMean := NaN; FSkew := NaN; FStdErrorSkew := NaN; FKurtosis := NaN; FStdErrorKurtosis := NaN; FNumCases := 0; end; { TDescriptiveFrm } constructor TDescriptiveFrm.Create(AOwner: TComponent); begin inherited; FReportFrame.Parent := ReportPage; FReportFrame.BorderSpacing.Left := 0; FReportFrame.BorderSpacing.Top := 0; FReportFrame.BorderSpacing.Bottom := 0; FReportFrame.BorderSpacing.Right := 0; end; procedure TDescriptiveFrm.AdjustConstraints; begin ParamsPanel.Constraints.MinHeight := AllBtn.Top + AllBtn.Height + OptionsGroup.Height + CIEdit.Height + ButtonBevel.Height + CloseBtn.Height + VarList.BorderSpacing.Bottom + OptionsGroup.BorderSpacing.Bottom + CloseBtn.BorderSpacing.Top + DecPlacesEdit.Height + DecPlacesEdit.BorderSpacing.Top; ParamsPanel.Constraints.MinWidth := Math.Max( 4*CloseBtn.Width + 3*HelpBtn.BorderSpacing.Right, OptionsGroup.Width ); end; procedure TDescriptiveFrm.AllBtnClick(Sender: TObject); var i : integer; begin for i := 0 to VarList.Items.Count-1 do SelList.Items.Add(VarList.Items.Strings[i]); VarList.Clear; UpdateBtnStates; end; procedure TDescriptiveFrm.Compute; var cellString: String; i, j: Integer; noSelected: Integer; selected: IntDyneVec = nil; page: TTabSheet; reportFrame: TReportFrame; lReport: TStrings; lStats: TDescriptiveStats; begin noSelected := SelList.Items.Count; if noSelected = 0 then begin MessageDlg('No variables selected.', mtError, [mbOK], 0); exit; end; SetLength(selected, noSelected); // Find column index of selected variables for i := 0 to noSelected - 1 do begin cellstring := SelList.Items[i]; for j := 1 to NoVariables do if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i] := j; end; // Create a tabsheet with ReportFrame for each selected variable (in addition to the built-in one) if noSelected > PageControl.PageCount then begin for i := 1 to noSelected-1 do // we do not create a tab for the first variable - it exists by default begin page := TTabSheet.Create(PageControl); page.Parent := PageControl; reportFrame := TReportFrame.Create(page); reportFrame.Parent := page; reportFrame.Align := alClient; InitToolBar(reportFrame.ReportToolbar, tpRight); end; end; // Remove excess pages from previous session while PageControl.PageCount > noSelected do PageControl.Pages[PageControl.PageCount-1].Free; // Every tab gets the name of the corresponding variable. for i := 0 to NoSelected-1 do PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0]; // lReport := TStringList.Create; if not CaseChk.Checked then lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid) else lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected); try for i := 0 to noSelected-1 do begin // Analyze the data and get descriptive stats lStats.Analyze(selected[i]); // Store z values, (value - mean) / stdDev, to grid, if needed zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev); // Write descriptive stats to report WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport); // Display report in the page of the variable reportFrame := GetReportFrame(i); reportFrame.DisplayReport(lReport); end; finally // Clean up lStats.Free; lReport.Free; end; end; function TDescriptiveFrm.GetReportFrame(APageIndex: Integer): TReportFrame; var page: TTabSheet; begin Result := nil; if (APageIndex >=0) and (APageIndex < PageControl.PageCount) then begin page := PageControl.Pages[APageIndex]; if (page.ControlCount > 0) and (page.Controls[0] is TReportFrame) then Result := TReportFrame(page.Controls[0]); end; end; procedure TDescriptiveFrm.InBtnClick(Sender: TObject); var i: integer; begin i := 0; while i < VarList.Items.Count do begin if VarList.Selected[i] then begin SelList.Items.Add(VarList.Items[i]); VarList.Items.Delete(i); i := 0; end else inc(i); end; UpdateBtnStates; end; procedure TDescriptiveFrm.OutBtnClick(Sender: TObject); var i: integer; begin i := 0; while i < SelList.Items.Count do begin if SelList.Selected[i] then begin VarList.Items.Add(SelList.Items[i]); SelList.Items.Delete(i); i := 0; end else inc(i); end; UpdateBtnStates; end; procedure TDescriptiveFrm.Reset; var i: integer; begin inherited; for i := PageControl.PageCount-1 downto 1 do PageControl.Pages[i].Free; PageControl.Pages[0].Caption := 'Report'; CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT); VarList.Clear; SelList.Clear; for i := 1 to NoVariables do VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]); UpdateBtnStates; end; procedure TDescriptiveFrm.SelListDblClick(Sender: TObject); var index: integer; begin index := SelList.ItemIndex; if index > -1 then begin VarList.Items.Add(SelList.Items[index]); SelList.Items.Delete(index); UpdateBtnStates; end; end; procedure TDescriptiveFrm.VarListDblClick(Sender: TObject); var index: integer; begin index := VarList.ItemIndex; if index > -1 then begin SelList.Items.Add(VarList.Items[index]); VarList.Items.Delete(index); UpdateBtnStates; end; end; procedure TDescriptiveFrm.UpdateBtnStates; var lSelected: Boolean; i: Integer; F: TReportFrame; begin inherited; (* for i := PageControl.PageCount-1 downto 0 do begin if i > 0 then PageControl.Pages[i].Free else begin F := GetReportFrame(i); if Assigned(F) then F.Clear; end; end; *) lSelected := false; for i := 0 to VarList.Items.Count-1 do if VarList.Selected[i] then begin lSelected := true; break; end; InBtn.Enabled := lSelected; lSelected := false; for i := 0 to SelList.Items.Count-1 do if SelList.Selected[i] then begin lSelected := true; break; end; OutBtn.Enabled := lSelected; AllBtn.Enabled := VarList.Count > 0; end; procedure TDescriptiveFrm.VarListSelectionChange(Sender: TObject; User: boolean); begin UpdateBtnStates; end; procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings); var w: Integer; i: Integer; confLevel: Double; decPlaces: Integer; m: TQuartileMethod; Q: TQuartiles; cumFreq: Integer; nCategories: Integer; categories: DblDyneVec = nil; freq: IntDyneVec = nil; percentiles: DblDyneVec = nil; begin confLevel := StrToFloat(CIEdit.Text) / 100; decPlaces := DecPlacesEdit.Value; w := 10 + decPlaces - 3; AReport.Clear; AReport.Add('DISTRIBUTION PARAMETER ESTIMATES'); AReport.Add(''); AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']); AReport.Add(''); AReport.Add('Number of cases: %*d', [W, Stats.NumCases]); // AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]); AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]); AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]); AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]); AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]); AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [ confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel] ]); AReport.Add(''); AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]); AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]); AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]); AReport.Add(''); AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]); AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]); AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]); AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]); AReport.Add(''); AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]); AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]); AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]); AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]); if AllQuartilesChk.Checked then begin Stats.CalcQuartiles(Q); AReport.Add(''); AReport.Add(''); AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES'); AReport.Add(''); AReport.Add('Method First Quartile Median Third Quartile'); AReport.Add('------ -------------- ---------- --------------'); for m := Low(TQuartileMethod) to High(TQuartileMethod) do AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]); AReport.Add(''); AReport.Add('NOTES:'); AReport.Add('Method 1 is the weighted average at X[np] where '); AReport.Add(' n is no. of cases, p is percentile / 100'); AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.'); AReport.Add('Method 3 is the empirical distribution function.'); AReport.Add('Method 4 is called the empirical distribution function - averaging.'); AReport.Add('Method 5 is called the empirical distribution function = Interpolation.'); AReport.Add('Method 6 is the closest observation method.'); AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.'); AReport.Add('Method 8 was used in an older Microsoft Excel version.'); AReport.Add('See the internet site http://www.xycoon.com/ for the above.'); end; if PercentileChk.Checked then begin Stats.PercentileRank(categories, percentiles, freq); nCategories := Length(categories); cumFreq := 0; AReport.Add(''); AReport.Add(''); AReport.Add('PERCENTILE RANKS'); AReport.Add(''); AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank'); AReport.Add('----------- --------- --------- ---------------'); for i := 0 to nCategories-1 do begin cumFreq := cumFreq + freq[i]; AReport.Add(' %8.3f %8d %8d %12.2f%%', [ categories[i], freq[i], cumFreq, percentiles[i]*100.0 ]); end; end; end; procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; AMean, AStdDev: Double); var i, idx: Integer; value, zValue: Double; varName: String; begin if AStdDev = 0 then begin ErrorMsg('Cannot store z values to grid because StdDev is zero.'); exit; end; varName := OS3MainFrm.DataGrid.Cells[AColIndex, 0] + '_z'; idx := OS3MainFrm.DataGrid.Rows[0].IndexOf(varName); if idx = -1 then begin DictionaryFrm.NewVar(NoVariables + 1); DictionaryFrm.DictGrid.Cells[1, NoVariables] := varName; OS3MainFrm.DataGrid.Cells[NoVariables, 0] := varName; idx := NoVariables; end; for i := 1 to NoCases do begin if CaseChk.Checked then begin if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue; end else if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue; value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]); zValue := (value - AMean) / AStdDev; OS3MainFrm.DataGrid.Cells[idx, i] := Format('%8.5f', [zValue]); // to do: read number of decimal places from Dictionary and use in Format(). end; end; end.