diff --git a/applications/lazstats/source/LazStats.lpi b/applications/lazstats/source/LazStats.lpi index f6570482f..48809267d 100644 --- a/applications/lazstats/source/LazStats.lpi +++ b/applications/lazstats/source/LazStats.lpi @@ -1563,6 +1563,11 @@ + + + + + diff --git a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas index a4cc868ea..6da4633cc 100644 --- a/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas +++ b/applications/lazstats/source/forms/analysis/descriptive/descriptiveunit.pas @@ -15,48 +15,52 @@ type { TDescriptiveStats } - TDescriptiveOption = (doAlternativeQuartiles, doPercentileRanks, doCasewiseDeletion); - TDescriptiveOptions = set of TDescriptiveOption; TQuartileMethod = 1..8; TQuartile = 1..3; + TQuartiles = array[TQuartileMethod, TQuartile] of Double; TDescriptiveStats = class private FDataGrid: TStringGrid; FColIndex: Integer; - FConfLevel: Double; // usually 0.95 FColsSelected: IntDyneVec; - FMean, FStdErrorMean, FDeltaMean: Double; + FValues: DblDyneVec; + FMean, FStdErrorMean: Double; FMin, FMax: Double; FSum: Double; FVariance, FStdDev: Double; FSkew, FStdErrorSkew: Double; FKurtosis, FStdErrorKurtosis: Double; FFirstQuartile, FMedian, FThirdQuartile: Double; - FCategoryValues, FPercentiles: DblDyneVec; - FFreqValues: IntDyneVec; - FOptions: TDescriptiveOptions; FNumCases: Integer; - FQuartiles: array[TQuartileMethod, TQuartile] of Double; - procedure Calc_AlternativeQuartiles(const AValues: DblDyneVec); - function Calc_DeltaMean(AStdErrorOfMean: Double): Double; - procedure Calc_Moments(const AValues: DblDyneVec; AMean: Double; - out M2, M3, M4: Double); - procedure Calc_Quartiles(const AValues: DblDyneVec; out Q1, Median, Q3: Double); - procedure Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; ANumCases: Integer; - out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double); - procedure 
Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec; - out ASum, ASumOfSquares, AMin, AMax: Double); - procedure CollectValues(out AValues: DblDyneVec); - procedure PercentileRank(const AValues: DblDyneVec; - out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); + procedure Calc_Quartiles; + procedure Calc_Skew_Kurtosis; +// function GetMeanLimits(AIndex: Integer; AConfLevel: Double): Double; + function GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double; + procedure Reset; public - constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec; AConfLevel: Double); - procedure Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions); - procedure WriteToReport(AVarName: String; ADecPlaces: Integer; AReport: TStrings); + constructor Create(ADataGrid: TStringGrid; AColsSelected: IntDyneVec = nil); + procedure Analyze(AColIndex: Integer); + procedure CalcQuartiles(out AQuartiles: TQuartiles); + procedure PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); + + property FirstQuartile: Double read FFirstQuartile; + property Kurtosis: Double read FKurtosis; + property Max: Double read FMax; + property Median: Double read FMedian; property Mean: Double read FMean; + property MeanLowerLimit[AConfLevel: Double]: Double index 0 read GetMeanLimits; + property MeanUpperLimit[AConfLevel: Double]: Double index 1 read GetMeanLimits; + property Min: Double read FMin; + property NumCases: Integer read FNumCases; + property Skew: Double read FSkew; + property StdErrorSkew: Double read FStdErrorSkew; property StdDev: Double read FStdDev; - // more can be added... 
+    property StdErrorKurtosis: Double read FStdErrorKurtosis;
+    property StdErrorMean: Double read FStdErrorMean;
+    property StdErrorSkey: Double read FStdErrorSkew;  // NOTE(review): misspelled duplicate of StdErrorSkew; kept only so the published interface is unchanged
+    property ThirdQuartile: Double read FThirdQuartile;
+    property Variance: Double read FVariance;
   end;
@@ -90,6 +94,7 @@ type
   private
     function GetReportFrame(APageIndex: Integer): TReportFrame;
+    procedure WriteToReport(Stats: TDescriptiveStats; AVarName: String; AReport: TStrings);
     procedure zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; AMean, AStdDev: Double);
@@ -113,7 +118,7 @@ implementation
 uses
   Math,
-  Utils;
+  Utils, MathUnit, GridProcs;
 {===============================================================================
@@ -125,218 +130,149 @@ uses
 *==============================================================================}
 constructor TDescriptiveStats.Create(ADataGrid: TStringGrid;
-  AColsSelected: IntDyneVec; AConfLevel: Double);
+  AColsSelected: IntDyneVec = nil);
 begin
   inherited Create;
   FDataGrid := ADataGrid;
   FColsSelected := AColsSelected;
-  FConfLevel := AConfLevel;
+  Reset;
 end;
-procedure TDescriptiveStats.Analyze(AColIndex: Integer; AOptions: TDescriptiveOptions);
-var
-  SS: Double;
-  values: DblDyneVec;
-  M2, M3, M4: Double;
+procedure TDescriptiveStats.Analyze(AColIndex: Integer);
 begin
-  FMean := NaN;
-  FVariance := NaN;
-  FStdDev := NaN;
-  FStdErrorMean := NaN;
-  FDeltaMean := NaN;
-  FSkew := NaN;
-  FStdErrorSkew := NaN;
+  Reset;
   FColIndex := AColIndex;
-  FOptions := AOptions;
-  CollectValues(values);
-  FNumCases := Length(values);
+  if Length(FColsSelected) = 0 then
+    FValues := CollectValues(FDataGrid, AColIndex)  // no check columns were given to Create
+  else
+    FValues := CollectValues(FDataGrid, AColIndex, FColsSelected);  // hand the check columns on (branches were swapped before)
+  FNumCases := Length(FValues);
-  SortOnX(values);
+  SortOnX(FValues);
-  Calc_Sum_SumOfSquares_Min_Max(values, FSum, SS, FMin, FMax);
-  if FNumCases > 0 then begin
-    FMean := FSum / FNumCases;
-    if FNumCases > 1 then
+  MathUnit.Calc_MaxMin(FValues, FMax, FMin);
+
MathUnit.Calc_MeanVarStdDev(FValues, FMean, FVariance, FStdDev);
+
+  if FNumCases > 1 then
+    FStdErrorMean := sqrt(FVariance / FNumCases);
+
+  Calc_Skew_Kurtosis;
+  Calc_Quartiles;
+end;
+
+
+procedure TDescriptiveStats.Calc_Quartiles;
+begin
+  FFirstQuartile := Quartiles(2, 0.25, FNumCases, FValues);
+  FMedian := Quartiles(2, 0.5, FNumCases, FValues);
+  FThirdQuartile := Quartiles(2, 0.75, FNumCases, FValues);
+end;
+
+
+procedure TDescriptiveStats.CalcQuartiles(out AQuartiles: TQuartiles);
+var
+  q: TQuartile;
+  m: TQuartileMethod;
+begin
+  for m := Low(TQuartileMethod) to High(TQuartileMethod) do
+    for q := Low(TQuartile) to High(TQuartile) do
+      AQuartiles[m, q] := Quartiles(m, 0.25*q, FNumCases, FValues);
+end;
+
+
+procedure TDescriptiveStats.Calc_Skew_Kurtosis;
+
+  procedure Moments(out M2, M3, M4: Double);
+  var
+    i: Integer;
+    dev, devSqr: Double;
+  begin
+    M2 := 0;
+    M3 := 0;
+    M4 := 0;
+    for i := 0 to High(FValues) do
     begin
-      FVariance := (SS - sqr(FSum) / FNumCases) / (FNumCases - 1);
-      FStdDev := sqrt(FVariance);
-      FStdErrorMean := sqrt(FVariance / FNumCases);
-      FDeltaMean := Calc_DeltaMean(FStdErrorMean);
+      dev := FValues[i] - FMean;
+      devSqr := Sqr(dev);
+      M2 := M2 + devSqr;
+      M3 := M3 + dev * devSqr;
+      M4 := M4 + sqr(devSqr);
     end;
+  end;
-  Calc_Moments(values, FMean, M2, M3, M4);
-  Calc_Skew_Kurtosis(FStdDev, M2, M3, M4, FNumCases, FSkew, FStdErrorSkew, FKurtosis, FStdErrorKurtosis);
+var
+  num, denom: Double;
+  stdDev3, stdDev4: Double;
+  M2, M3, M4: Double;
+begin
+  FSkew := NaN;
+  FStdErrorSkew := NaN;
+  FKurtosis := NaN;
+  FStdErrorKurtosis := NaN;
-  Calc_Quartiles(values, FFirstQuartile, FMedian, FThirdQuartile);
+  if (FNumCases < 2) or (FStdDev = 0) then  // zero spread: stdDev3/stdDev4 would be 0 divisors; results stay NaN
+    exit;
-  if (doAlternativeQuartiles in FOptions) then
-    Calc_AlternativeQuartiles(values);
-  if (doPercentileRanks in FOptions) then
-    PercentileRank(values, FCategoryValues, FPercentiles, FFreqValues);
+  stdDev3 := FStdDev * FStdDev * FStdDev;
+  stdDev4 := StdDev3 * FStdDev;
+
+  Moments(M2, M3, M4);
+
+  if FNumCases > 2 then
+  begin
+    FSkew := FNumCases * M3 / ((FNumCases - 1) * (FNumCases - 2) * stdDev3);  // n/((n-1)(n-2)) * M3/s^3; (n-3) here divided by zero at n = 3
+
+    num := 6.0 * FNumCases * (FNumCases - 1);
+    denom := (FNumCases - 2) * (FNumCases + 1) * (FNumCases + 3);
+    FStdErrorSkew := sqrt(num / denom);
+  end;
+
+  if FNumCases > 3 then
+  begin
+    num := FNumCases * (FNumCases + 1) * M4 - 3 * M2 * M2 * (FNumCases - 1);
+    denom := (FNumCases - 1) * (FNumCases - 2) * (FNumCases - 3) * stdDev4;
+    FKurtosis := num / denom;
+
+    num := 4.0 * (sqr(FNumCases) - 1) * sqr(FStdErrorSkew);
+    denom := (FNumCases - 3) * (FNumCases + 5);
+    FStdErrorKurtosis := sqrt(num / denom);
   end;
 end;
-procedure TDescriptiveStats.Calc_AlternativeQuartiles(const AValues: DblDyneVec);
-var
-  nCases: Integer;
-  q: TQuartile;
-  m: TQuartileMethod;
-begin
-  nCases := Length(AValues);
-  for m := Low(TQuartileMethod) to High(TQuartileMethod) do
-    for q := Low(TQuartile) to High(TQuartile) do
-      FQuartiles[m, q] := Quartiles(m, 0.25*q, nCases, AValues);
-end;
-
-
-// Tolerance around the mean
-function TDescriptiveStats.Calc_DeltaMean(AStdErrorOfMean: Double): Double;
+function TDescriptiveStats.GetMeanLimits(AConfLevel: Double; AIndex: Integer): Double;
 var
   alpha: Double;
   confLev: Double;
   DOF: Integer;
+  delta: Double;
 begin
-  alpha := (1 - FConfLevel) / 2;
+  alpha := (1 - AConfLevel) / 2;
   confLev := 1 - alpha;
   if FNumCases < 120 then begin
     DOF := FNumCases - 1;
-    Result := AStdErrorOfMean * InverseT(confLev, DOF);
+    delta := FStdErrorMean * InverseT(confLev, DOF);
   end else
-    Result := AStdErrorOfMean * InverseZ(confLev);
-end;
+    delta := FStdErrorMean * InverseZ(confLev);
-
-procedure TDescriptiveStats.Calc_Moments(const AValues: DblDyneVec;
-  AMean: Double; out M2, M3, M4: Double);
-var
-  i: Integer;
-  dev, devSqr: Double;
-begin
-  M2 := 0;
-  M3 := 0;
-  M4 := 0;
-  for i := 0 to High(AValues) do
-  begin
-    dev := AValues[i] - AMean;
-    devSqr := Sqr(dev);
-    M2 := M2 + devSqr;
-    M3 := M3 + dev * devSqr;
-    M4 := M4 + sqr(devSqr);
+  case AIndex of
+    0: Result
:= FMean - delta; + 1: Result := FMean + delta; end; end; -procedure TDescriptiveStats.Calc_Quartiles(const AValues: DblDyneVec; - out Q1, Median, Q3: Double); -var - n: Integer; -begin - n := Length(AValues); - Q1 := Quartiles(2, 0.25, n, AValues); - Median := Quartiles(2, 0.5, n, AValues); - Q3 := Quartiles(2, 0.75, n, AValues); -end; - - -procedure TDescriptiveStats.Calc_Skew_Kurtosis(StdDev, M2, M3, M4: Double; - ANumCases: Integer; out Skew, StdErrorSkew, Kurtosis, StdErrorKurtosis: Double); -var - num, denom: Double; - stdDev3, stdDev4: Double; -begin - Skew := NaN; - StdErrorSkew := NaN; - Kurtosis := NaN; - StdErrorKurtosis := NaN; - - stdDev3 := StdDev * StdDev * StdDev; - stdDev4 := StdDev3 * StdDev; - - if ANumCases > 2 then - begin - Skew := ANumCases * M3 / ((ANumCases - 1) * (ANumCases - 3) * stdDev3); - - num := 6.0 * ANumCases * (ANumCases - 1); - denom := (ANumCases - 2) * (ANumCases + 1) * (ANumCases + 3); - StdErrorSkew := sqrt(num / denom); - end; - - if ANumCases > 3 then - begin - num := ANumCases * (ANumCases + 1) * M4 - 3 * M2 * M2 * (ANumCases - 1); - denom := (ANumCases - 1) * (ANumCases - 2) * (ANumCases - 3) * stdDev4; - Kurtosis := num / denom; - - num := 4.0 * (sqr(ANumCases) - 1) * sqr(StdErrorSkew); - denom := (ANumCases - 3) * (ANumCases + 5); - StdErrorKurtosis := sqrt(num / denom); - end; -end; - - -procedure TDescriptiveStats.Calc_Sum_SumOfSquares_Min_Max(const AValues: DblDyneVec; - out ASum, ASumOfSquares, AMin, AMax: Double); -var - i: Integer; -begin - ASum := 0.0; - ASumOfSquares := 0; - AMin := Infinity; - AMax := -Infinity; - - for i := 0 to High(AValues) do - begin - ASum := ASum + AValues[i]; - ASumOfSquares := ASumOfSquares + sqr(AValues[i]); - if AValues[i] < AMin then AMin := AValues[i]; - if AValues[i] > AMax then AMax := AValues[i]; - end; -end; - - -procedure TDescriptiveStats.CollectValues(out AValues: DblDyneVec); -var - i, n: Integer; -begin - AValues := nil; // silence the compiler - SetLength(AValues, 
NoCases); - n := 0; - for i := 1 to NoCases do - begin - if (doCasewiseDeletion in FOptions) then - begin - // Do not consider a case when any variable is empty - if not ValidValue(i, FColIndex) then - continue; - end else - begin - // Do not consider a case when the current variable is empty - if not GoodRecord(i, Length(FColsSelected), FColsSelected) then - continue; - end; - - if TryStrToFloat(FDataGrid.Cells[FColIndex, i], AValues[n]) then - inc(n) - else - raise Exception.CreateFmt('Invalid number: variable "%s", case "%s"', - [FDataGrid.cells[FColIndex, 0], FDataGrid.Cells[0, i]]); - end; - SetLength(AValues, n); -end; - - // Computes the percentile ranks of values stored in the data grid at the // loaded columns. The values are assumed to be sorted. -procedure TDescriptiveStats.PercentileRank(const AValues: DblDyneVec; out - ACategoryValues, APercentiles: DblDyneVec; out AFreq: IntDyneVec); +procedure TDescriptiveStats.PercentileRank(out ACategoryValues, APercentiles: DblDyneVec; + out AFreq: IntDyneVec); var - i, nCases, iCat, nCategories: Integer; + i, iCat, nCategories: Integer; lastCategoryValue: Double; cumFreqCentered: Double; cumFreq: Integer; @@ -346,24 +282,23 @@ begin AFreq := nil; APercentiles := nil; - nCases := Length(AValues); - SetLength(ACategoryValues, nCases); // over-dimension; will be trimmed later - SetLength(AFreq, nCases); + SetLength(ACategoryValues, FNumCases); // over-dimension; will be trimmed later + SetLength(AFreq, FNumCases); // Get count of unique values and frequencies of each - lastCategoryValue := AValues[0]; + lastCategoryValue := FValues[0]; ACategoryValues[0] := lastCategoryValue; AFreq[0] := 1; iCat := 0; - for i := 1 to nCases-1 do + for i := 1 to FNumCases-1 do begin - if (lastCategoryValue = AValues[i]) then + if (lastCategoryValue = FValues[i]) then AFreq[iCat] := AFreq[iCat] + 1 else begin // new value inc(iCat); AFreq[iCat] := 1; - lastCategoryValue := AValues[i]; + lastCategoryValue := FValues[i]; 
ACategoryValues[iCat] := lastCategoryValue; end; end; @@ -375,94 +310,32 @@ begin // Get cumulative frequencies and percentile ranks SetLength(APercentiles, nCategories); - APercentiles[0] := AFreq[0] * 0.5 / nCases; + APercentiles[0] := AFreq[0] * 0.5 / FNumCases; cumFreq := AFreq[0]; for i := 1 to nCategories-1 do // NOTE: This loop must begin at index 1 begin cumFreqCentered := cumFreq + AFreq[i]*0.5; // cum frequencies at mid-point - APercentiles[i] := cumFreqCentered / nCases; + APercentiles[i] := cumFreqCentered / FNumCases; cumFreq := cumFreq + AFreq[i]; end; end; -procedure TDescriptiveStats.WriteToReport(AVarName: String; ADecPlaces: Integer; - AReport: TStrings); -var - w: Integer; - nCategories: Integer; - i: Integer; - cumFreq: Integer; - m: TQuartileMethod; +procedure TDescriptiveStats.Reset; begin - w := 10 + ADecPlaces - 3; - AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']); - AReport.Add(''); - AReport.Add('Number of cases: %*d', [W, FNumCases]); - AReport.Add('Sum: %*.*f', [W, ADecPlaces, FSum]); - AReport.Add('Mean: %*.*f', [W, ADecPlaces, FMean]); - AReport.Add('Variance: %*.*f', [W, ADecPlaces, FVariance]); - AReport.Add('Std.Dev.: %*.*f', [W, ADecPlaces, FStdDev]); - AReport.Add('Std.Error of Mean %*.*f', [W, ADecPlaces, FStdErrorMean]); - AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [ - FConfLevel*100.0, ADecPlaces, FMean - FDeltaMean, ADecPlaces, FMean + FDeltaMean]); - AReport.Add(''); - AReport.Add('Minimum: %*.*f', [W, ADecPlaces, FMin]); - AReport.Add('Maximum: %*.*f', [W, ADecPlaces, FMax]); - AReport.Add('Range: %*.*f', [W, ADecPlaces, FMax - FMin]); - AReport.Add(''); - AReport.Add('Skewness: %*.*f', [W, ADecPlaces, FSkew]); - AReport.Add('Std.Error of Skew: %*.*f', [W, ADecPlaces, FStdErrorSkew]); - AReport.Add('Kurtosis: %*.*f', [W, ADecPlaces, FKurtosis]); - AReport.Add('Std. 
Error of Kurtosis: %*.*f', [W, ADecPlaces, FStdErrorKurtosis]); - AReport.Add(''); - AReport.Add('First Quartile: %*.*f', [W, ADecPlaces, FFirstQuartile]); - AReport.Add('Median: %*.*f', [W, ADecPlaces, FMedian]); - AReport.Add('Third Quartile: %*.*f', [W, ADecPlaces, FThirdQuartile]); - AReport.Add('Interquartile range: %*.*f', [W, ADecPlaces, FThirdQuartile - FFirstQuartile]); - - if (doAlternativeQuartiles in FOptions) then - begin - AReport.Add(''); - AReport.Add(''); - AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES'); - AReport.Add(''); - AReport.Add('Method First Quartile Median Third Quartile'); - AReport.Add('------ -------------- ---------- --------------'); - for m := Low(TQuartileMethod) to High(TQuartileMethod) do - AReport.Add(' %d %12.3f %12.3f %12.3f', [m, FQuartiles[m, 1], FQuartiles[m, 2], FQuartiles[m, 3]]); - AReport.Add(''); - AReport.Add('NOTES:'); - AReport.Add('Method 1 is the weighted average at X[np] where '); - AReport.Add(' n is no. of cases, p is percentile / 100'); - AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.'); - AReport.Add('Method 3 is the empirical distribution function.'); - AReport.Add('Method 4 is called the empirical distribution function - averaging.'); - AReport.Add('Method 5 is called the empirical distribution function = Interpolation.'); - AReport.Add('Method 6 is the closest observation method.'); - AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.'); - AReport.Add('Method 8 was used in an older Microsoft Excel version.'); - AReport.Add('See the internet site http://www.xycoon.com/ for the above.'); - end; - - if (doPercentileRanks in FOptions) then - begin - nCategories := Length(FCategoryValues); - cumFreq := 0; - AReport.Add(''); - AReport.Add(''); - AReport.Add('PERCENTILE RANKS'); - AReport.Add(''); - AReport.Add('Score Value Frequency Cum.Freq. 
Percentile Rank');
-    AReport.Add('----------- --------- --------- ---------------');
-    for i := 0 to nCategories-1 do
-    begin
-      cumFreq := cumFreq + FFreqValues[i];
-      AReport.Add(' %8.3f %8d %8d %12.2f%%', [
-        FCategoryValues[i], FFreqValues[i], cumFreq, FPercentiles[i]*100.0
-      ]);
-    end;
-  end;
+  FValues := nil;
+  FMean := NaN;
+  FStdErrorMean := NaN;
+  FMin := NaN;
+  FMax := NaN;
+  FVariance := NaN;
+  FStdDev := NaN;
+  FFirstQuartile := NaN; FMedian := NaN; FThirdQuartile := NaN;  // replaces a repeated "FStdErrorMean := NaN"; quartiles were never reset
+  FSkew := NaN;
+  FStdErrorSkew := NaN;
+  FKurtosis := NaN;
+  FStdErrorKurtosis := NaN;
+  FNumCases := 0;
 end;
@@ -514,8 +387,7 @@ var
   page: TTabSheet;
   reportFrame: TReportFrame;
   lReport: TStrings;
-  lDescrStats: TDescriptiveStats;
-  options: TDescriptiveOptions;
+  lStats: TDescriptiveStats;
 begin
   noSelected := SelList.Items.Count;
   if noSelected = 0 then
@@ -555,29 +427,23 @@ begin
   for i := 0 to NoSelected-1 do
     PageControl.Pages[i].Caption := OS3MainFrm.DataGrid.Cells[selected[i], 0];
-  // Prepare options
-  options := [];
-  if PercentileChk.Checked then Include(options, doPercentileRanks);
-  if AllQuartilesChk.Checked then Include(options, doAlternativeQuartiles);
-  if CaseChk.Checked then Include(options, doCasewiseDeletion);
-
+  //
   lReport := TStringList.Create;
-  lDescrStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected, StrToFloat(CIEdit.Text)/100);
+  if not CaseChk.Checked then
+    lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid)
+  else
+    lStats := TDescriptiveStats.Create(OS3MainFrm.DataGrid, selected);
   try
     for i := 0 to noSelected-1 do
     begin
       // Analyze the data and get descriptive stats
-      lDescrStats.Analyze(selected[i], options);
+      lStats.Analyze(selected[i]);
       // Store z values, (value - mean) / stdDev, to grid, if needed
-      zScoresToGrid(selected[i], selected, lDescrStats.Mean, lDescrStats.StdDev);
+      zScoresToGrid(selected[i], selected, lStats.Mean, lStats.StdDev);
       // Write descriptive stats to report
-      lReport.Clear;
-      lReport.Add('DISTRIBUTION PARAMETER ESTIMATES');
-      lReport.Add('');
-
lDescrStats.WriteToReport(trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), - DecPlacesEdit.Value, lReport); + WriteToReport(lStats, trim(OS3MainFrm.DataGrid.Cells[selected[i], 0]), lReport); // Display report in the page of the variable reportFrame := GetReportFrame(i); @@ -585,7 +451,7 @@ begin end; finally // Clean up - lDescrStats.Free; + lStats.Free; lReport.Free; end; end; @@ -651,7 +517,6 @@ begin for i := PageControl.PageCount-1 downto 1 do PageControl.Pages[i].Free; - PageControl.Pages[0].Caption := 'Report'; CIEdit.Text := FormatFloat('0.0', DEFAULT_CONFIDENCE_LEVEL_PERCENT); @@ -698,12 +563,16 @@ var F: TReportFrame; begin inherited; - - for i := 0 to PageControl.PageCount-1 do + (* + for i := PageControl.PageCount-1 downto 0 do begin - F := GetReportFrame(i); - if Assigned(F) then F.Clear; - end; + if i > 0 then + PageControl.Pages[i].Free + else begin + F := GetReportFrame(i); + if Assigned(F) then F.Clear; + end; + end; *) lSelected := false; for i := 0 to VarList.Items.Count-1 do @@ -733,6 +602,103 @@ begin end; +procedure TDescriptiveFrm.WriteToReport(Stats: TDescriptiveStats; + AVarName: String; AReport: TStrings); +var + w: Integer; + i: Integer; + confLevel: Double; + decPlaces: Integer; + m: TQuartileMethod; + Q: TQuartiles; + cumFreq: Integer; + nCategories: Integer; + categories: DblDyneVec = nil; + freq: IntDyneVec = nil; + percentiles: DblDyneVec = nil; +begin + confLevel := StrToFloat(CIEdit.Text) / 100; + decPlaces := DecPlacesEdit.Value; + w := 10 + decPlaces - 3; + + AReport.Clear; + + AReport.Add('DISTRIBUTION PARAMETER ESTIMATES'); + AReport.Add(''); + + AReport.Add('VARIABLE: %*s', [W, '"' + AVarName + '"']); + AReport.Add(''); + AReport.Add('Number of cases: %*d', [W, Stats.NumCases]); +// AReport.Add('Sum: %*.*f', [W, decPlaces, Stats.Sum]); + AReport.Add('Mean: %*.*f', [W, decPlaces, Stats.Mean]); + AReport.Add('Variance: %*.*f', [W, decPlaces, Stats.Variance]); + AReport.Add('Std.Dev.: %*.*f', [W, decPlaces, Stats.StdDev]); + 
AReport.Add('Std.Error of Mean %*.*f', [W, decPlaces, Stats.StdErrorMean]); + AReport.Add('%.2f%% Conf.Interval Mean: %.*f to %.*f', [ + confLevel*100.0, decPlaces, Stats.MeanLowerLimit[confLevel], decPlaces, Stats.MeanUpperLimit[confLevel] + ]); + AReport.Add(''); + AReport.Add('Minimum: %*.*f', [W, decPlaces, Stats.Min]); + AReport.Add('Maximum: %*.*f', [W, decPlaces, Stats.Max]); + AReport.Add('Range: %*.*f', [W, decPlaces, Stats.Max - Stats.Min]); + AReport.Add(''); + AReport.Add('Skewness: %*.*f', [W, decPlaces, Stats.Skew]); + AReport.Add('Std.Error of Skew: %*.*f', [W, decPlaces, Stats.StdErrorSkew]); + AReport.Add('Kurtosis: %*.*f', [W, decPlaces, Stats.Kurtosis]); + AReport.Add('Std. Error of Kurtosis: %*.*f', [W, decPlaces, Stats.StdErrorKurtosis]); + AReport.Add(''); + AReport.Add('First Quartile: %*.*f', [W, decPlaces, Stats.FirstQuartile]); + AReport.Add('Median: %*.*f', [W, decPlaces, Stats.Median]); + AReport.Add('Third Quartile: %*.*f', [W, decPlaces, Stats.ThirdQuartile]); + AReport.Add('Interquartile range: %*.*f', [W, decPlaces, Stats.ThirdQuartile - Stats.FirstQuartile]); + + if AllQuartilesChk.Checked then + begin + Stats.CalcQuartiles(Q); + AReport.Add(''); + AReport.Add(''); + AReport.Add('ALTERNATIVE METHODS FOR OBTAINING QUARTILES'); + AReport.Add(''); + AReport.Add('Method First Quartile Median Third Quartile'); + AReport.Add('------ -------------- ---------- --------------'); + for m := Low(TQuartileMethod) to High(TQuartileMethod) do + AReport.Add(' %d %12.3f %12.3f %12.3f', [m, Q[m, 1], Q[m, 2], Q[m, 3]]); + AReport.Add(''); + AReport.Add('NOTES:'); + AReport.Add('Method 1 is the weighted average at X[np] where '); + AReport.Add(' n is no. 
of cases, p is percentile / 100'); + AReport.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.'); + AReport.Add('Method 3 is the empirical distribution function.'); + AReport.Add('Method 4 is called the empirical distribution function - averaging.'); + AReport.Add('Method 5 is called the empirical distribution function = Interpolation.'); + AReport.Add('Method 6 is the closest observation method.'); + AReport.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.'); + AReport.Add('Method 8 was used in an older Microsoft Excel version.'); + AReport.Add('See the internet site http://www.xycoon.com/ for the above.'); + end; + + if PercentileChk.Checked then + begin + Stats.PercentileRank(categories, percentiles, freq); + nCategories := Length(categories); + cumFreq := 0; + AReport.Add(''); + AReport.Add(''); + AReport.Add('PERCENTILE RANKS'); + AReport.Add(''); + AReport.Add('Score Value Frequency Cum.Freq. Percentile Rank'); + AReport.Add('----------- --------- --------- ---------------'); + for i := 0 to nCategories-1 do + begin + cumFreq := cumFreq + freq[i]; + AReport.Add(' %8.3f %8d %8d %12.2f%%', [ + categories[i], freq[i], cumFreq, percentiles[i]*100.0 + ]); + end; + end; +end; + + procedure TDescriptiveFrm.zScoresToGrid(AColIndex: Integer; const AColsSelected: IntDyneVec; AMean, AStdDev: Double); var @@ -759,10 +725,10 @@ begin begin if CaseChk.Checked then begin - if not ValidValue(i, AColsSelected[AColIndex]) then continue; + if not DataProcs.ValidValue(i, AColsSelected[AColIndex]) then continue; end else - if not GoodRecord(i, Length(AColsSelected), AColsSelected) then continue; + if not DataProcs.GoodRecord(i, Length(AColsSelected), AColsSelected) then continue; value := StrToFloat(OS3MainFrm.DataGrid.Cells[AColIndex, i]); zValue := (value - AMean) / AStdDev; diff --git a/applications/lazstats/source/units/globals.pas b/applications/lazstats/source/units/globals.pas index cb9ebbb58..3ed9622bb 100644 --- 
a/applications/lazstats/source/units/globals.pas +++ b/applications/lazstats/source/units/globals.pas @@ -41,6 +41,8 @@ type LHelpPath: String; end; + ELazStats = class(Exception); + var NoCases : integer; NoVariables : integer; diff --git a/applications/lazstats/source/units/gridprocs.pas b/applications/lazstats/source/units/gridprocs.pas index 97a97b2f1..e921af1b1 100644 --- a/applications/lazstats/source/units/gridprocs.pas +++ b/applications/lazstats/source/units/gridprocs.pas @@ -9,7 +9,7 @@ uses Globals, DictionaryUnit; function CollectValues(AGrid: TStringGrid; AColIndex: Integer; - AColCheck: IntDyneVec): DblDyneVec; + AColCheck: IntDyneVec = nil): DblDyneVec; procedure GetMinMax(AGrid: TStringGrid; AColIndex: Integer; const AColCheck: IntDyneVec; out AMin, AMax: Double); @@ -42,6 +42,7 @@ uses function CollectValues(AGrid: TStringGrid; AColIndex: Integer; AColCheck: IntDyneVec): DblDyneVec; var row, n: Integer; + val: Double; begin SetLength(Result, AGrid.RowCount); n := 0; @@ -54,7 +55,11 @@ begin begin if not GoodRecord(AGrid, row, AColCheck) then continue; end; - Result[n] := StrToFloat(trim(AGrid.Cells[AColIndex, row])); + if TryStrToFloat(trim(AGrid.Cells[AColIndex, row]), val) then + Result[n] := val + else + raise ELazStats.CreateFmt('Non-numeric string "%s" in column %d, row %d', + [AGrid.Cells[AColIndex, row], AColIndex, row]); inc(n); end; SetLength(Result, n); diff --git a/applications/lazstats/source/units/mathunit.pas b/applications/lazstats/source/units/mathunit.pas index 912e85e9b..d8e97e9a6 100644 --- a/applications/lazstats/source/units/mathunit.pas +++ b/applications/lazstats/source/units/mathunit.pas @@ -7,7 +7,8 @@ unit MathUnit; interface uses - Classes, SysUtils; + Classes, SysUtils, + Globals; const TWO_PI = 2.0 * PI; @@ -43,6 +44,13 @@ function FactorialLn(n: Integer): Double; function PoissonPDF(n: integer; a: double): Double; function PoissonCDF(n: Integer; a: double): Double; + +procedure Calc_MaxMin(const AData: DblDyneVec; out 
AMax, AMin: Double);
+procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
+procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
+procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
+
+
 implementation
 uses
@@ -474,6 +482,70 @@ begin
 end;
+
+{===============================================================================
+* Vector-based calculations
+===============================================================================}
+procedure Calc_MaxMin(const AData: DblDyneVec; out AMax, AMin: Double);
+var
+  i: Integer;
+begin
+  AMin := Infinity;   // for empty AData the results stay AMin = +Infinity ...
+  AMax := -Infinity;  // ... and AMax = -Infinity
+  for i := Low(AData) to High(AData) do
+  begin
+    if AData[i] < AMin then AMin := AData[i];
+    if AData[i] > AMax then AMax := AData[i];
+  end;
+end;
+
+
+procedure Calc_MeanStdDev(const AData: DblDyneVec; out AMean, AStdDev: Double);
+var
+  variance: Double;
+begin
+  Calc_MeanVarStdDev(AData, AMean, variance, AStdDev);  // variance is computed but discarded
+end;
+
+
+procedure Calc_MeanVarStdDev(const AData: DblDyneVec; out AMean, AVariance, AStdDev: Double);
+var
+  sum, ss: Double;
+  n: Integer;
+begin
+  AMean := NaN;
+  AVariance := NaN;
+  AStdDev := NaN;
+
+  n := Length(AData);
+  if n = 0 then  // no data: all three results stay NaN
+    exit;
+
+  Calc_SumSS(AData, sum, ss);
+
+  AMean := sum / n;
+  if n = 1 then  // single value: variance and std.dev. stay NaN
+    exit;
+
+  AVariance := (ss - sqr(sum) / n) / (n - 1);  // sample variance (SS - Sum^2/n)/(n-1); was ((ss - sqr(AMean)) / n) / (n - 1)
+  AStdDev := sqrt(AVariance);
+end;
+
+
+procedure Calc_SumSS(const AData: DblDyneVec; out Sum, SS: Double);
+var
+  i: Integer;
+begin
+  Sum := 0;
+  SS := 0;
+  for i := Low(AData) to High(AData) do
+  begin
+    Sum := Sum + AData[i];    // running sum of the values
+    SS := SS + sqr(AData[i]); // running sum of the squared values
+  end;
+end;
+
+
 initialization
   InitFactLn();