From 41c7131588a38a5fc9ba86bb9accc902aec0de77 Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Sat, 31 Oct 2020 23:13:18 +0000 Subject: [PATCH] LazStats: Some refactoring of RIDITUnit. git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7831 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../analysis/nonparametric/chisqrunit.pas | 6 +- .../analysis/nonparametric/riditunit.pas | 403 +++++++++++------- 2 files changed, 256 insertions(+), 153 deletions(-) diff --git a/applications/lazstats/source/forms/analysis/nonparametric/chisqrunit.pas b/applications/lazstats/source/forms/analysis/nonparametric/chisqrunit.pas index 0ce66b642..2adebc80f 100644 --- a/applications/lazstats/source/forms/analysis/nonparametric/chisqrunit.pas +++ b/applications/lazstats/source/forms/analysis/nonparametric/chisqrunit.pas @@ -78,9 +78,6 @@ type out ANumRows, ANumCols, ANumCases: Integer; out AFrequencies: IntDyneMat); - procedure GetYatesCorrection(const AFrequencies: IntDyneMat; - out AdjChiSqr: Double); - procedure GetColProportions(const AFrequencies: IntDyneMat; out AProportions: DblDyneMat); @@ -90,6 +87,9 @@ type procedure GetTotalProportions(const AFrequencies: IntDyneMat; out AProportions: DblDyneMat); + procedure GetYatesCorrection(const AFrequencies: IntDyneMat; + out AdjChiSqr: Double); + procedure ProcessAndReportCellChiSqr(const ACellChiSqr: DblDyneMat; const ARowLabels, AColLabels: StrDyneVec; ANumCases: Integer); diff --git a/applications/lazstats/source/forms/analysis/nonparametric/riditunit.pas b/applications/lazstats/source/forms/analysis/nonparametric/riditunit.pas index c91290b91..b8b344b4f 100644 --- a/applications/lazstats/source/forms/analysis/nonparametric/riditunit.pas +++ b/applications/lazstats/source/forms/analysis/nonparametric/riditunit.pas @@ -63,6 +63,17 @@ type NoToAnalyze: integer; Freq: IntDyneMat; Props: DblDyneMat; NoRows: integer; AReport: TStrings); + function CalcColProportions(const AFrequencies: IntDyneMat): DblDyneMat; + function CalcRowProportions(const AFrequencies: IntDyneMat): DblDyneMat; + + procedure GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat; + out AExpected, ACellChiSqr: DblDyneMat; out AChiSqr, AdjChiSqr: Double; + out ADegreesOfFreedom: Integer; out AYates: Boolean); + + procedure GetFrequencies(const AColNoSelected: IntDyneVec; + ANumRows, ANumCols: Integer; + out ANumCases: Integer; out AFrequencies: IntDyneMat); + protected procedure AdjustConstraints; override; procedure Compute; override; @@ -84,7 +95,7 @@ implementation uses Math, - Utils, GridProcs; + Utils, GridProcs, MatrixUnit; { TRIDITForm } @@ -176,8 +187,9 @@ begin details := DetailsChk.Checked; AReport.Add('ANALYSIS FOR STANDARD %s', [ColLabels[RefCol]]); -// AReport.Add(''); + AReport.Add(''); +{ // print frequencies outline := 'Frequencies Observed'; IntArrayPrint(Freq, NoRows, NoToAnalyze, 'Frequencies', RowLabels, ColLabels, outline, AReport); @@ -185,6 +197,7 @@ begin // print column proportions outline := 'Column Proportions Observed'; MatPrint(Props, NoRows, NoToAnalyze, outline, RowLabels, ColLabels, NoCases, AReport); +} // Get sizes in each column for i := 0 to NoToAnalyze - 1 do @@ -199,19 +212,27 @@ begin refprob[i,1] := Props[i,j] / 2.0; end; refprob[0,2] := 0.0; - for i := 1 to NoRows - 1 do refprob[i,2] := refprob[i-1,2] + refprob[i-1,0]; - for i := 0 to NoRows - 1 do refprob[i,3] := refprob[i,1] + refprob[i,2]; - if (details) then // print calculations table +// for i := 1 to NoRows - 1 do refprob[i,2] := refprob[i-1,2] + refprob[i-1,0]; + for i := 0 to NoRows - 2 do + refprob[i+1, 2] := refprob[i, 2] + refprob[i, 0]; + for i := 0 to NoRows - 1 do + refprob[i, 3] := refprob[i, 1] + refprob[i, 2]; + + // Print calculations table + if details then begin outline := 'Ridit calculations for ' + ColLabels[j]; MatPrint(refprob, NoRows, 4, outline, RowLabels, ColLabels, NoCases, AReport); + AReport.Add(DIVIDER_SMALL_AUTO); end; // store results in probdists for i := 0 to NoRows - 1 do probdists[i,j] := refprob[i,3]; end; + outline := 'Ridits for all variables'; MatPrint(probdists, NoRows, NoToAnalyze, outline, RowLabels, ColLabels, NoCases, AReport); + AReport.Add(DIVIDER_SMALL_AUTO); // obtain mean ridits for the all variables using the reference variable for i := 0 to NoToAnalyze - 1 do @@ -225,6 +246,7 @@ begin // print the means using the reference variable outline := 'Mean RIDITS Using the Reference Values'; DynVectorPrint(meanridits, NoToAnalyze, outline, ColLabels, NoCases, AReport); + AReport.Add(DIVIDER_SMALL_AUTO); // obtain the weighted grand mean ridit OverMeanRidit := 0.0; @@ -292,6 +314,60 @@ begin end; +function TRIDITForm.CalcColProportions(const AFrequencies: IntDyneMat): DblDyneMat; +var + i, j, n, m, numRows, numCols: Integer; +begin + MatSize(AFrequencies, n,m); + numRows := n-1; + numCols := m-1; + + Result := nil; + SetLength(Result, n,m); + for j := 0 to numCols do + begin + for i := 0 to numRows-1 do + begin + if (AFrequencies[numRows, j] > 0.0) then + Result[i, j] := AFrequencies[i, j] / AFrequencies[numRows, j] + else + Result[i, j] := 0.0; + end; + if (AFrequencies[numRows, j] > 0.0) then + Result[numRows, j] := 1.0 + else + Result[numRows, j] := 0.0; + end; +end; + + +function TRIDITForm.CalcRowProportions(const AFrequencies: IntDyneMat): DblDyneMat; +var + i, j, n, m, numRows, numCols: Integer; +begin + MatSize(AFrequencies, n,m); + numRows := n-1; + numCols := m-1; + + Result := nil; + SetLength(Result, n,m); + for i := 0 to numRows do + begin + for j := 0 to numCols-1 do + begin + if (AFrequencies[i, numCols] > 0.0) then + Result[i, j] := AFrequencies[i, j] / AFrequencies[i, numCols] + else + Result[i, j] := 0.0; + end; + if (AFrequencies[i, numCols] > 0.0) then + Result[i, numCols] := 1.0 + else + Result[i, numCols] := 0.0; + end; +end; + + procedure TRIDITForm.ColInClick(Sender: TObject); var i: integer; @@ -352,7 +428,8 @@ end; procedure TRIDITForm.Compute; var - Prop: DblDyneMat = nil; + RowProp: DblDyneMat = nil; + ColProp: DblDyneMat = nil; Expected: DblDyneMat = nil; CellChi: DblDyneMat = nil; ColNoSelected: IntDyneVec = nil; @@ -361,7 +438,7 @@ var ColLabels: StrDyneVec = nil; AllRefs : boolean; i, j, RowNo, RefColNo, NoToAnalyze : integer; - Row, Col, Ncases, Nrows, Ncols, df : integer; + Ncases, Nrows, Ncols, df : integer; ChiSquare, ProbChi : double; yates : boolean; Adjchisqr, Adjprobchi: double; @@ -397,105 +474,20 @@ begin ColLabels[i] := ColList.Items[i]; ColNoSelected[i+1] := GetVariableIndex(OS3MainFrm.DataGrid, ColLabels[i]); end; - - // Allocate and initialize - SetLength(Freq, NRows+1, NCols+1); - SetLength(Prop, NRows+1, NCols+1); - SetLength(Expected, NRows, NCols); - SetLength(CellChi, NRows, NCols); - for i := 1 to NRows + 1 do - for j := 1 to NCols + 1 do - Freq[i-1,j-1] := 0; RowLabels[NRows] := 'Total'; ColLabels[NCols] := 'Total'; - // Get cell data - NCases := 0; - for i := 1 to NoCases do - begin - Row := i; - for j := 1 to Ncols do - begin - Col := ColNoSelected[j]; - Freq[i-1,j-1] := StrToInt(OS3MainFrm.DataGrid.Cells[Col,Row]); -// result := GetValue(Row,Col,intvalue,dblvalue,strvalue); -// if (result = 1) Freq[i-1][j-1] := 0; -// else Freq[i-1][j-1] := intvalue; - Ncases := Ncases + Freq[i-1,j-1]; - end; - end; - Freq[Nrows][Ncols] := Ncases; + // Get frequencies + GetFrequencies(ColNoSelected, NRows, NCols, NCases, Freq); - // Now, calculate expected values + // Calculate expected values and cell chi-squares + GetExpectedAndCellChiSqr(Freq, Expected, cellChi, chiSquare, AdjChiSqr, df, yates); + probChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi + if yates then + AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df); - // Get row totals first - for i := 1 to Nrows do - for j := 1 to Ncols do - Freq[i-1,Ncols] := Freq[i-1,Ncols] + Freq[i-1,j-1]; - - // Get col totals next - for j := 1 to Ncols do - for i := 1 to Nrows do - Freq[Nrows,j-1] := Freq[Nrows,j-1] + Freq[i-1,j-1]; - - // Then get expected values and cell chi-squares - ChiSquare := 0.0; - Adjchisqr := 0.0; - yates := YatesChk.Checked and (NRows = 2) and (NCols = 2); - - if (NRows > 1) and (NCols > 1) then - begin - for i := 1 to NRows do - begin - for j := 1 to NCols do - begin - Expected[i-1,j-1] := Freq[NRows,j-1] * Freq[i-1,NCols] / Ncases; - if (Expected[i-1,j-1] > 0.0) then - CellChi[i-1,j-1] := sqr(Freq[i-1,j-1] - Expected[i-1,j-1])/ Expected[i-1,j-1] - else - begin - ErrorMsg('Zero expected value found.'); - CellChi[i-1,j-1] := 0.0; - end; - ChiSquare := ChiSquare + CellChi[i-1,j-1]; - end; - end; - df := (NRows - 1) * (NCols - 1); - - if yates then // 2 x 2 corrected chi-square - begin - AdjChiSqr := abs((Freq[0,0] * Freq[1,1]) - (Freq[0,1] * Freq[1,0])); - AdjChiSqr := sqr(AdjChiSqr - NCases / 2.0) * NCases; // numerator - AdjChiSqr := AdjChiSqr / (Freq[0,2] * Freq[1,2] * Freq[2,0] * Freq[2,1]); - AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df); - end; - end; - - if (NRows = 1) then // equal probability - begin - for j := 0 to NCols - 1 do - begin - Expected[0,j] := NCases / NCols; - if (Expected[0][j] > 0) then - CellChi[0,j] := sqr(Freq[0,j] - Expected[0,j]) / Expected[0,j]; - ChiSquare := ChiSquare + CellChi[0,j]; - end; - df := Ncols - 1; - end; - - if (NCols = 1) then // equal probability - begin - for i := 0 to Nrows - 1 do - begin - Expected[i,0] := Ncases / Nrows; - if (Expected[i,0] > 0) then - CellChi[i,0] := sqr(Freq[i,0] - Expected[i,0]) / Expected[i,0]; - ChiSquare := ChiSquare + CellChi[i,0]; - end; - df := Nrows - 1; - end; - - ProbChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi + // Calculate column proportions (needed by Analyze() routine) + ColProp := CalcColProportions(Freq); // Print results to output form lReport := TStringList.Create; @@ -505,9 +497,6 @@ begin begin FrequenciesPage.TabVisible := true; - lReport.Add('CHI-SQUARE ANALYSIS RESULTS'); - lReport.Add('No. of Cases: %d', [Ncases]); - if ObsChk.Checked then begin IntArrayPrint(Freq, NRows+1, NCols+1, 'Frequencies', RowLabels, ColLabels, 'OBSERVED FREQUENCIES', lReport); @@ -526,48 +515,19 @@ begin end else FrequenciesPage.TabVisible := false; - // Print row/col properties + // Print row/col proportions if PropChk.Checked then begin RowColPropsPage.TabVisible := true; - lReport.Add('CHI-SQUARE ANALYSIS RESULTS'); - lReport.Add('No. of Cases: %d', [Ncases]); - - for i := 1 to NRows + 1 do - begin - for j := 1 to NCols do - begin - if (Freq[i-1,NCols] > 0.0) then - Prop[i-1,j-1] := Freq[i-1,j-1] / Freq[i-1,NCols] - else - Prop[i-1,j-1] := 0.0; - end; - if (Freq[i-1,NCols] > 0.0) then - Prop[i-1,NCols] := 1.0 - else - Prop[i-1,NCols] := 0.0; - end; - MatPrint(Prop, Nrows+1, Ncols+1, 'ROW PROPORTIONS', RowLabels, ColLabels, NoCases, lReport); + RowProp := CalcRowProportions(Freq); + MatPrint(RowProp, NRows+1, NCols+1, 'ROW PROPORTIONS', RowLabels, ColLabels, NoCases, lReport); + RowProp := nil; // not needed any more lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(''); - for j := 1 to Ncols + 1 do - begin - for i := 1 to Nrows do - begin - if (Freq[Nrows,j-1] > 0.0) then - Prop[i-1,j-1] := Freq[i-1,j-1] / Freq[Nrows,j-1] - else - Prop[i-1,j-1] := 0.0; - end; - if (Freq[Nrows,j-1] > 0.0) then - Prop[Nrows,j-1] := 1.0 - else - Prop[Nrows,j-1] := 0.0; - end; - MatPrint(Prop, Nrows+1, Ncols+1, 'COLUMN PROPORTIONS', RowLabels, ColLabels, NoCases, lReport); + MatPrint(ColProp, NRows+1, NCols+1, 'COLUMN PROPORTIONS', RowLabels, ColLabels, NoCases, lReport); FRowColPropsReportFrame.DisplayReport(lReport); lReport.Clear; @@ -578,8 +538,6 @@ begin if ChiChk.Checked then begin CellChiSqrPage.TabVisible := true; - lReport.Add('CHI-SQUARE ANALYSIS RESULTS'); - lReport.Add('No. of Cases: %d', [Ncases]); MatPrint(CellChi, Nrows, Ncols, 'CHI-SQUARED VALUE FOR CELLS', RowLabels, ColLabels, NoCases, lReport); FCellChiSqrReportFrame.DisplayReport(lReport); lReport.Clear; @@ -593,13 +551,13 @@ begin lReport.Add(''); lReport.Add( 'Chi-square: %8.3f', [ChiSquare]); lReport.Add( ' with D.F. %8d', [df]); - lReport.Add( ' and Probability > value: %8.3f', [ProbChi]); + lReport.Add( ' and probability > value: %8.3f', [ProbChi]); lReport.Add(''); if yates then begin lReport.Add('Chi-square using Yates correction: %8.3f', [AdjChiSqr]); - lReport.Add(' and Probability > value: %8.3f', [Adjprobchi]); + lReport.Add(' and probability > value: %8.3f', [Adjprobchi]); end; likelihood := 0.0; @@ -610,13 +568,13 @@ begin likelihood := -2.0 * likelihood; problikelihood := 1.0 - ChiSquaredProb(likelihood, df); lReport.Add( 'Likelihood Ratio: %8.3f', [likelihood]); - lReport.Add( ' with Probability > value: %8.4f', [problikelihood]); + lReport.Add( ' with probability > value: %8.4f', [problikelihood]); lReport.Add(''); if ((Nrows > 1) and (Ncols > 1)) then begin phi := sqrt(ChiSquare / Ncases); - lReport.Add('phi correlation: %8.4f', [phi]); + lReport.Add('phi Correlation: %8.4f', [phi]); pearsonr := 0.0; SumX := 0.0; @@ -664,26 +622,26 @@ begin for i := 0 to NoToAnalyze - 1 do begin RefColNo := ColNoSelected[i+1] - 2; - Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, Prop, Nrows, lReport); + Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, ColProp, Nrows, lReport); if i < NoToAnalyze-1 then begin lReport.Add(''); - lReport.Add(DIVIDER_SMALL_AUTO); +// lReport.Add(DIVIDER_SMALL_AUTO); + lReport.Add(DIVIDER_AUTO); lReport.Add(''); end; end; - end else + end + else // only one selected reference variable begin NoToAnalyze := ColList.Items.Count; // get column of reference variable for i := 1 to NoVariables do if (RefEdit.Text = OS3MainFrm.DataGrid.Cells[i,0]) then RefColNo := i; - for j := 0 to NoToAnalyze - 1 do if (ColNoSelected[j+1] = RefColNo) then RefColNo := j; - - Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, Prop, Nrows, lReport); + Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, ColProp, Nrows, lReport); end; FReportFrame.DisplayReport(lReport); @@ -694,6 +652,151 @@ begin end; +procedure TRIDITForm.GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat; + out AExpected, ACellChiSqr: DblDyneMat; out AChiSqr, AdjChiSqr: Double; + out ADegreesOfFreedom: Integer; out AYates: Boolean); +var + i, j, n, m: Integer; + numRows, numCols, numCases: Integer; +begin + MatSize(AFrequencies, n,m); // n,m contain totals row and column + numRows := n-1; + numCols := m-1; + numCases := AFrequencies[numRows, numCols]; + + AExpected := nil; + ACellChiSqr := nil; + SetLength(AExpected, numRows, numCols); + SetLength(ACellChiSqr, numRows, numCols); + + AChiSqr := 0; + AdjChiSqr := -1; // -1 indicates non-existing value in case of AYates=false + AYates := YatesChk.Checked and (numRows = 2) and (numCols = 2); + + if (numRows > 1) and (numCols > 1) then + begin + for i := 0 to numRows-1 do + begin + for j := 0 to numCols-1 do + begin + AExpected[i, j] := AFrequencies[numRows, j] * AFrequencies[i, numCols] / numCases; + if (AExpected[i, j] > 0) then + ACellChiSqr[i, j] := sqr(AFrequencies[i, j] - AExpected[i, j]) / AExpected[i, j] + else + begin + ErrorMsg('Zero expected value found.'); + ACellChiSqr[i, j] := 0; + end; + AChiSqr := AChiSqr + ACellChiSqr[i, j]; + end; + end; + ADegreesOfFreedom := (numRows - 1) * (numCols - 1); + + // 2x2 corrected chi-square + if AYates then + begin + AdjChiSqr := abs((AFrequencies[0, 0] * AFrequencies[1, 1]) - (AFrequencies[0, 1] * AFrequencies[1, 0])); + AdjChiSqr := sqr(AdjChiSqr - numCases / 2.0) * numCases; // numerator + AdjChiSqr := AdjChiSqr / (AFrequencies[0, 2] * AFrequencies[1,2] * AFrequencies[2,0] * AFrequencies[2,1]); + end; + end; + + (* + for i := 1 to NRows do + begin + for j := 1 to NCols do + begin + Expected[i-1,j-1] := Freq[NRows,j-1] * Freq[i-1,NCols] / Ncases; + if (Expected[i-1,j-1] > 0.0) then + CellChi[i-1,j-1] := sqr(Freq[i-1,j-1] - Expected[i-1,j-1])/ Expected[i-1,j-1] + else + begin + ErrorMsg('Zero expected value found.'); + CellChi[i-1,j-1] := 0.0; + end; + ChiSquare := ChiSquare + CellChi[i-1,j-1]; + end; + end; + df := (NRows - 1) * (NCols - 1); + + if yates then // 2 x 2 corrected chi-square + begin + AdjChiSqr := abs((Freq[0,0] * Freq[1,1]) - (Freq[0,1] * Freq[1,0])); + AdjChiSqr := sqr(AdjChiSqr - NCases / 2.0) * NCases; // numerator + AdjChiSqr := AdjChiSqr / (Freq[0,2] * Freq[1,2] * Freq[2,0] * Freq[2,1]); + AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df); + end; + end; + *) + + + // Equal probability + if (numRows = 1) then + begin + for j := 0 to numCols - 1 do + begin + AExpected[0, j] := numCases / numCols; + if (AExpected[0, j] > 0) then + ACellChiSqr[0, j] := sqr(AFrequencies[0, j] - AExpected[0, j]) / AExpected[0, j]; + AChiSqr := AChiSqr + ACellChiSqr[0, j]; + end; + ADegreesOfFreedom := numCols - 1; + end; + + // Equal probability + if (numCols = 1) then + begin + for i := 0 to numRows - 1 do + begin + AExpected[i, 0] := numCases / numRows; + if (AExpected[i, 0] > 0) then + ACellChiSqr[i, 0] := sqr(AFrequencies[i, 0] - AExpected[i, 0]) / AExpected[i, 0]; + AChiSqr := AChiSqr + ACellChiSqr[i, 0]; + end; + ADegreesOfFreedom := numRows - 1; + end; +end; + + +procedure TRIDITForm.GetFrequencies(const AColNoSelected: IntDyneVec; + ANumRows, ANumCols: Integer; out ANumCases: Integer; out AFrequencies: IntDyneMat); +var + i, j, row, col: Integer; +begin + // over-dimension the matrix because row and column totals will be in + // last column and row. + AFrequencies := nil; + SetLength(AFrequencies, ANumRows+1, ANumCols+1); + for i := 0 to ANumRows do + for j := 0 to ANumCols do + AFrequencies[i, j] := 0; + + // Get cell data + ANumCases := 0; + for i := 0 to NoCases-1 do + begin + row := i+1; + for j := 0 to ANumCols-1 do + begin + col := AColNoSelected[j+1]; // +1 because row index is at index 0 + AFrequencies[i, j] := StrToInt(OS3MainFrm.DataGrid.Cells[col, row]); + ANumCases := ANumCases + AFrequencies[i, j]; + end; + end; + AFrequencies[ANumRows, ANumCols] := ANumCases; + + // Calculate row totals + for i := 0 to ANumRows-1 do + for j := 0 to ANumCols-1 do + AFrequencies[i, ANumCols] := AFrequencies[i, ANumCols] + AFrequencies[i, j]; + + // Calculate col totals + for j := 0 to ANumCols-1 do + for i := 0 to ANumRows-1 do + AFrequencies[ANumRows, j] := AFrequencies[ANumRows, j] + AFrequencies[i, j]; +end; + + procedure TRIDITForm.RefGrpClick(Sender: TObject); begin RefEdit.Visible := RefGrp.ItemIndex > 0;