LazStats: Some refactoring of RIDITUnit.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7831 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-31 23:13:18 +00:00
parent 151ba06d69
commit 41c7131588
2 changed files with 256 additions and 153 deletions

View File

@ -78,9 +78,6 @@ type
out ANumRows, ANumCols, ANumCases: Integer;
out AFrequencies: IntDyneMat);
procedure GetYatesCorrection(const AFrequencies: IntDyneMat;
out AdjChiSqr: Double);
procedure GetColProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
@ -90,6 +87,9 @@ type
procedure GetTotalProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
procedure GetYatesCorrection(const AFrequencies: IntDyneMat;
out AdjChiSqr: Double);
procedure ProcessAndReportCellChiSqr(const ACellChiSqr: DblDyneMat;
const ARowLabels, AColLabels: StrDyneVec; ANumCases: Integer);

View File

@ -63,6 +63,17 @@ type
NoToAnalyze: integer; Freq: IntDyneMat; Props: DblDyneMat;
NoRows: integer; AReport: TStrings);
function CalcColProportions(const AFrequencies: IntDyneMat): DblDyneMat;
function CalcRowProportions(const AFrequencies: IntDyneMat): DblDyneMat;
procedure GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat;
out AExpected, ACellChiSqr: DblDyneMat; out AChiSqr, AdjChiSqr: Double;
out ADegreesOfFreedom: Integer; out AYates: Boolean);
procedure GetFrequencies(const AColNoSelected: IntDyneVec;
ANumRows, ANumCols: Integer;
out ANumCases: Integer; out AFrequencies: IntDyneMat);
protected
procedure AdjustConstraints; override;
procedure Compute; override;
@ -84,7 +95,7 @@ implementation
uses
Math,
Utils, GridProcs;
Utils, GridProcs, MatrixUnit;
{ TRIDITForm }
@ -176,8 +187,9 @@ begin
details := DetailsChk.Checked;
AReport.Add('ANALYSIS FOR STANDARD %s', [ColLabels[RefCol]]);
// AReport.Add('');
AReport.Add('');
{
// print frequencies
outline := 'Frequencies Observed';
IntArrayPrint(Freq, NoRows, NoToAnalyze, 'Frequencies', RowLabels, ColLabels, outline, AReport);
@ -185,6 +197,7 @@ begin
// print column proportions
outline := 'Column Proportions Observed';
MatPrint(Props, NoRows, NoToAnalyze, outline, RowLabels, ColLabels, NoCases, AReport);
}
// Get sizes in each column
for i := 0 to NoToAnalyze - 1 do
@ -199,19 +212,27 @@ begin
refprob[i,1] := Props[i,j] / 2.0;
end;
refprob[0,2] := 0.0;
for i := 1 to NoRows - 1 do refprob[i,2] := refprob[i-1,2] + refprob[i-1,0];
for i := 0 to NoRows - 1 do refprob[i,3] := refprob[i,1] + refprob[i,2];
if (details) then // print calculations table
// for i := 1 to NoRows - 1 do refprob[i,2] := refprob[i-1,2] + refprob[i-1,0];
for i := 0 to NoRows - 2 do
refprob[i+1, 2] := refprob[i, 2] + refprob[i, 0];
for i := 0 to NoRows - 1 do
refprob[i, 3] := refprob[i, 1] + refprob[i, 2];
// Print calculations table
if details then
begin
outline := 'Ridit calculations for ' + ColLabels[j];
MatPrint(refprob, NoRows, 4, outline, RowLabels, ColLabels, NoCases, AReport);
AReport.Add(DIVIDER_SMALL_AUTO);
end;
// store results in probdists
for i := 0 to NoRows - 1 do probdists[i,j] := refprob[i,3];
end;
outline := 'Ridits for all variables';
MatPrint(probdists, NoRows, NoToAnalyze, outline, RowLabels, ColLabels, NoCases, AReport);
AReport.Add(DIVIDER_SMALL_AUTO);
// obtain mean ridits for the all variables using the reference variable
for i := 0 to NoToAnalyze - 1 do
@ -225,6 +246,7 @@ begin
// print the means using the reference variable
outline := 'Mean RIDITS Using the Reference Values';
DynVectorPrint(meanridits, NoToAnalyze, outline, ColLabels, NoCases, AReport);
AReport.Add(DIVIDER_SMALL_AUTO);
// obtain the weighted grand mean ridit
OverMeanRidit := 0.0;
@ -292,6 +314,60 @@ begin
end;
function TRIDITForm.CalcColProportions(const AFrequencies: IntDyneMat): DblDyneMat;
var
i, j, n, m, numRows, numCols: Integer;
begin
MatSize(AFrequencies, n,m);
numRows := n-1;
numCols := m-1;
Result := nil;
SetLength(Result, n,m);
for j := 0 to numCols do
begin
for i := 0 to numRows-1 do
begin
if (AFrequencies[numRows, j] > 0.0) then
Result[i, j] := AFrequencies[i, j] / AFrequencies[numRows, j]
else
Result[i, j] := 0.0;
end;
if (AFrequencies[numRows, j] > 0.0) then
Result[numRows, j] := 1.0
else
Result[numRows, j] := 0.0;
end;
end;
function TRIDITForm.CalcRowProportions(const AFrequencies: IntDyneMat): DblDyneMat;
var
i, j, n, m, numRows, numCols: Integer;
begin
MatSize(AFrequencies, n,m);
numRows := n-1;
numCols := m-1;
Result := nil;
SetLength(Result, n,m);
for i := 0 to numRows do
begin
for j := 0 to numCols-1 do
begin
if (AFrequencies[i, numCols] > 0.0) then
Result[i, j] := AFrequencies[i, j] / AFrequencies[i, numCols]
else
Result[i, j] := 0.0;
end;
if (AFrequencies[i, numCols] > 0.0) then
Result[i, numCols] := 1.0
else
Result[i, numCols] := 0.0;
end;
end;
procedure TRIDITForm.ColInClick(Sender: TObject);
var
i: integer;
@ -352,7 +428,8 @@ end;
procedure TRIDITForm.Compute;
var
Prop: DblDyneMat = nil;
RowProp: DblDyneMat = nil;
ColProp: DblDyneMat = nil;
Expected: DblDyneMat = nil;
CellChi: DblDyneMat = nil;
ColNoSelected: IntDyneVec = nil;
@ -361,7 +438,7 @@ var
ColLabels: StrDyneVec = nil;
AllRefs : boolean;
i, j, RowNo, RefColNo, NoToAnalyze : integer;
Row, Col, Ncases, Nrows, Ncols, df : integer;
Ncases, Nrows, Ncols, df : integer;
ChiSquare, ProbChi : double;
yates : boolean;
Adjchisqr, Adjprobchi: double;
@ -397,105 +474,20 @@ begin
ColLabels[i] := ColList.Items[i];
ColNoSelected[i+1] := GetVariableIndex(OS3MainFrm.DataGrid, ColLabels[i]);
end;
// Allocate and initialize
SetLength(Freq, NRows+1, NCols+1);
SetLength(Prop, NRows+1, NCols+1);
SetLength(Expected, NRows, NCols);
SetLength(CellChi, NRows, NCols);
for i := 1 to NRows + 1 do
for j := 1 to NCols + 1 do
Freq[i-1,j-1] := 0;
RowLabels[NRows] := 'Total';
ColLabels[NCols] := 'Total';
// Get cell data
NCases := 0;
for i := 1 to NoCases do
begin
Row := i;
for j := 1 to Ncols do
begin
Col := ColNoSelected[j];
Freq[i-1,j-1] := StrToInt(OS3MainFrm.DataGrid.Cells[Col,Row]);
// result := GetValue(Row,Col,intvalue,dblvalue,strvalue);
// if (result = 1) Freq[i-1][j-1] := 0;
// else Freq[i-1][j-1] := intvalue;
Ncases := Ncases + Freq[i-1,j-1];
end;
end;
Freq[Nrows][Ncols] := Ncases;
// Get frequencies
GetFrequencies(ColNoSelected, NRows, NCols, NCases, Freq);
// Now, calculate expected values
// Calculate expected values and cell chi-squares
GetExpectedAndCellChiSqr(Freq, Expected, cellChi, chiSquare, AdjChiSqr, df, yates);
probChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi
if yates then
AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df);
// Get row totals first
for i := 1 to Nrows do
for j := 1 to Ncols do
Freq[i-1,Ncols] := Freq[i-1,Ncols] + Freq[i-1,j-1];
// Get col totals next
for j := 1 to Ncols do
for i := 1 to Nrows do
Freq[Nrows,j-1] := Freq[Nrows,j-1] + Freq[i-1,j-1];
// Then get expected values and cell chi-squares
ChiSquare := 0.0;
Adjchisqr := 0.0;
yates := YatesChk.Checked and (NRows = 2) and (NCols = 2);
if (NRows > 1) and (NCols > 1) then
begin
for i := 1 to NRows do
begin
for j := 1 to NCols do
begin
Expected[i-1,j-1] := Freq[NRows,j-1] * Freq[i-1,NCols] / Ncases;
if (Expected[i-1,j-1] > 0.0) then
CellChi[i-1,j-1] := sqr(Freq[i-1,j-1] - Expected[i-1,j-1])/ Expected[i-1,j-1]
else
begin
ErrorMsg('Zero expected value found.');
CellChi[i-1,j-1] := 0.0;
end;
ChiSquare := ChiSquare + CellChi[i-1,j-1];
end;
end;
df := (NRows - 1) * (NCols - 1);
if yates then // 2 x 2 corrected chi-square
begin
AdjChiSqr := abs((Freq[0,0] * Freq[1,1]) - (Freq[0,1] * Freq[1,0]));
AdjChiSqr := sqr(AdjChiSqr - NCases / 2.0) * NCases; // numerator
AdjChiSqr := AdjChiSqr / (Freq[0,2] * Freq[1,2] * Freq[2,0] * Freq[2,1]);
AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df);
end;
end;
if (NRows = 1) then // equal probability
begin
for j := 0 to NCols - 1 do
begin
Expected[0,j] := NCases / NCols;
if (Expected[0][j] > 0) then
CellChi[0,j] := sqr(Freq[0,j] - Expected[0,j]) / Expected[0,j];
ChiSquare := ChiSquare + CellChi[0,j];
end;
df := Ncols - 1;
end;
if (NCols = 1) then // equal probability
begin
for i := 0 to Nrows - 1 do
begin
Expected[i,0] := Ncases / Nrows;
if (Expected[i,0] > 0) then
CellChi[i,0] := sqr(Freq[i,0] - Expected[i,0]) / Expected[i,0];
ChiSquare := ChiSquare + CellChi[i,0];
end;
df := Nrows - 1;
end;
ProbChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi
// Calculate column proportions (needed by Analyze() routine)
ColProp := CalcColProportions(Freq);
// Print results to output form
lReport := TStringList.Create;
@ -505,9 +497,6 @@ begin
begin
FrequenciesPage.TabVisible := true;
lReport.Add('CHI-SQUARE ANALYSIS RESULTS');
lReport.Add('No. of Cases: %d', [Ncases]);
if ObsChk.Checked then
begin
IntArrayPrint(Freq, NRows+1, NCols+1, 'Frequencies', RowLabels, ColLabels, 'OBSERVED FREQUENCIES', lReport);
@ -526,48 +515,19 @@ begin
end else
FrequenciesPage.TabVisible := false;
// Print row/col properties
// Print row/col proportions
if PropChk.Checked then
begin
RowColPropsPage.TabVisible := true;
lReport.Add('CHI-SQUARE ANALYSIS RESULTS');
lReport.Add('No. of Cases: %d', [Ncases]);
for i := 1 to NRows + 1 do
begin
for j := 1 to NCols do
begin
if (Freq[i-1,NCols] > 0.0) then
Prop[i-1,j-1] := Freq[i-1,j-1] / Freq[i-1,NCols]
else
Prop[i-1,j-1] := 0.0;
end;
if (Freq[i-1,NCols] > 0.0) then
Prop[i-1,NCols] := 1.0
else
Prop[i-1,NCols] := 0.0;
end;
MatPrint(Prop, Nrows+1, Ncols+1, 'ROW PROPORTIONS', RowLabels, ColLabels, NoCases, lReport);
RowProp := CalcRowProportions(Freq);
MatPrint(RowProp, NRows+1, NCols+1, 'ROW PROPORTIONS', RowLabels, ColLabels, NoCases, lReport);
RowProp := nil; // not needed any more
lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add('');
for j := 1 to Ncols + 1 do
begin
for i := 1 to Nrows do
begin
if (Freq[Nrows,j-1] > 0.0) then
Prop[i-1,j-1] := Freq[i-1,j-1] / Freq[Nrows,j-1]
else
Prop[i-1,j-1] := 0.0;
end;
if (Freq[Nrows,j-1] > 0.0) then
Prop[Nrows,j-1] := 1.0
else
Prop[Nrows,j-1] := 0.0;
end;
MatPrint(Prop, Nrows+1, Ncols+1, 'COLUMN PROPORTIONS', RowLabels, ColLabels, NoCases, lReport);
MatPrint(ColProp, NRows+1, NCols+1, 'COLUMN PROPORTIONS', RowLabels, ColLabels, NoCases, lReport);
FRowColPropsReportFrame.DisplayReport(lReport);
lReport.Clear;
@ -578,8 +538,6 @@ begin
if ChiChk.Checked then
begin
CellChiSqrPage.TabVisible := true;
lReport.Add('CHI-SQUARE ANALYSIS RESULTS');
lReport.Add('No. of Cases: %d', [Ncases]);
MatPrint(CellChi, Nrows, Ncols, 'CHI-SQUARED VALUE FOR CELLS', RowLabels, ColLabels, NoCases, lReport);
FCellChiSqrReportFrame.DisplayReport(lReport);
lReport.Clear;
@ -593,13 +551,13 @@ begin
lReport.Add('');
lReport.Add( 'Chi-square: %8.3f', [ChiSquare]);
lReport.Add( ' with D.F. %8d', [df]);
lReport.Add( ' and Probability > value: %8.3f', [ProbChi]);
lReport.Add( ' and probability > value: %8.3f', [ProbChi]);
lReport.Add('');
if yates then
begin
lReport.Add('Chi-square using Yates correction: %8.3f', [AdjChiSqr]);
lReport.Add(' and Probability > value: %8.3f', [Adjprobchi]);
lReport.Add(' and probability > value: %8.3f', [Adjprobchi]);
end;
likelihood := 0.0;
@ -610,13 +568,13 @@ begin
likelihood := -2.0 * likelihood;
problikelihood := 1.0 - ChiSquaredProb(likelihood, df);
lReport.Add( 'Likelihood Ratio: %8.3f', [likelihood]);
lReport.Add( ' with Probability > value: %8.4f', [problikelihood]);
lReport.Add( ' with probability > value: %8.4f', [problikelihood]);
lReport.Add('');
if ((Nrows > 1) and (Ncols > 1)) then
begin
phi := sqrt(ChiSquare / Ncases);
lReport.Add('phi correlation: %8.4f', [phi]);
lReport.Add('phi Correlation: %8.4f', [phi]);
pearsonr := 0.0;
SumX := 0.0;
@ -664,26 +622,26 @@ begin
for i := 0 to NoToAnalyze - 1 do
begin
RefColNo := ColNoSelected[i+1] - 2;
Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, Prop, Nrows, lReport);
Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, ColProp, Nrows, lReport);
if i < NoToAnalyze-1 then
begin
lReport.Add('');
lReport.Add(DIVIDER_SMALL_AUTO);
// lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add(DIVIDER_AUTO);
lReport.Add('');
end;
end;
end else
end
else
// only one selected reference variable
begin
NoToAnalyze := ColList.Items.Count;
// get column of reference variable
for i := 1 to NoVariables do
if (RefEdit.Text = OS3MainFrm.DataGrid.Cells[i,0]) then RefColNo := i;
for j := 0 to NoToAnalyze - 1 do
if (ColNoSelected[j+1] = RefColNo) then RefColNo := j;
Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, Prop, Nrows, lReport);
Analyze(RefColNo, RowLabels,ColLabels, NoToAnalyze, Freq, ColProp, Nrows, lReport);
end;
FReportFrame.DisplayReport(lReport);
@ -694,6 +652,151 @@ begin
end;
procedure TRIDITForm.GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat;
out AExpected, ACellChiSqr: DblDyneMat; out AChiSqr, AdjChiSqr: Double;
out ADegreesOfFreedom: Integer; out AYates: Boolean);
var
i, j, n, m: Integer;
numRows, numCols, numCases: Integer;
begin
MatSize(AFrequencies, n,m); // n,m contain totals row and column
numRows := n-1;
numCols := m-1;
numCases := AFrequencies[numRows, numCols];
AExpected := nil;
ACellChiSqr := nil;
SetLength(AExpected, numRows, numCols);
SetLength(ACellChiSqr, numRows, numCols);
AChiSqr := 0;
AdjChiSqr := -1; // -1 indicates non-existing value in case of AYates=false
AYates := YatesChk.Checked and (numRows = 2) and (numCols = 2);
if (numRows > 1) and (numCols > 1) then
begin
for i := 0 to numRows-1 do
begin
for j := 0 to numCols-1 do
begin
AExpected[i, j] := AFrequencies[numRows, j] * AFrequencies[i, numCols] / numCases;
if (AExpected[i, j] > 0) then
ACellChiSqr[i, j] := sqr(AFrequencies[i, j] - AExpected[i, j]) / AExpected[i, j]
else
begin
ErrorMsg('Zero expected value found.');
ACellChiSqr[i, j] := 0;
end;
AChiSqr := AChiSqr + ACellChiSqr[i, j];
end;
end;
ADegreesOfFreedom := (numRows - 1) * (numCols - 1);
// 2x2 corrected chi-square
if AYates then
begin
AdjChiSqr := abs((AFrequencies[0, 0] * AFrequencies[1, 1]) - (AFrequencies[0, 1] * AFrequencies[1, 0]));
AdjChiSqr := sqr(AdjChiSqr - numCases / 2.0) * numCases; // numerator
AdjChiSqr := AdjChiSqr / (AFrequencies[0, 2] * AFrequencies[1,2] * AFrequencies[2,0] * AFrequencies[2,1]);
end;
end;
(*
for i := 1 to NRows do
begin
for j := 1 to NCols do
begin
Expected[i-1,j-1] := Freq[NRows,j-1] * Freq[i-1,NCols] / Ncases;
if (Expected[i-1,j-1] > 0.0) then
CellChi[i-1,j-1] := sqr(Freq[i-1,j-1] - Expected[i-1,j-1])/ Expected[i-1,j-1]
else
begin
ErrorMsg('Zero expected value found.');
CellChi[i-1,j-1] := 0.0;
end;
ChiSquare := ChiSquare + CellChi[i-1,j-1];
end;
end;
df := (NRows - 1) * (NCols - 1);
if yates then // 2 x 2 corrected chi-square
begin
AdjChiSqr := abs((Freq[0,0] * Freq[1,1]) - (Freq[0,1] * Freq[1,0]));
AdjChiSqr := sqr(AdjChiSqr - NCases / 2.0) * NCases; // numerator
AdjChiSqr := AdjChiSqr / (Freq[0,2] * Freq[1,2] * Freq[2,0] * Freq[2,1]);
AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df);
end;
end;
*)
// Equal probability
if (numRows = 1) then
begin
for j := 0 to numCols - 1 do
begin
AExpected[0, j] := numCases / numCols;
if (AExpected[0, j] > 0) then
ACellChiSqr[0, j] := sqr(AFrequencies[0, j] - AExpected[0, j]) / AExpected[0, j];
AChiSqr := AChiSqr + ACellChiSqr[0, j];
end;
ADegreesOfFreedom := numCols - 1;
end;
// Equal probability
if (numCols = 1) then
begin
for i := 0 to numRows - 1 do
begin
AExpected[i, 0] := numCases / numRows;
if (AExpected[i, 0] > 0) then
ACellChiSqr[i, 0] := sqr(AFrequencies[i, 0] - AExpected[i, 0]) / AExpected[i, 0];
AChiSqr := AChiSqr + ACellChiSqr[i, 0];
end;
ADegreesOfFreedom := numRows - 1;
end;
end;
procedure TRIDITForm.GetFrequencies(const AColNoSelected: IntDyneVec;
ANumRows, ANumCols: Integer; out ANumCases: Integer; out AFrequencies: IntDyneMat);
var
i, j, row, col: Integer;
begin
// over-dimension the matrix because row and column totals will be in
// last column and row.
AFrequencies := nil;
SetLength(AFrequencies, ANumRows+1, ANumCols+1);
for i := 0 to ANumRows do
for j := 0 to ANumCols do
AFrequencies[i, j] := 0;
// Get cell data
ANumCases := 0;
for i := 0 to NoCases-1 do
begin
row := i+1;
for j := 0 to ANumCols-1 do
begin
col := AColNoSelected[j+1]; // +1 because row index is at index 0
AFrequencies[i, j] := StrToInt(OS3MainFrm.DataGrid.Cells[col, row]);
ANumCases := ANumCases + AFrequencies[i, j];
end;
end;
AFrequencies[ANumRows, ANumCols] := ANumCases;
// Calculate row totals
for i := 0 to ANumRows-1 do
for j := 0 to ANumCols-1 do
AFrequencies[i, ANumCols] := AFrequencies[i, ANumCols] + AFrequencies[i, j];
// Calculate col totals
for j := 0 to ANumCols-1 do
for i := 0 to ANumRows-1 do
AFrequencies[ANumRows, j] := AFrequencies[ANumRows, j] + AFrequencies[i, j];
end;
procedure TRIDITForm.RefGrpClick(Sender: TObject);
begin
RefEdit.Visible := RefGrp.ItemIndex > 0;