LazStats: Refactor ChiSqrUnit.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7805 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2020-10-26 00:18:22 +00:00
parent bd9d5086b8
commit fb531ea79d
2 changed files with 389 additions and 241 deletions

View File

@ -59,6 +59,35 @@ type
FRowColPropsReportFrame: TReportFrame; FRowColPropsReportFrame: TReportFrame;
FCellChiSqrReportFrame: TReportFrame; FCellChiSqrReportFrame: TReportFrame;
function CalcGStatistic(const AFrequencies: IntDyneMat;
const AExpected: DblDyneMat): Double;
function CalcLikelihoodRatio(const AFrequencies: IntDyneMat;
const AExpected: DblDyneMat): Double;
function CalcPearsonR(const AFrequencies: IntDyneMat): Double;
procedure GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat;
out AExpected, ACellChiSqr: DblDyneMat;
out AChiSqr: Double);
procedure GetFrequencies(const AColNoSelected: IntDyneVec;
ARowIndex, AColIndex, ADepIndex: Integer;
out ANumRows, ANumCols, ANumCases: Integer;
out AFrequencies: IntDyneMat);
procedure GetYatesCorrection(const AFrequencies: IntDyneMat;
out AdjChiSqr: Double);
procedure GetColProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
procedure GetRowProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
procedure GetTotalProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
protected protected
procedure AdjustConstraints; override; procedure AdjustConstraints; override;
procedure Compute; override; procedure Compute; override;
@ -79,7 +108,7 @@ implementation
uses uses
Math, Math,
Utils, GridProcs; Utils, GridProcs, MatrixUnit;
{ TChiSqrFrm } { TChiSqrFrm }
@ -145,6 +174,75 @@ begin
end; end;
function TChiSqrFrm.CalcGStatistic(const AFrequencies: IntDyneMat;
const AExpected: DblDyneMat): Double;
var
numRows, numCols: Integer;
i, j: Integer;
begin
MatSize(AExpected, numRows, numCols);
Result := 0.0;
for i := 0 to numRows-1 do
for j := 0 to numCols-1 do
if (AExpected[i, j] > 0) then
Result := Result + AFrequencies[i, j] * ln(AFrequencies[i, j] / AExpected[i, j]);
Result := 2.0 * Result;
end;
function TChiSqrFrm.CalcLikelihoodRatio(const AFrequencies: IntDyneMat;
const AExpected: DblDyneMat): Double;
var
numRows, numCols: Integer;
i, j: Integer;
begin
MatSize(AExpected, numRows, numCols);
Result := 0.0;
for i := 0 to numRows-1 do
for j := 0 to numCols-1 do
if (AFrequencies[i, j] > 0.0) then
Result := Result + AFrequencies[i, j] * ln(AExpected[i, j] / AFrequencies[i, j]);
Result := -2.0 * Result;
end;
function TChiSqrFrm.CalcPearsonR(const AFrequencies: IntDyneMat): Double;
var
numRows, numCols, numCases: Integer;
sumX, sumY: Double;
varX, varY: Double;
i, j: Integer;
begin
MatSize(AFrequencies, numRows, numCols);
dec(numRows); // Do not iterate into the totals row and column
dec(numCols);
numCases := AFrequencies[numRows, numCols];
SumX := 0;
SumY := 0;
VarX := 0;
VarY := 0;
for i := 0 to numRows-1 do
sumX := sumX + ( (i+1) * AFrequencies[i, numCols] );
for j := 0 to numCols-1 do
sumY := sumY + ( (j+1) * AFrequencies[numRows, j] );
for i := 0 to numRows-1 do
varX := varX + ( sqr(i+1) * AFrequencies[i, numCols] );
for j := 0 to numCols-1 do
varY := varY + ( sqr(j+1) * AFrequencies[numRows, j] );
varX := varX - sqr(sumX) / numCases;
varY := varY - sqr(sumY) / numCases;
Result := 0;
for i := 0 to numRows-1 do
for j := 0 to numCols-1 do
Result := Result + ((i+1)*(j+1) * AFrequencies[i, j]);
Result := Result - (sumX * sumY / numCases);
Result := Result / sqrt(varX * varY);
end;
procedure TChiSqrFrm.ColInClick(Sender: TObject); procedure TChiSqrFrm.ColInClick(Sender: TObject);
var var
index: integer; index: integer;
@ -179,20 +277,19 @@ var
CellChi: DblDyneMat = nil; CellChi: DblDyneMat = nil;
RowLabels: StrDyneVec = nil; RowLabels: StrDyneVec = nil;
ColLabels: StrDyneVec = nil; ColLabels: StrDyneVec = nil;
yates : boolean = false; yates : boolean;
NoSelected, NCases, NRows, NCols: Integer; NoSelected, NCases, NRows, NCols: Integer;
i, j, RowNo, ColNo, DepNo, MinRow, MaxRow, MinCol, MaxCol: integer; i, j, rowNo, colNo, depNo: integer;
Row, Col, FObs, df: integer; Row, Col, df: integer;
PObs, ChiSquare, ProbChi, phi, SumX, SumY, VarX, VarY, likelihood: double; ChiSquare, probChi, phi: double;
title : string; title : string;
AdjChiSqr, AdjProbChi, probLikelihood, G, pearsonr, MantelHaenszel, MHprob: double; AdjChiSqr, AdjProbChi, pearsonr, G, likelihood, MantelHaenszel, prob: double;
CoefCont, CramerV: double; CoefCont, CramerV: double;
lReport: TStrings; lReport: TStrings;
begin begin
RowNo := GetVariableIndex(OS3MainFrm.DataGrid, RowEdit.Text); rowNo := GetVariableIndex(OS3MainFrm.DataGrid, RowEdit.Text);
ColNo := GetVariableIndex(OS3MainFrm.DataGrid, ColEdit.Text); colNo := GetVariableIndex(OS3MainFrm.DataGrid, ColEdit.Text);
DepNo := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text); depNo := GetVariableIndex(OS3MainFrm.DataGrid, DepEdit.Text);
SetLength(ColNoSelected, NoVariables); SetLength(ColNoSelected, NoVariables);
ColNoSelected[0] := RowNo; ColNoSelected[0] := RowNo;
@ -203,126 +300,29 @@ begin
NoSelected := 3; NoSelected := 3;
ColNoSelected[2] := DepNo; ColNoSelected[2] := DepNo;
end; end;
SetLength(ColNoSelected, NoSelected); SetLength(ColNoSelected, NoSelected); // trim length
// Get min and max of row and col numbers // Get frequencies from the grid
MinRow := MaxInt; GetFrequencies(ColNoselected, rowNo, colNo, depNo, nRows, nCols, nCases, Freq);
MaxRow := -MinRow; df := (nRows - 1) * (nCols - 1);
MinCol := MaxInt;
MaxCol := -MinCol;
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected) then continue;
Row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[RowNo, i])));
Col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ColNo, i])));
if Row > MaxRow then MaxRow := Row;
if Row < MinRow then MinRow := Row;
if Col > MaxCol then MaxCol := Col;
if Col < MinCol then MinCol := Col;
end;
NRows := MaxRow - MinRow + 1;
NCols := MaxCol - MinCol + 1;
// allocate and initialize // Calculate expected values and cell chi-squares
SetLength(Freq, NRows+1, NCols+1); GetExpectedAndCellChiSqr(Freq, Expected, CellChi, ChiSquare);
SetLength(Prop, NRows+1, NCols+1);
SetLength(Expected, NRows, NCols);
SetLength(CellChi, NRows, NCols);
SetLength(RowLabels, NRows+1);
SetLength(ColLabels, NCols+1);
for i := 1 to NRows + 1 do
for j := 1 to NCols + 1 do Freq[i-1, j-1] := 0;
// get cell data
NCases := 0;
case InputGrp.ItemIndex of
0 : begin // count number of cases in each row and column combination
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected) then continue;
NCases := NCases + 1;
Row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[RowNo, i])));
Col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ColNo, i])));
Row := Row - MinRow + 1;
Col := Col - MinCol + 1;
Freq[Row-1, Col-1] := Freq[Row-1, Col-1] + 1;
end;
end;
1 : begin // read frequencies data from grid
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, ColNoSelected) then continue;
Row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[RowNo, i])));
Col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ColNo, i])));
Row := Row - MinRow + 1;
Col := Col - MinCol + 1;
FObs := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[DepNo, i])));
Freq[Row-1, Col-1] := Freq[Row-1, Col-1] + FObs;
NCases := NCases + FObs;
end;
end;
2 : begin // get no. of cases and proportions for each cell
NCases := StrToInt(NCasesEdit.Text);
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.Datagrid, i, ColNoSelected) then continue;
Row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[RowNo, i])));
Col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ColNo, i])));
Row := Row - MinRow + 1;
Col := Col - MinCol + 1;
PObs := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[DepNo, i]));
Freq[Row-1, Col-1] := Freq[Row-1, Col-1] + round(PObs * NCases);
end;
end;
end; // end case
Freq[Nrows, Ncols] := NCases;
// Calculate expected values
// Get row totals first
for i := 1 to NRows do
for j := 1 to Ncols do
Freq[i-1, NCols] := Freq[i-1, NCols] + Freq[i-1, j-1];
// Get col totals next
for j := 1 to NCols do
for i := 1 to NRows do
Freq[NRows, j-1] := Freq[NRows, j-1] + Freq[i-1, j-1];
// Then get expected values and cell chi-squares
ChiSquare := 0.0;
AdjChisqr := 0.0;
if (YatesChk.Checked) and (NRows = 2) and (NCols = 2) then
yates := true;
for i := 1 to NRows do
begin
for j := 1 to Ncols do
begin
Expected[i-1, j-1] := Freq[NRows, j-1] * Freq[i-1, NCols] / NCases;
if Expected[i-1, j-1] > 0.0 then
CellChi[i-1, j-1] := sqr(Freq[i-1, j-1] - Expected[i-1, j-1]) / Expected[i-1, j-1]
else begin
ErrorMsg('Zero expected value found.');
CellChi[i-1,j-1] := 0.0;
end;
ChiSquare := ChiSquare + CellChi[i-1, j-1];
end;
end;
df := (Nrows - 1) * (Ncols - 1);
if yates then // 2 x 2 corrected chi-square
begin
AdjChiSqr := abs((Freq[0,0] * Freq[1,1]) - (Freq[0,1] * Freq[1,0]));
AdjChiSqr := sqr(AdjChiSqr - NCases / 2.0) * NCases; // numerator
AdjChiSqr := AdjChiSqr / (Freq[0,2] * Freq[1,2] * Freq[2,0] * Freq[2,1]);
AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df);
end;
ProbChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi ProbChi := 1.0 - ChiSquaredProb(ChiSquare, df); // prob. larger chi
yates := YatesChk.Checked and (nRows = 2) and (nCols = 2);
if yates then begin
GetYatesCorrection(Freq, AdjChiSqr);
AdjProbChi := 1.0 - ChiSquaredProb(AdjChiSqr, df);
end else
AdjChiSqr := 0;
// Get row and column labels for reports
SetLength(RowLabels, NRows+1);
for i := 1 to NRows do RowLabels[i-1] := Format('Row %d', [i]); for i := 1 to NRows do RowLabels[i-1] := Format('Row %d', [i]);
RowLabels[NRows] := 'Total'; RowLabels[NRows] := 'Total';
SetLength(ColLabels, NCols+1);
for j := 1 to NCols do ColLabels[j-1] := Format('Col.%d', [j]); for j := 1 to NCols do ColLabels[j-1] := Format('Col.%d', [j]);
ColLabels[NCols] := 'Total'; ColLabels[NCols] := 'Total';
@ -356,53 +356,24 @@ begin
if PropsChk.Checked then if PropsChk.Checked then
begin begin
RowColPage.TabVisible := true; RowColPage.TabVisible := true;
title := 'ROW PROPORTIONS'; GetRowProportions(Freq, Prop);
for i := 1 to NRows + 1 do
begin
for j := 1 to NCols do
begin
if Freq[i-1, NCols] > 0.0 then
Prop[i-1, j-1] := Freq[i-1, j-1] / Freq[i-1, NCols]
else
Prop[i-1, j-1] := 0.0;
end;
if Freq[i-1, NCols] > 0.0 then
Prop[i-1, NCols] := 1.0
else
Prop[i-1, NCols] := 0.0;
end;
lReport.Add('CHI-SQUARE ANALYSIS RESULTS'); lReport.Add('CHI-SQUARE ANALYSIS RESULTS');
lReport.Add(''); lReport.Add('');
title := 'ROW PROPORTIONS';
MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport); MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport);
lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add(''); lReport.Add('');
GetColProportions(Freq, Prop);
title := 'COLUMN PROPORTIONS'; title := 'COLUMN PROPORTIONS';
for j := 1 to Ncols + 1 do
begin
for i := 1 to Nrows do
begin
if Freq[Nrows,j-1] > 0.0 then
Prop[i-1,j-1] := Freq[i-1,j-1] / Freq[Nrows,j-1]
else
Prop[i-1,j-1] := 0.0;
end;
if Freq[Nrows,j-1] > 0.0 then
Prop[NRows,j-1] := 1.0
else
Prop[NRows,j-1] := 0.0;
end;
MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport); MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport);
lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(DIVIDER_SMALL_AUTO);
lReport.Add(''); lReport.Add('');
GetTotalProportions(Freq, Prop);
Title := 'PROPORTIONS OF TOTAL N'; Title := 'PROPORTIONS OF TOTAL N';
for i := 1 to NRows + 1 do
for j := 1 to NCols + 1 do
Prop[i-1,j-1] := Freq[i-1,j-1] / NCases;
Prop[Nrows, Ncols] := 1.0;
MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport); MatPrint(Prop, NRows+1, NCols+1, title, RowLabels, ColLabels, NCases, lReport);
lReport.Add(DIVIDER_SMALL_AUTO); lReport.Add(DIVIDER_SMALL_AUTO);
@ -438,26 +409,16 @@ begin
lReport.Add(''); lReport.Add('');
end; end;
likelihood := 0.0; likelihood := CalcLikelihoodRatio(Freq, Expected);
for i := 0 to NRows-1 do prob := 1.0 - ChiSquaredProb(likelihood, df);
for j := 0 to NCols-1 do
if (Freq[i, j] > 0.0) then
likelihood := Likelihood + (Freq[i, j] * (ln(Expected[i, j] / Freq[i,j ])));
likelihood := -2.0 * likelihood;
probLikelihood := 1.0 - ChiSquaredProb(likelihood, df);
lReport.Add('Likelihood Ratio: %.3f', [likelihood]); lReport.Add('Likelihood Ratio: %.3f', [likelihood]);
lReport.Add(' Probability > value is %.4f', [probLikelihood]); lReport.Add(' Probability > value is %.4f', [prob]);
lReport.Add(''); lReport.Add('');
G := 0.0; G := CalcGStatistic(Freq, Expected);
for i := 0 to NRows-1 do prob := 1.0 - ChiSquaredProb(G, df);
for j := 0 to NCols-1 do
if (Expected[i, j] > 0) then
G := G + Freq[i, j] * (ln(Freq[i, j] / Expected[i, j]));
G := 2.0 * G;
probLikelihood := 1.0 - ChiSquaredProb(G, df);
lReport.Add('G statistic: %.3f ', [G]); lReport.Add('G statistic: %.3f ', [G]);
lReport.Add(' Probability > value is %.4f', [G, probLikelihood]); lReport.Add(' Probability > value is %.4f', [prob]);
lReport.Add(''); lReport.Add('');
if ((NRows > 1) and (NCols > 1)) then if ((NRows > 1) and (NCols > 1)) then
@ -466,29 +427,14 @@ begin
lReport.Add('phi correlation: %.4f', [phi]); lReport.Add('phi correlation: %.4f', [phi]);
lReport.Add(''); lReport.Add('');
pearsonr := 0.0; pearsonR := CalcPearsonR(Freq);
SumX := 0.0;
SumY := 0.0;
VarX := 0.0;
VarY := 0.0;
for i := 0 to NRows-1 do SumX := SumX + ( (i+1) * Freq[i, NCols] );
for j := 0 to NCols-1 do SumY := SumY + ( (j+1) * Freq[NRows, j] );
for i := 0 to NRows-1 do VarX := VarX + ( sqr(i+1) * Freq[i, NCols] );
for j := 0 to NCols-1 do VarY := VarY + ( sqr(j+1) * Freq[NRows, j] );
VarX := VarX - sqr(SumX) / NCases;
VarY := VarY - sqr(SumY) / NCases;
for i := 0 to NRows-1 do
for j := 0 to NCols-1 do
pearsonR := pearsonR + ((i+1)*(j+1) * Freq[i, j]);
pearsonR := pearsonR - (SumX * SumY / Ncases);
pearsonR := pearsonR / sqrt(VarX * VarY);
lReport.Add('Pearson Correlation r: %.4f', [pearsonR]); lReport.Add('Pearson Correlation r: %.4f', [pearsonR]);
lReport.Add(''); lReport.Add('');
MantelHaenszel := (NCases-1) * sqr(pearsonR); MantelHaenszel := (NCases-1) * sqr(pearsonR);
MHprob := 1.0 - ChiSquaredProb(MantelHaenszel, 1); prob := 1.0 - ChiSquaredProb(MantelHaenszel, 1);
lReport.Add('Mantel-Haenszel Test of Linear Association: %.3f', [MantelHaenszel]); lReport.Add('Mantel-Haenszel Test of Linear Association: %.3f', [MantelHaenszel]);
lReport.Add(' Probability > value is %.4f', [MantelHaenszel, MHprob]); lReport.Add(' Probability > value is %.4f', [prob]);
lReport.Add(''); lReport.Add('');
CoefCont := sqrt(ChiSquare / (ChiSquare + NCases)); CoefCont := sqrt(ChiSquare / (ChiSquare + NCases));
@ -496,9 +442,9 @@ begin
lReport.Add(''); lReport.Add('');
if (Nrows < Ncols) then if (Nrows < Ncols) then
CramerV := sqrt(ChiSquare / (NCases * ((NRows-1)))) CramerV := sqrt(ChiSquare / (NCases * ((NRows-1))))
else else
CramerV := sqrt(ChiSquare / (NCases * ((NCols-1)))); CramerV := sqrt(ChiSquare / (NCases * ((NCols-1))));
lReport.Add('Cramers V is %.3f', [CramerV]); lReport.Add('Cramers V is %.3f', [CramerV]);
lReport.Add(''); lReport.Add('');
end; end;
@ -511,57 +457,48 @@ begin
// save frequency data file if elected // save frequency data file if elected
if SaveFChk.Checked then if SaveFChk.Checked then
begin begin
OS3MainFrm.mnuFileCloseClick(self); OS3MainFrm.mnuFileCloseClick(self);
OS3MainFrm.FileNameEdit.Text := ''; OS3MainFrm.FileNameEdit.Text := '';
for i := 1 to DictionaryFrm.DictGrid.RowCount - 1 do for i := 1 to DictionaryFrm.DictGrid.RowCount - 1 do
for j := 0 to 7 do DictionaryFrm.DictGrid.Cells[j,i] := ''; for j := 0 to 7 do DictionaryFrm.DictGrid.Cells[j,i] := '';
DictionaryFrm.DictGrid.RowCount := 1; DictionaryFrm.DictGrid.RowCount := 1;
// DictionaryFrm.FileNameEdit.Text := '';
// get labels for new file // get labels for new file
ColLabels[0] := 'ROW'; ColLabels[0] := 'ROW';
ColLabels[1] := 'COL'; ColLabels[1] := 'COL';
ColLabels[2] := 'FREQ'; ColLabels[2] := 'FREQ';
// create new variables
Row := 0; // create new variables
OS3MainFrm.DataGrid.ColCount := 4; Row := 0;
DictionaryFrm.DictGrid.ColCount := 8; OS3MainFrm.DataGrid.ColCount := 4;
NoVariables := 0; DictionaryFrm.DictGrid.ColCount := 8;
for i := 1 to 3 do NoVariables := 0;
begin for i := 1 to 3 do
col := NoVariables + 1; begin
DictionaryFrm.NewVar(col); col := NoVariables + 1;
DictionaryFrm.DictGrid.Cells[1,col] := ColLabels[i-1]; DictionaryFrm.NewVar(col);
OS3MainFrm.DataGrid.Cells[col,0] := ColLabels[i-1]; DictionaryFrm.DictGrid.Cells[1,col] := ColLabels[i-1];
NoVariables := NoVariables + 1; OS3MainFrm.DataGrid.Cells[col,0] := ColLabels[i-1];
end; NoVariables := NoVariables + 1;
OS3MainFrm.DataGrid.RowCount := (Nrows * NCols) + 1; end;
for i := 1 to Nrows do OS3MainFrm.DataGrid.RowCount := (Nrows * NCols) + 1;
begin
for j := 1 to Ncols do for i := 1 to Nrows do
begin begin
Row := Row + 1; for j := 1 to Ncols do
OS3MainFrm.DataGrid.Cells[0,Row] := Format('Case:%d',[Row]); begin
OS3MainFrm.DataGrid.Cells[1,Row] := IntToStr(i); Row := Row + 1;
OS3MainFrm.DataGrid.Cells[2,Row] := IntToStr(j); OS3MainFrm.DataGrid.Cells[0,Row] := Format('Case:%d',[Row]);
OS3MainFrm.DataGrid.Cells[3,Row] := IntToStr(Freq[i-1,j-1]); OS3MainFrm.DataGrid.Cells[1,Row] := IntToStr(i);
end; OS3MainFrm.DataGrid.Cells[2,Row] := IntToStr(j);
end; OS3MainFrm.DataGrid.Cells[3,Row] := IntToStr(Freq[i-1,j-1]);
NoCases := Row; end;
OS3MainFrm.FileNameEdit.Text := 'ChiSqrFreq.LAZ'; end;
OS3MainFrm.NoCasesEdit.Text := IntToStr(NoCases); NoCases := Row;
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables); OS3MainFrm.FileNameEdit.Text := 'ChiSqrFreq.LAZ';
// OS3MainFrm.SaveFileBtnClick(self); OS3MainFrm.NoCasesEdit.Text := IntToStr(NoCases);
OS3MainFrm.NoVarsEdit.Text := IntToStr(NoVariables);
end; end;
//clean up
ColLabels := nil;
RowLabels := nil;
CellChi := nil;
Expected := nil;
Prop := nil;
Freq := nil;
ColNoSelected := nil;
end; end;
@ -590,6 +527,207 @@ begin
end; end;
procedure TChiSqrFrm.GetExpectedAndCellChiSqr(const AFrequencies: IntDyneMat;
out AExpected, ACellChiSqr: DblDyneMat;
out AChiSqr: Double);
var
numRows, numCols, numCases: Integer;
i, j: Integer;
begin
MatSize(AFrequencies, numRows, numCols); // contains the totals row/col
numCases := AFrequencies[numRows-1, numCols-1];
SetLength(AExpected, numRows-1, numCols-1); // -1: we don't need the totals here
SetLength(ACellChiSqr, numRows-1, numCols-1);
AChiSqr := 0;
for i := 0 to numRows-2 do // -2 instead of -1 to skip the totals row
begin
for j := 0 to numCols-2 do // -2 instead of -1 to skip the totals column
begin
AExpected[i, j] := AFrequencies[numRows-1, j] * AFrequencies[i, numCols-1] / numCases;
if AExpected[i, j] > 0 then
ACellChiSqr[i, j] := sqr(AFrequencies[i, j] - AExpected[i, j]) / AExpected[i, j]
else begin
ErrorMsg('Zero expected value found.');
ACellChiSqr[i, j] := 0;
end;
AChiSqr := AChiSqr + ACellChiSqr[i, j];
end;
end;
end;
procedure TChiSqrFrm.GetFrequencies(const AColNoSelected: IntDyneVec;
ARowIndex, AColIndex, ADepIndex: Integer; out ANumRows, ANumCols, ANumCases: Integer;
out AFrequencies: IntDyneMat);
var
i, j: Integer;
row, col: Integer;
minRow, maxRow, minCol, maxCol: Integer;
FObs: Integer;
PObs: Double;
begin
// Get min and max of row and col numbers
minRow := MaxInt;
maxRow := -MinRow;
minCol := MaxInt;
maxCol := -MinCol;
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, AColNoSelected) then continue;
row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ARowIndex, i])));
col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[AColIndex, i])));
if row > maxRow then maxRow := row;
if row < minRow then minRow := row;
if col > maxCol then maxCol := col;
if col < minCol then minCol := col;
end;
ANumRows := maxRow - minRow + 1;
ANumCols := maxCol - minCol + 1;
AFrequencies := nil;
SetLength(AFrequencies, ANumRows+1, ANumCols+1); // +1 for row and column totals
ANumCases := 0;
case InputGrp.ItemIndex of
0 : begin // count number of cases in each row and column combination
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, AColNoSelected) then continue;
row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ARowIndex, i])));
col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[AColIndex, i])));
row := row - minRow;
col := col - minCol;
AFrequencies[row, col] := AFrequencies[row, col] + 1;
ANumCases := ANumCases + 1;
end;
end;
1 : begin // read frequencies data from grid
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.DataGrid, i, AColNoSelected) then continue;
row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ARowIndex, i])));
col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ARowIndex, i])));
row := row - minRow;
col := col - minCol;
FObs := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ADepIndex, i])));
AFrequencies[row, col] := AFrequencies[row, col] + FObs;
ANumCases := ANumCases + FObs;
end;
end;
2 : begin // get no. of cases and proportions for each cell
ANumCases := StrToInt(NCasesEdit.Text);
for i := 1 to NoCases do
begin
if not GoodRecord(OS3MainFrm.Datagrid, i, AColNoSelected) then continue;
row := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ARowIndex, i])));
col := round(StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[AColIndex, i])));
row := row - minRow + 1;
col := col - minCol + 1;
PObs := StrToFloat(Trim(OS3MainFrm.DataGrid.Cells[ADepIndex, i]));
AFrequencies[row, col] := AFrequencies[row, col] + round(PObs * ANumCases);
end;
end;
end;
// Get row totals
for i := 0 to ANumRows-1 do
for j := 0 to ANumCols-1 do
AFrequencies[i, ANumCols] := AFrequencies[i, ANumCols] + AFrequencies[i, j];
// Get col totals
for j := 0 to ANumCols-1 do
for i := 0 to ANumRows-1 do
AFrequencies[ANumRows, j] := AFrequencies[ANumRows, j] + AFrequencies[i, j];
// Grand total
AFrequencies[ANumRows, ANumCols] := ANumCases;
end;
// 2 x 2 corrected chi-square
procedure TChiSqrFrm.GetYatesCorrection(const AFrequencies: IntDyneMat;
out AdjChiSqr: Double);
var
numRows, numCols, numCases: Integer;
begin
MatSize(AFrequencies, numRows, numCols);
numCases := AFrequencies[numRows-1, numCols-1];
AdjChiSqr := abs((AFrequencies[0,0] * AFrequencies[1,1]) - (AFrequencies[0,1] * AFrequencies[1,0]));
AdjChiSqr := sqr(AdjChiSqr - numCases / 2.0) * numCases; // numerator
AdjChiSqr := AdjChiSqr / (AFrequencies[0,2] * AFrequencies[1,2] * AFrequencies[2,0] * AFrequencies[2,1]);
end;
procedure TChiSqrFrm.GetColProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
var
numRows, numCols: Integer;
i, j: Integer;
begin
MatSize(AFrequencies, numRows, numCols); // totals in last row/col
SetLength(AProportions, numRows, numCols);
for j := 0 to numCols-1 do
begin
for i := 0 to numRows-1 do // Do not process the totals row
begin
if AFrequencies[numRows-1, j] > 0.0 then
AProportions[i, j] := AFrequencies[i, j] / AFrequencies[numRows-1, j]
else
AProportions[i, j] := 0.0;
end;
if AFrequencies[numRows-1, j] > 0.0 then
AProportions[numRows-1,j] := 1.0
else
AProportions[numRows-1,j] := 0.0;
end;
end;
procedure TChiSqrFrm.GetRowProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
var
numRows, numCols: Integer;
i, j: Integer;
begin
MatSize(AFrequencies, numRows, numCols); // totals in last row/col
SetLength(AProportions, numRows, numCols);
for i := 0 to numRows-1 do
begin
for j := 0 to numCols-1 do // do not touch the totals column here
begin
if AFrequencies[i, numCols-1] > 0.0 then
AProportions[i, j] := AFrequencies[i, j] / AFrequencies[i, numCols-1]
else
AProportions[i-1, j-1] := 0.0;
end;
if AFrequencies[i, numCols-1] > 0.0 then
AProportions[i, numCols-1] := 1.0
else
AProportions[i, numCols-1] := 0.0;
end;
end;
procedure TChiSqrFrm.GetTotalProportions(const AFrequencies: IntDyneMat;
out AProportions: DblDyneMat);
var
numRows, numCols, numCases: Integer;
i, j: Integer;
begin
MatSize(AFrequencies, numRows, numCols); // totals in last row/col
numCases := AFrequencies[numRows-1, numCols-1];
SetLength(AProportions, numRows, numCols);
for i := 0 to numRows-1 do
for j := 0 to numCols-1 do
AProportions[i, j] := AFrequencies[i, j] / numCases;
AProportions[numRows-1, numCols-1] := 1.0;
end;
procedure TChiSqrFrm.InputGrpClick(Sender: TObject); procedure TChiSqrFrm.InputGrpClick(Sender: TObject);
begin begin
case InputGrp.ItemIndex of case InputGrp.ItemIndex of

View File

@ -14,6 +14,8 @@ type
TDblMatrix = DblDyneMat; TDblMatrix = DblDyneMat;
TDblVector = DblDyneVec; TDblVector = DblDyneVec;
TIntMatrix = IntDyneMat;
EMatrix = class(Exception); EMatrix = class(Exception);
// Vectors // Vectors
@ -79,6 +81,7 @@ function MatNumRows(A: TDblMatrix): Integer;
function MatRowMeans(A: TDblMatrix): TDblVector; function MatRowMeans(A: TDblMatrix): TDblVector;
function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector; function MatRowVector(A: TDblMatrix; ARowIndex: Integer): TDblVector;
procedure MatSize(A: TDblMatrix; out n, m: Integer); procedure MatSize(A: TDblMatrix; out n, m: Integer);
procedure MatSize(A: TIntMatrix; out n, m: Integer);
function MatTransposed(A: TDblMatrix): TDblMatrix; function MatTransposed(A: TDblMatrix): TDblMatrix;
function SubMatrix(A: TDblMatrix; i1,j1, i2,j2: Integer): TDblMatrix; function SubMatrix(A: TDblMatrix; i1,j1, i2,j2: Integer): TDblMatrix;
@ -699,6 +702,13 @@ begin
end; end;
procedure MatSize(A: TIntMatrix; out n, m: Integer);
begin
n := Length(A);
m := Length(A[0]);
end;
function MatTransposed(A: TDblMatrix): TDblMatrix; function MatTransposed(A: TDblMatrix): TDblMatrix;
var var
n, m, i, j: Integer; n, m, i, j: Integer;