Files
lazarus-ccr/applications/lazstats/source_orig/descriptiveunit.pas
wp_xxyyzz 045c799d49 LazStats: Adding original source, part 3.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@7882 8e941d3f-bd1b-0410-a28a-d453659cc2b4
2020-11-16 11:07:56 +00:00

405 lines
15 KiB
ObjectPascal

unit DescriptiveUnit;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, LResources, Forms, Controls, Graphics, Dialogs,
StdCtrls, ExtCtrls, Buttons, MainUnit, Globals, functionsLib, OutPutUnit, DataProcs,
DictionaryUnit, contexthelpunit;
type
{ TDescriptiveFrm }
TDescriptiveFrm = class(TForm)
CaseChk: TCheckBox;
CheckBox1: TCheckBox;
AltQrtilesChk: TCheckBox;
HelpBtn: TButton;
Label2: TLabel;
Label3: TLabel;
PcntileChk: TCheckBox;
GroupBox1: TGroupBox;
InBtn: TBitBtn;
OutBtn: TBitBtn;
AllBtn: TBitBtn;
ResetBtn: TButton;
CancelBtn: TButton;
OKBtn: TButton;
CIEdit: TEdit;
Label1: TLabel;
VarList: TListBox;
SelList: TListBox;
procedure AllBtnClick(Sender: TObject);
procedure CancelBtnClick(Sender: TObject);
procedure FormShow(Sender: TObject);
procedure HelpBtnClick(Sender: TObject);
procedure InBtnClick(Sender: TObject);
procedure OKBtnClick(Sender: TObject);
procedure OutBtnClick(Sender: TObject);
procedure ResetBtnClick(Sender: TObject);
private
{ private declarations }
sum, variance, stddev, value, mean, min, max, range, skew, prob, df, CI : double;
kurtosis, z, semean, seskew, sekurtosis, deviation, devsqr, M2, M3, M4 : double;
Q1, Q2, Q3, Q12, Q13, Q14, Q15, Q16, Q17, Q18, Q22, Q23, Q24, Q25, Q26 : double;
Q27, Q28, Q32, Q33, Q34, Q35, Q36, Q37, Q38, IQrange : double;
ncases, noselected : integer;
cellstring, gridstring, outline : string;
selected : IntDyneVec;
public
{ public declarations }
end;
var
DescriptiveFrm: TDescriptiveFrm;
implementation
{ TDescriptiveFrm }
procedure TDescriptiveFrm.ResetBtnClick(Sender: TObject);
var
i : integer;
begin
VarList.Clear;
SelList.Clear;
for i := 1 to NoVariables do
VarList.Items.Add(OS3MainFrm.DataGrid.Cells[i,0]);
CaseChk.Checked := false;
OutBtn.Enabled := false;
CaseChk.Checked := false;
Selected := nil;
end;
procedure TDescriptiveFrm.FormShow(Sender: TObject);
begin
ResetBtnClick(self);
end;
procedure TDescriptiveFrm.HelpBtnClick(Sender: TObject);
begin
ContextHelpForm.HelpMessage((Sender as TButton).tag);
end;
procedure TDescriptiveFrm.InBtnClick(Sender: TObject);
var
index, i : integer;
begin
index := VarList.Items.Count;
i := 0;
while i < index do
begin
if (VarList.Selected[i]) then
begin
SelList.Items.Add(VarList.Items.Strings[i]);
VarList.Items.Delete(i);
index := index - 1;
i := 0;
end
else i := i + 1;
end;
OutBtn.Enabled := true;
end;
procedure TDescriptiveFrm.OKBtnClick(Sender: TObject);
var
i, j, k, m : integer;
num, den, cases : double;
values, pcntrank : DblDyneVec;
begin
noselected := SelList.Items.Count;
SetLength(Selected,noselected);
// Get selected variables
for i := 1 to noselected do
begin
cellstring := SelList.Items.Strings[i-1];
for j := 1 to NoVariables do
if cellstring = OS3MainFrm.DataGrid.Cells[j,0] then selected[i-1] := j;
end;
OutPutFrm.RichEdit.Clear;
// OutPutFrm.RichEdit.ParaGraph.Alignment := taLeftJustify;
OutPutFrm.RichEdit.Lines.Add('DISTRIBUTION PARAMETER ESTIMATES');
OutPutFrm.RichEdit.Lines.Add('');
SetLength(Values,NoCases);
SetLength(pcntrank,NoCases);
for j := 1 to noselected do
begin
deviation := 0.0;
devsqr := 0.0;
M2 := 0.0;
M3 := 0.0;
M4 := 0.0;
sum := 0.0;
variance := 0.0;
stddev := 0.0;
range := 0.0;
skew := 0.0;
kurtosis := 0.0;
ncases := 0;
df := 0.0;
seskew := 0.0;
kurtosis := 0.0;
sekurtosis := 0.0;
k := selected[j-1];
CI := StrToFloat(CIEdit.Text) / 100.0;
prob := CI;
CI := (1.0 - CI) / 2.0;
CI := 1.0 - CI;
if CheckBox1.Checked then // add a new column to the grid
begin
gridstring := OS3MainFrm.DataGrid.Cells[k,0];
gridstring := Gridstring + 'z';
DictionaryFrm.NewVar(NoVariables+1);
DictionaryFrm.DictGrid.Cells[1,NoVariables] := gridstring;
OS3MainFrm.DataGrid.Cells[NoVariables,0] := gridstring;
end;
// Accumulate sums of squares, sums, etc. for variable j
min := 1.0e20;
max := -1.0e20;
for i := 1 to NoCases do
begin
if not GoodRecord(i,noselected,selected) then continue;
if CaseChk.Checked then
begin
if not ValidValue(i,selected[j-1]) then continue;
end
else if not GoodRecord(i,noselected,selected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]);
ncases := ncases + 1;
values[ncases-1] := value;
df := df + 1.0;
sum := sum + value;
variance := variance + (value * value);
if (value < min) then min := value;
if (value > max) then max := value;
end;
{ for i := 0 to ncases - 1 do // this is for debugging
begin
outline := format('Value = %8.3f',[values[i]]);
OutPutFrm.RichEdit.Lines.Add(outline);
end;
OutPutFrm.ShowModal;
OutPutFrm.RichEdit.Clear; }
if ncases > 0 then
begin
mean := sum / ncases;
range := max - min;
end;
if ncases > 1 then
begin
variance := variance - (sum * sum) / ncases;
variance := variance / (ncases - 1);
stddev := sqrt(variance);
semean := sqrt(variance / ncases);
if ncases < 120 then CI := semean * inverset(CI,df)
else CI := semean * inversez(CI);
end;
if variance = 0.0 then
begin
cellstring := OS3MainFrm.DataGrid.Cells[k,0];
ShowMessage('No Variability in '+ cellstring + ' variable - ending analysis');
exit;
end;
if ncases > 3 then // obtain skew, kurtosis and z scores
begin
for i := 1 to NoCases do
begin
if CaseChk.Checked then
begin
if not ValidValue(i,selected[j-1]) then continue;
end
else if not GoodRecord(i,noselected,selected) then continue;
value := StrToFloat(OS3MainFrm.DataGrid.Cells[k,i]);
if stddev > 0.0 then
begin
deviation := value - mean;
devsqr := deviation * deviation;
M2 := M2 + devsqr;
M3 := M3 + (deviation * devsqr);
M4 := M4 + (devsqr * devsqr);
z := (value - mean) / stddev;
if CheckBox1.Checked then
begin
cellstring := format('%8.5f',[z]);
OS3MainFrm.DataGrid.Cells[NoVariables,i] := cellstring;
end;
end;
end;
if ncases > 2 then
begin
skew := (ncases * M3) /
((ncases - 1) * (ncases - 2) * stddev * variance);
cases := ncases;
num := 6.0 * cases * (cases - 1.0);
den := (cases - 2.0) * (cases + 1.0) * (cases + 3.0);
seskew := sqrt(num / den);
end;
if ncases > 3 then
begin
kurtosis := (ncases * (ncases + 1) * M4) -
(3 * M2 * M2 * (ncases - 1));
kurtosis := kurtosis /
( (ncases - 1) * (ncases - 2) * (ncases - 3) * (variance * variance) );
sekurtosis := sqrt( (4.0 * (ncases * ncases - 1) * (seskew * seskew) ) /
( (ncases - 3) * (ncases + 5) ) );
end;
end;
// output results for the kth variable
cellstring := OS3MainFrm.DataGrid.Cells[k,0];
outline := format('%s (N = %d) Sum = %14.3f',[cellstring,ncases,sum]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Mean = %10.3f Variance = %10.3f Std.Dev. = %10.3f',
[mean, variance, stddev]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Std.Error of Mean = %10.3f',[semean]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('%4.2f percent Confidence Interval for the mean = %8.3f to %8.3f',
[prob * 100.0, mean - CI, mean + CI]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Range = %10.3f Minimum = %10.3f Maximum = %10.3f',
[range, min, max]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Skewness = %10.3f Std. Error of Skew = %10.3f',
[skew, seskew]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Kurtosis = %10.3f Std. Error Kurtosis = %10.3f',
[kurtosis, sekurtosis]);
OutPutFrm.RichEdit.Lines.Add(outline);
OutPutFrm.RichEdit.Lines.Add('');
if ncases > 4 then // get percentiles and quartiles
begin
// get percentile ranks
if pcntilechk.Checked then PRank(k,pcntrank);
// sort values and get quartiles
for i := 0 to ncases - 2 do
begin
for m := i + 1 to ncases -1 do
begin
if values[i] > values[m] then
begin
value := values[i];
values[i] := values[m];
values[m] := value;
end;
end;
end;
Q1 := Quartiles(2,0.25,ncases,values);
Q2 := Quartiles(2,0.5,ncases,values);
Q3 := Quartiles(2,0.75,ncases,values);
IQrange := Q3 - Q1;
outline := format('First Quartile = %8.3f',[Q1]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Median = %8.3f',[Q2]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Third Quartile = %8.3f',[Q3]);
OutPutFrm.RichEdit.Lines.Add(outline);
outline := format('Interquartile range = %8.3f',[IQrange]);
OutPutFrm.RichEdit.Lines.Add(outline);
OutPutFrm.RichEdit.Lines.Add('');
end;
if (AltQrtilesChk.Checked) then
begin
OutPutFrm.RichEdit.Lines.Add('Alternative Methods for Obtaining Quartiles');
OutPutFrm.RichEdit.Lines.Add(' Method 1 2 3 4 5 6 7 8');
OutPutFrm.RichEdit.Lines.Add('Pcntile');
Q1 := Quartiles(1,0.25,ncases,values);
Q12 := Quartiles(2,0.25,ncases,values);
Q13 := Quartiles(3,0.25,ncases,values);
Q14 := Quartiles(4,0.25,ncases,values);
Q15 := Quartiles(5,0.25,ncases,values);
Q16 := Quartiles(6,0.25,ncases,values);
Q17 := Quartiles(7,0.25,ncases,values);
Q18 := Quartiles(8,0.25,ncases,values);
outline := format('Q1 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f',
[Q1,Q12,Q13,Q14,Q15,Q16,Q17,Q18]);
OutPutFrm.RichEdit.Lines.Add(outline);
Q2 := Quartiles(1,0.5,ncases,values);
Q22 := Quartiles(2,0.5,ncases,values);
Q23 := Quartiles(3,0.5,ncases,values);
Q24 := Quartiles(4,0.5,ncases,values);
Q25 := Quartiles(5,0.5,ncases,values);
Q26 := Quartiles(6,0.5,ncases,values);
Q27 := Quartiles(7,0.5,ncases,values);
Q28 := Quartiles(8,0.5,ncases,values);
outline := format('Q2 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f',
[Q2,Q22,Q23,Q24,Q25,Q26,Q27,Q28]);
OutPutFrm.RichEdit.Lines.Add(outline);
Q3 := Quartiles(1,0.75,ncases,values);
Q32 := Quartiles(2,0.75,ncases,values);
Q33 := Quartiles(3,0.75,ncases,values);
Q34 := Quartiles(4,0.75,ncases,values);
Q35 := Quartiles(5,0.75,ncases,values);
Q36 := Quartiles(6,0.75,ncases,values);
Q37 := Quartiles(7,0.75,ncases,values);
Q38 := Quartiles(8,0.75,ncases,values);
outline := format('Q3 %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f',
[Q3,Q32,Q33,Q34,Q35,Q36,Q37,Q38]);
OutPutFrm.RichEdit.Lines.Add(outline);
OutPutFrm.RichEdit.Lines.Add('NOTES:');
OutPutFrm.RichEdit.Lines.Add('Method 1 is the weighted average at X[np] where n is no. of cases, p is percentile / 100');
OutPutFrm.RichEdit.Lines.Add('Method 2 is the weighted average at X[(n+1)p] This is used in this program.');
OutPutFrm.RichEdit.Lines.Add('Method 3 is the empirical distribution function.');
OutPutFrm.RichEdit.Lines.Add('Method 4 is called the empirical distribution function - averaging.');
OutPutFrm.RichEdit.Lines.Add('Method 5 is called the empirical distribution function = Interpolation.');
OutPutFrm.RichEdit.Lines.Add('Method 6 is the closest observation method.');
OutPutFrm.RichEdit.Lines.Add('Method 7 is from the TrueBasic Statistics Graphics Toolkit.');
OutPutFrm.RichEdit.Lines.Add('Method 8 was used in an older Microsoft Excel version.');
OutPutFrm.RichEdit.Lines.Add('See the internet site http://www.xycoon.com/ for the above.');
OutPutFrm.RichEdit.Lines.Add('========================================================');
OutPutFrm.RichEdit.Lines.Add('');
end; // end of experimental alternatives
end; // next j variable
OutPutFrm.ShowModal;
Selected := nil;
Values := nil;
pcntrank := nil;
end;
procedure TDescriptiveFrm.OutBtnClick(Sender: TObject);
var
index: integer;
begin
index := SelList.ItemIndex;
VarList.Items.Add(SelList.Items.Strings[index]);
SelList.Items.Delete(index);
InBtn.Enabled := true;
if SelList.Items.Count = 0 then OutBtn.Enabled := false;
end;
procedure TDescriptiveFrm.CancelBtnClick(Sender: TObject);
begin
selected := nil;
end;
procedure TDescriptiveFrm.AllBtnClick(Sender: TObject);
var
i : integer;
begin
for i := 0 to VarList.Items.Count-1 do
SelList.Items.Add(VarList.Items.Strings[i]);
VarList.Clear;
OutBtn.Enabled := true;
InBtn.Enabled := false;
end;
initialization
{$I descriptiveunit.lrs}
end.