From 136244017b5a29fccd10bdf3f213e649686edf2b Mon Sep 17 00:00:00 2001 From: Jeremy Jay Date: Wed, 23 Feb 2022 00:54:59 -0500 Subject: [PATCH] fixing and improving tests. allow fuzzier comparisons to cell content --- commonxl/cell.go | 155 ++++++++++++++++++++++++++++++++++++ commonxl/sheet.go | 9 +++ testdata/testing.tsv | 7 ++ testdata/testing.xls | Bin 0 -> 8704 bytes xls/comp_test.go | 5 +- xls/sheets.go | 5 +- xls/simple_test.go | 184 ++++++++++++++----------------------------- 7 files changed, 234 insertions(+), 131 deletions(-) create mode 100644 testdata/testing.tsv create mode 100644 testdata/testing.xls diff --git a/commonxl/cell.go b/commonxl/cell.go index e30fbe0..c6f8efc 100644 --- a/commonxl/cell.go +++ b/commonxl/cell.go @@ -95,6 +95,15 @@ func (c Cell) FormatNo() uint16 { return 0 } +// Clone returns the new copy of this Cell. +func (c Cell) Clone() Cell { + c2 := make([]interface{}, len(c)) + for i, x := range c { + c2[i] = x + } + return c2 +} + /////// var boolStrings = map[string]bool{ @@ -304,3 +313,149 @@ func (c *Cell) SetFormatNumber(f uint16) { (*c)[2] = f } } + +func (c Cell) Equal(other Cell) bool { + if c.Type() == FloatCell || other.Type() == FloatCell || + c.Type() == IntegerCell || other.Type() == IntegerCell { + v1, ok := c[0].(float64) + v1x, okx := c[0].(int64) + if okx { + v1 = float64(v1x) + ok = true + } + if !ok { + fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v1) + } + v2, ok := other[0].(float64) + v2x, okx := other[0].(int64) + if okx { + v2 = float64(v2x) + ok = true + } + if !ok { + fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v2) + } + return v1 == v2 + } + + return c.Less(other) == other.Less(c) +} + +func (c Cell) Less(other Cell) bool { + if len(c) == 0 { + return false + } + switch v1 := c[0].(type) { + case nil: + return false + case bool: + // F < T = T + // F < F = F + // T < T = F + // T < F = F + if v1 { + return false + } + + // if v2 is truthy, return true + switch v2 := other[0].(type) { + case nil: + return false + case bool: + return v2 + case int64: + return v2 != 0 + case float64: + return v2 != 0.0 + case string: + return boolStrings[v2] + } + + case int64: + // v1 < v2 + + switch v2 := other[0].(type) { + case nil: + return false + case bool: + x := int64(0) + if v2 { + x = 1 + } + return v1 < x + case int64: + return v1 < v2 + case float64: + if v2 < math.MinInt64 { + return false + } + if v2 > math.MaxInt64 { + return true + } + return float64(v1) < v2 + case string: + var x int64 + _, err := fmt.Sscanf(v2, "%d", &x) + if err == nil { + return v1 < x + } + return fmt.Sprint(v1) < v2 + } + case float64: + switch v2 := other[0].(type) { + case nil: + return false + case bool: + x := float64(0.0) + if v2 { + x = 1.0 + } + return v1 < x + case int64: + if v1 < math.MinInt64 { + return true + } + if v1 > math.MaxInt64 { + return false + } + return v1 < float64(v2) + case float64: + return v1 < v2 + case string: + var x float64 + _, err := fmt.Sscanf(v2, "%g", &x) + if err == nil { + return v1 < x + } + return fmt.Sprint(v1) < v2 + } + case string: + //return v1 < fmt.Sprint(other[0]) + + switch v2 := other[0].(type) { + case nil: + return false + case bool: + return v2 && !boolStrings[v1] + case int64: + var x int64 + _, err := fmt.Sscanf(v1, "%d", &x) + if err == nil { + return x < v2 + } + return v1 < fmt.Sprint(v2) + case float64: + var x float64 + _, err := fmt.Sscanf(v1, "%g", &x) + if err == nil { + return x < v2 + } + return v1 < fmt.Sprint(v2) + case string: + return v1 < v2 + } + + } + + panic("unable to compare cells (invalid internal type)") +} diff --git a/commonxl/sheet.go b/commonxl/sheet.go index 19167db..467fef4 100644 --- a/commonxl/sheet.go +++ b/commonxl/sheet.go @@ -108,6 +108,15 @@ func (s *Sheet) Next() bool { return true } +// Raw extracts the raw Cell interfaces underlying the current row. +func (s *Sheet) Raw() []Cell { + rr := make([]Cell, s.NumCols) + for i, cell := range s.Rows[s.CurRow-1] { + rr[i] = cell.Clone() + } + return rr +} + // Strings extracts values from the current record into a list of strings. func (s *Sheet) Strings() []string { res := make([]string, s.NumCols) diff --git a/testdata/testing.tsv b/testdata/testing.tsv new file mode 100644 index 0000000..876ade3 --- /dev/null +++ b/testdata/testing.tsv @@ -0,0 +1,7 @@ +title 1 title 2 title 3 title 4 +c c c c +b 2 3 4 +b 2 j 4 +b 1 2 1 +b 4 3 2 +1 1 1 1 diff --git a/testdata/testing.xls b/testdata/testing.xls new file mode 100644 index 0000000000000000000000000000000000000000..c068c231fd068d4c116508683f344baf7521d07f GIT binary patch literal 8704 zcmeHMU2I%O75-*_?2SX+-Lz>-+nB{^9XobRoGmn@Bx|*4)Cl#5M&L*dN*vct(xez% zgIWpbCT)O-L^2cwRtiGl1ra3FKdCREI1+Dc+t4%+Dix(Es*reb5D`>rvwYu~dwqA; zyT)l>DB8Ju&dhu>bI$ypJLm4{S2O32KX>C>s0kYo$K~=Ov{c|ONL!o++L>3n%%GyH zgS7I^@X?ro2QkiP5?S1huP~m#6I#kS;oK`nzVPOgb6{MDq^-Y3tG$p-#jMh_ue!;i z!iKYddG4pKa4vrQ@$Pd@^`^CX7#!}$K8*Sd`R-Ti>nSwA+YocoEv<@Y1yv+Myb|yyTJUN zmLGN+=z4O8V?^39Xf=))?=;>{?(mY;e8Z5#NDy(@Vc02`!YNLfM!OzMC~prmCAu?)ig1`^UTY?@s^f2PyQfc;U~hiQPa_ znMgW6)aGM*WI3O!NO?Yfj<<7;2QA%^*o9sUzzJH(s?l;(PD@y-#i^~K#kqsReEY_# z2GD^Sb|-}?4UM7I-GyP=wmm%C6XXNj3e`2*-922@efS3XBx{VYeirvJ_jQsauBEK` zBz9oX#Zi9pH^DTvx-YTi1b50Xt@$v{2X-z=x%jJfi)=b8T;04>PGSqSJ<4a;Z2xAq z8#9|EM7R+_3;@dILBv*&e11n)ltaif0LZMy*Q!{|ShNC*2tik$Yz=Q5c?36DP$ zOCVi{mlbCUEfy~+BrLwB&}uOXu=v_Sviw6_fVYJwPMm-(M!>W(Wg$!yr0U@e*%YR} zwvKrsg#xChrx_J6a^f=HT(t(}ONVjag(2+9lyUhNoA8}2L-_e`r~Yd>N@e~;C8{f; z$b7k2LaE5OR77s9j9jTioZS%>Nn^m+SP@5$9tA5PS0s?*k5n`wmSbB2+tTg49V>9= zEk4Ne^ma{K9dE@mDIGSn%$W5{c&jI<>A3u2>IAHuK z<4+quC=vLswfUZ<4;lZy@snu7ViZize%*+0fN6)5E`toF-NuRE*_+GZ_wLvLP-1Bc zFI)XrjGs3C2jj2V9qo+q*NvYw{zv0~axyn%cLS|?p2--kX6Hp+C=Ofij^OnlfhM@) zqAdJXRK&8>^`w8%=)Z@D_@<(LG;%UAlXeBna>80s1+1wBa<#y1)aXXUc8Wd1E!M_? zeVdD+4z-c|6(L=9MYxKccB~Pjsl3|K8FIanQh4u|7hk(~=L7lehOacd*Z2WL3ak@` zTX`@XjqrY!oJxF=yvErANkDX?Tj|GimFqb=%4`YXK*|f#+ zM%z-;qI`mr8s-u67k8~5Si(h(X_b}Enb<*cVan8En&MhaC#e>b+*(X;m|9G?y-MtT zTX4`TQSdp(ociK(G3L|@pNlgm{mQA%b(9w7)Go@YU6fP1Fehp9f`B;ZM%#)z-G@Ps z#AcCHfW&8!^q_RB50Q*`l?-1l{Ti(##`%Oge0%jJYOgIBme1tXBEf!z2&@l@3K=tb z>Cgxw8OaJ6X=|ZdUMRzlBUvHCOH~iUI$m>2g`5zHguM^4ndYK%eiveXPoz@V9^Sau_^Y?~c&abp(Oo?% zt-gRqSMR8Xzko+=!+ZV^ujxJY?)gzwsc)n}WsO8X!F&6<#^b1UB%;1nD~qPayAx5p z#!f}kf3cUTJj`qZ|AqKJWd#%}{hI6?Tz2|WdEq##*RB8mj62QqYzXJG*U!Vh!-5d+ zX@K`Nz*jcF7uCh3&H8h5wO;UcWO#+268gEdc+-Dr`Y%oYh3BW~zXbkF(|?ht)AV2D z75y*xFO9yR>$Va*`|{ai{Wqnb`7ytr_r3e;jl_~Q*f)vki<^8SN6M1qk9U(6lU9=? zOF?;sFxG!y`WHs!isePh3zYLE|6ZR57Lnv=sf|9U@N16!@2mfMrAJ@!^gqZoB*|M! zlJ_kyP+z+~PSXECH<9!rUq zr$`zbKV{L&uupZkjq#^R8q4jBH7`o93t}dlp=JiOZ?w*uCsA#vxBlUNdB3)DGyjpB Gf&T#F6S1fO literal 0 HcmV?d00001 diff --git a/xls/comp_test.go b/xls/comp_test.go index 7fd51c4..6a61d51 100644 --- a/xls/comp_test.go +++ b/xls/comp_test.go @@ -7,7 +7,7 @@ import ( "testing" ) -func TestManyFiles(t *testing.T) { +func TestAllFiles(t *testing.T) { err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error { if info.IsDir() { return nil @@ -17,7 +17,7 @@ func TestManyFiles(t *testing.T) { } wb, err := Open(p) if err != nil { - return nil //err + return err } sheets, err := wb.List() @@ -25,7 +25,6 @@ func TestManyFiles(t *testing.T) { return err } for _, s := range sheets { - //log.Println(s) sheet, err := wb.Get(s) if err != nil { return err diff --git a/xls/sheets.go b/xls/sheets.go index b595c76..652917b 100644 --- a/xls/sheets.go +++ b/xls/sheets.go @@ -210,7 +210,7 @@ func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { fno = b.xfs[ixfe] } res.Put(rowIndex, colIndex, rval, fno) - //log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String()) + //log.Printf("RK spec: %d %d = %+v", rowIndex, colIndex, rval) case RecTypeFormula: formulaRow = binary.LittleEndian.Uint16(r.Data[:2]) @@ -302,6 +302,7 @@ func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { } } res.Set(int(formulaRow), int(formulaCol), fstr) + //log.Printf("String direct: %d %d '%s'", int(formulaRow), int(formulaCol), fstr) case RecTypeLabelSst: rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) @@ -318,7 +319,7 @@ func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { if b.strings[sstIndex] != "" { res.Put(rowIndex, colIndex, b.strings[sstIndex], fno) } - //log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex]) + //log.Printf("SST spec: %d %d = [%d] '%s' %d", rowIndex, colIndex, sstIndex, b.strings[sstIndex], fno) case RecTypeHLink: firstRow := binary.LittleEndian.Uint16(r.Data[:2]) diff --git a/xls/simple_test.go b/xls/simple_test.go index 6230da8..013d949 100644 --- a/xls/simple_test.go +++ b/xls/simple_test.go @@ -1,20 +1,53 @@ package xls import ( + "bufio" + "log" "os" "strings" "testing" + + "github.com/pbnjay/grate/commonxl" ) -var testFiles = []string{ - "../testdata/multi_test.xls", - "../testdata/basic.xls", - "../testdata/basic2.xls", +var testFilePairs = [][]string{ + {"../testdata/basic.xls", "../testdata/basic.tsv"}, + {"../testdata/testing.xls", "../testdata/testing.tsv"}, + + // TODO: custom formatter support + //{"../testdata/basic2.xls", "../testdata/basic2.tsv"}, + + // TODO: datetime and fraction formatter support + //{"../testdata/multi_test.xls", "../testdata/multi_test.tsv"}, } -func TestLoading(t *testing.T) { - for _, fn := range testFiles { - wb, err := Open(fn) +func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) { + f, err := os.Open(fn) + if err != nil { + return nil, err + } + xs := &commonxl.Sheet{ + Formatter: ff, + } + + row := 0 + s := bufio.NewScanner(f) + for s.Scan() { + record := strings.Split(s.Text(), "\t") + for i, val := range record { + xs.Put(row, i, val, 0) + } + row++ + } + return xs, f.Close() +} + +func TestBasic(t *testing.T) { + for _, fnames := range testFilePairs { + var trueData *commonxl.Sheet + log.Println("Testing ", fnames[0]) + + wb, err := Open(fnames[0]) if err != nil { t.Fatal(err) } @@ -23,14 +56,30 @@ func TestLoading(t *testing.T) { if err != nil { t.Fatal(err) } + firstLoad := true for _, s := range sheets { sheet, err := wb.Get(s) if err != nil { t.Fatal(err) } + xsheet := sheet.(*commonxl.Sheet) + if firstLoad { + trueData, err = loadTestData(fnames[1], xsheet.Formatter) + if err != nil { + t.Fatal(err) + } + firstLoad = false + } - for sheet.Next() { - sheet.Strings() + for xrow, xdata := range xsheet.Rows { + for xcol, xval := range xdata { + //t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol]) + if !trueData.Rows[xrow][xcol].Equal(xval) { + t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol, + xval, trueData.Rows[xrow][xcol]) + t.Fail() + } + } } } @@ -40,120 +89,3 @@ func TestLoading(t *testing.T) { } } } - -func TestBasic(t *testing.T) { - trueFile, err := os.ReadFile("../testdata/basic.tsv") - if err != nil { - t.Skip() - } - lines := strings.Split(string(trueFile), "\n") - - fn := "../testdata/basic.xls" - wb, err := Open(fn) - if err != nil { - t.Fatal(err) - } - - sheets, err := wb.List() - if err != nil { - t.Fatal(err) - } - for _, s := range sheets { - sheet, err := wb.Get(s) - if err != nil { - t.Fatal(err) - } - - i := 0 - for sheet.Next() { - row := strings.Join(sheet.Strings(), "\t") - if lines[i] != row { - t.Fatalf("line %d mismatch: '%s' <> '%s'", i, row, lines[i]) - } - i++ - } - } - - err = wb.Close() - if err != nil { - t.Fatal(err) - } -} - -func TestBasic2(t *testing.T) { - trueFile, err := os.ReadFile("../testdata/basic2.tsv") - if err != nil { - t.Skip() - } - lines := strings.Split(string(trueFile), "\n") - - fn := "../testdata/basic2.xls" - wb, err := Open(fn) - if err != nil { - t.Fatal(err) - } - - sheets, err := wb.List() - if err != nil { - t.Fatal(err) - } - for _, s := range sheets { - sheet, err := wb.Get(s) - if err != nil { - t.Fatal(err) - } - - i := 0 - for sheet.Next() { - row := strings.Join(sheet.Strings(), "\t") - if lines[i] != row { - t.Fatalf("line %d mismatch: '%s' <> '%s'", i, row, lines[i]) - } - i++ - } - } - - err = wb.Close() - if err != nil { - t.Fatal(err) - } -} - -func TestMulti(t *testing.T) { - trueFile, err := os.ReadFile("../testdata/multi_test.tsv") - if err != nil { - t.Skip() - } - lines := strings.Split(string(trueFile), "\n") - - fn := "../testdata/multi_test.xls" - wb, err := Open(fn) - if err != nil { - t.Fatal(err) - } - - sheets, err := wb.List() - if err != nil { - t.Fatal(err) - } - for _, s := range sheets { - sheet, err := wb.Get(s) - if err != nil { - t.Fatal(err) - } - - i := 0 - for sheet.Next() { - row := strings.Join(sheet.Strings(), "\t") - if lines[i] != row { - t.Fatalf("line %d mismatch: '%s' <> '%s'", i, row, lines[i]) - } - i++ - } - } - - err = wb.Close() - if err != nil { - t.Fatal(err) - } -}