1
0
mirror of https://github.com/pbnjay/grate.git synced 2025-01-06 10:44:07 +02:00

reuse memory as often as we can. update tests

This commit is contained in:
Jeremy Jay 2021-02-13 23:36:42 -05:00
parent a5be267bf7
commit d9793eb9dd
10 changed files with 309 additions and 108 deletions

View File

@ -15,6 +15,9 @@ type Source interface {
// Get a Collection from the source by name.
Get(name string) (Collection, error)
// Close the source and discard memory.
Close() error
}
// Collection represents an iterable collection of records.

View File

@ -22,6 +22,10 @@ func (t *simpleFile) List() ([]string, error) {
return []string{t.filename}, nil
}
// Close reports no error; it performs no other work.
func (t *simpleFile) Close() error {
return nil
}
// Get a Collection from the source by name.
func (t *simpleFile) Get(name string) (grate.Collection, error) {
return t, nil

44
xls/comp_test.go Normal file
View File

@ -0,0 +1,44 @@
package xls
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestManyFiles walks ../testdata and, for every file whose name ends in
// ".xls", opens it, lists its sheets, and iterates every row of every sheet.
// Files that cannot be opened are skipped (and logged); any other failure
// aborts the walk and fails the test.
func TestManyFiles(t *testing.T) {
	err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk passes a non-nil err when it could not stat or read p;
			// info may be nil in that case, so it must not be touched.
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".xls") {
			return nil
		}

		wb, err := Open(p)
		if err != nil {
			// Best effort: files that fail to open are skipped rather than
			// failing the test, but leave a trace in the test log.
			t.Logf("skipping %s: %v", p, err)
			return nil
		}

		sheets, err := wb.List()
		if err != nil {
			_ = wb.Close() // the List error is the one worth reporting
			return err
		}
		for _, s := range sheets {
			sheet, err := wb.Get(s)
			if err != nil {
				_ = wb.Close() // the Get error is the one worth reporting
				return err
			}
			for sheet.Next() {
				sheet.Strings()
			}
		}
		return wb.Close()
	})
	if err != nil {
		t.Fatal(err)
	}
}

View File

@ -81,17 +81,26 @@ type row struct {
cols []interface{}
}
// makeCells appends empty rows, each maxCol+1 cells wide, until s.rows holds
// at least maxRow+1 rows, so the sheet is always a complete matrix.
func (s *WorkSheet) makeCells() {
	for n := len(s.rows); n <= s.maxRow; n++ {
		cells := make([]interface{}, s.maxCol+1)
		s.rows = append(s.rows, &row{cols: cells})
	}
}
// placeValue stores val in the cell at (rowIndex, colIndex). Coordinates
// greater than the sheet's maxRow or maxCol are ignored without error.
func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
	inRange := rowIndex <= s.maxRow && colIndex <= s.maxCol
	if !inRange {
		// invalid
		return
	}

	// Append empty rows up to and including rowIndex so the matrix stays
	// complete before the cell is written.
	for n := len(s.rows); n <= rowIndex; n++ {
		cells := make([]interface{}, s.maxCol+1)
		s.rows = append(s.rows, &row{cols: cells})
	}

	target := s.rows[rowIndex]
	target.cols[colIndex] = val
}
@ -101,6 +110,9 @@ func (s *WorkSheet) IsEmpty() bool {
}
func (s *WorkSheet) parse() error {
// temporary string buffer
us := make([]uint16, 8224)
inSubstream := 0
for idx, r := range s.b.substreams[s.ss] {
if inSubstream > 0 {
@ -145,7 +157,8 @@ func (s *WorkSheet) parse() error {
s.empty = true
} else {
// pre-allocate cells
s.placeValue(s.maxRow, s.maxCol, nil)
s.makeCells()
//s.placeValue(s.maxRow, s.maxCol, nil)
}
}
}
@ -181,79 +194,74 @@ func (s *WorkSheet) parse() error {
}
case RecTypeBoolErr:
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
if r.Data[7] == 0 {
bv := false
if r.Data[6] == 1 {
bv = true
}
s.placeValue(int(rowIndex), int(colIndex), bv)
s.placeValue(rowIndex, colIndex, bv)
//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
} else {
be, ok := berrLookup[r.Data[6]]
if !ok {
be = "<unknown error>"
}
s.placeValue(int(rowIndex), int(colIndex), be)
s.placeValue(rowIndex, colIndex, be)
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
}
case RecTypeMulRk:
mr := &shMulRK{}
nrk := int((r.RecSize - 6) / 6)
mr.RowIndex = binary.LittleEndian.Uint16(r.Data[:2])
mr.FirstCol = binary.LittleEndian.Uint16(r.Data[2:4])
mr.Values = make([]RkRec, nrk)
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
for i := 0; i < nrk; i++ {
off := 4 + i*6
rr := RkRec{}
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[off:])
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[off:]))
mr.Values[i] = rr
ixfe := binary.LittleEndian.Uint16(r.Data[off:])
value := RKNumber(binary.LittleEndian.Uint32(r.Data[off:]))
var rval interface{}
if rr.Value.IsInteger() {
rval = rr.Value.Int()
if value.IsInteger() {
rval = value.Int()
} else {
rval = rr.Value.Float64()
fno := s.b.xfs[rr.IXFCell]
rval = value.Float64()
fno := s.b.xfs[ixfe]
rval, _ = s.b.nfmt.Apply(fno, rval)
}
s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
s.placeValue(rowIndex, colIndex+i, rval)
}
//log.Printf("mulrow spec: %+v", *mr)
case RecTypeNumber:
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
xnum := binary.LittleEndian.Uint64(r.Data[6:])
value := math.Float64frombits(xnum)
fno := s.b.xfs[ixfe]
rval, _ := s.b.nfmt.Apply(fno, value)
s.placeValue(int(rowIndex), int(colIndex), rval)
s.placeValue(rowIndex, colIndex, rval)
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
case RecTypeRK:
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
rr := RkRec{}
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[4:])
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[6:]))
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
ixfe := int(binary.LittleEndian.Uint16(r.Data[4:]))
value := RKNumber(binary.LittleEndian.Uint32(r.Data[6:]))
var rval interface{}
if rr.Value.IsInteger() {
rval = rr.Value.Int()
if value.IsInteger() {
rval = value.Int()
} else {
rval = rr.Value.Float64()
fno := s.b.xfs[rr.IXFCell]
rval = value.Float64()
fno := s.b.xfs[ixfe]
rval, _ = s.b.nfmt.Apply(fno, rval)
}
s.placeValue(int(rowIndex), int(colIndex), rval)
s.placeValue(rowIndex, colIndex, rval)
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
case RecTypeFormula:
@ -301,7 +309,10 @@ func (s *WorkSheet) parse() error {
fstr = string(r.Data[3:])
} else {
raw := r.Data[3:]
us := make([]uint16, charCount)
if int(charCount) > cap(us) {
us = make([]uint16, charCount)
}
us = us[:charCount]
for i := 0; i < int(charCount); i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
@ -322,7 +333,7 @@ func (s *WorkSheet) parse() error {
} else {
raw := r2.Data[1:]
slen := len(raw) / 2
us := make([]uint16, slen)
us = us[:slen]
for i := 0; i < slen; i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
@ -336,35 +347,34 @@ func (s *WorkSheet) parse() error {
s.placeValue(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst:
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
sstIndex := binary.LittleEndian.Uint32(r.Data[6:])
if int(sstIndex) > len(s.b.strings) {
sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:]))
if sstIndex > len(s.b.strings) {
return errors.New("xls: invalid sst index")
}
s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
s.placeValue(rowIndex, colIndex, s.b.strings[sstIndex])
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
case RecTypeHLink:
loc := &shRef8{}
loc.FirstRow = binary.LittleEndian.Uint16(r.Data[:2])
loc.LastRow = binary.LittleEndian.Uint16(r.Data[2:4])
loc.FirstCol = binary.LittleEndian.Uint16(r.Data[4:6])
loc.LastCol = binary.LittleEndian.Uint16(r.Data[6:])
if int(loc.FirstCol) > s.maxCol {
firstRow := binary.LittleEndian.Uint16(r.Data[:2])
lastRow := binary.LittleEndian.Uint16(r.Data[2:4])
firstCol := binary.LittleEndian.Uint16(r.Data[4:6])
lastCol := binary.LittleEndian.Uint16(r.Data[6:])
if int(firstCol) > s.maxCol {
//log.Println("invalid hyperlink column")
continue
}
if int(loc.FirstRow) > s.maxRow {
if int(firstRow) > s.maxRow {
//log.Println("invalid hyperlink row")
continue
}
if loc.LastRow == 0xFFFF {
loc.LastRow = uint16(s.maxRow)
if lastRow == 0xFFFF {
lastRow = uint16(s.maxRow)
}
if loc.LastCol == 0xFF {
loc.LastCol = uint16(s.maxCol)
if lastCol == 0xFF {
lastCol = uint16(s.maxCol)
}
displayText, linkText, err := decodeHyperlinks(r.Data[8:])
@ -374,18 +384,18 @@ func (s *WorkSheet) parse() error {
}
// apply merge cell rules
for rn := int(loc.FirstRow); rn <= int(loc.LastRow); rn++ {
for cn := int(loc.FirstCol); cn <= int(loc.LastCol); cn++ {
if rn == int(loc.FirstRow) && cn == int(loc.FirstCol) {
for rn := int(firstRow); rn <= int(lastRow); rn++ {
for cn := int(firstCol); cn <= int(lastCol); cn++ {
if rn == int(firstRow) && cn == int(firstCol) {
s.placeValue(rn, cn, displayText+" <"+linkText+">")
} else if cn == int(loc.FirstCol) {
} else if cn == int(firstCol) {
// first and last column MAY be the same
if rn == int(loc.LastRow) {
if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged)
} else {
s.placeValue(rn, cn, continueRowMerged)
}
} else if cn == int(loc.LastCol) {
} else if cn == int(lastCol) {
// first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged)
} else {
@ -397,32 +407,31 @@ func (s *WorkSheet) parse() error {
case RecTypeMergeCells:
cmcs := binary.LittleEndian.Uint16(r.Data[:2])
raw := r.Data[2:]
loc := shRef8{}
for i := 0; i < int(cmcs); i++ {
loc.FirstRow = binary.LittleEndian.Uint16(raw[:2])
loc.LastRow = binary.LittleEndian.Uint16(raw[2:4])
loc.FirstCol = binary.LittleEndian.Uint16(raw[4:6])
loc.LastCol = binary.LittleEndian.Uint16(raw[6:])
firstRow := binary.LittleEndian.Uint16(r.Data[:2])
lastRow := binary.LittleEndian.Uint16(r.Data[2:4])
firstCol := binary.LittleEndian.Uint16(r.Data[4:6])
lastCol := binary.LittleEndian.Uint16(r.Data[6:])
raw = raw[8:]
if loc.LastRow == 0xFFFF {
loc.LastRow = uint16(s.maxRow)
if lastRow == 0xFFFF {
lastRow = uint16(s.maxRow)
}
if loc.LastCol == 0xFF {
loc.LastCol = uint16(s.maxCol)
if lastCol == 0xFF {
lastCol = uint16(s.maxCol)
}
for rn := int(loc.FirstRow); rn <= int(loc.LastRow); rn++ {
for cn := int(loc.FirstCol); cn <= int(loc.LastCol); cn++ {
if rn == int(loc.FirstRow) && cn == int(loc.FirstCol) {
for rn := int(firstRow); rn <= int(lastRow); rn++ {
for cn := int(firstCol); cn <= int(lastCol); cn++ {
if rn == int(firstRow) && cn == int(firstCol) {
// should be a value there already!
} else if cn == int(loc.FirstCol) {
} else if cn == int(firstCol) {
// first and last column MAY be the same
if rn == int(loc.LastRow) {
if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged)
} else {
s.placeValue(rn, cn, continueRowMerged)
}
} else if cn == int(loc.LastCol) {
} else if cn == int(lastCol) {
// first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged)
} else {

View File

@ -1,19 +1,20 @@
package xls
import (
"context"
"log"
"testing"
)
func TestHeader(t *testing.T) {
wb, err := Open(context.Background(), "testdata/test.xls")
wb, err := Open("../testdata/test.xls")
if err != nil {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
@ -24,16 +25,24 @@ func TestHeader(t *testing.T) {
sheet.Strings()
}
}
err = wb.Close()
if err != nil {
t.Fatal(err)
}
}
func TestHeader2(t *testing.T) {
wb, err := Open(context.Background(), "testdata/test2.xls")
wb, err := Open("../testdata/test2.xls")
if err != nil {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
@ -44,16 +53,24 @@ func TestHeader2(t *testing.T) {
sheet.Strings()
}
}
err = wb.Close()
if err != nil {
t.Fatal(err)
}
}
func TestHeader3(t *testing.T) {
wb, err := Open(context.Background(), "testdata/test3.xls")
wb, err := Open("../testdata/test3.xls")
if err != nil {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
@ -64,17 +81,24 @@ func TestHeader3(t *testing.T) {
sheet.Strings()
}
}
err = wb.Close()
if err != nil {
t.Fatal(err)
}
}
func TestHeader4(t *testing.T) {
wb, err := Open(context.Background(), "testdata/test4.xls")
wb, err := Open("../testdata/test4.xls")
if err != nil {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
@ -85,4 +109,9 @@ func TestHeader4(t *testing.T) {
sheet.Strings()
}
}
err = wb.Close()
if err != nil {
t.Fatal(err)
}
}

View File

@ -139,12 +139,12 @@ func parseSST(recs []*rec) ([]string, error) {
numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])
all := make([]string, 0, numStrings)
current := make([]uint16, 32*1024)
buf := recs[0].Data[8:]
for i := 0; i < len(recs); {
var cRunBytes int
var flags byte
var current []uint16
var cbExtRs uint32
for len(buf) > 0 {
@ -177,7 +177,11 @@ func parseSST(recs []*rec) ([]string, error) {
// this block will read the string data, but transparently
// handle continuing across records
current = make([]uint16, slen)
if int(slen) > cap(current) {
current = make([]uint16, slen)
} else {
current = current[:slen]
}
for j := 0; j < int(slen); j++ {
if len(buf) == 0 {
i++

View File

@ -12,6 +12,7 @@ import (
"errors"
"io"
"log"
"sync"
"github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
@ -146,14 +147,35 @@ func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor)
return b.loadFromStream2(alldata, true)
}
// Close returns every parsed record to recPool for reuse and truncates the
// substream slices. It always returns nil.
func (b *WorkBook) Close() error {
	for i, sub := range b.substreams {
		for j, r := range sub {
			r.Data = nil // drop the reference into the raw stream so it can be collected
			recPool.Put(r)
			// Clear the slot: truncating with [:0] alone would leave this
			// pointer in the backing array, keeping the pooled record
			// reachable from the workbook after it has been handed back.
			sub[j] = nil
		}
		b.substreams[i] = sub[:0]
	}
	b.substreams = b.substreams[:0]
	return nil
}
func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
b.h = &header{}
substr := -1
nestedBOF := 0
b.substreams = b.substreams[:0]
b.pos2substream = make(map[int64]int, 10)
b.fpos = 0
// IMPORTANT: if there are any existing records, we need to return them to the pool
for i, sub := range b.substreams {
for _, r := range sub {
recPool.Put(r)
}
b.substreams[i] = b.substreams[i][:0]
}
b.substreams = b.substreams[:0]
rawfull := raw
nr, no, err := b.nextRecord(raw)
for err == nil {
@ -290,14 +312,24 @@ func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
return err
}
// recPool hands out *rec values; when the pool has none available it
// allocates a fresh zero-valued rec.
var recPool = sync.Pool{
	New: func() interface{} { return new(rec) },
}
func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) {
if len(raw) < 4 {
return nil, 0, io.EOF
}
rt := recordType(binary.LittleEndian.Uint16(raw[:2]))
rs := binary.LittleEndian.Uint16(raw[2:4])
if len(raw[4:]) < int(rs) {
rec := recPool.Get().(*rec)
rec.RecType = recordType(binary.LittleEndian.Uint16(raw[:2]))
rec.RecSize = binary.LittleEndian.Uint16(raw[2:4])
if len(raw[4:]) < int(rec.RecSize) {
recPool.Put(rec)
return nil, 4, io.ErrUnexpectedEOF
}
return &rec{rt, rs, raw[4 : 4+rs]}, int(4 + rs), nil
rec.Data = raw[4 : 4+rec.RecSize]
return rec, int(4 + rec.RecSize), nil
}

44
xlsx/comp_test.go Normal file
View File

@ -0,0 +1,44 @@
package xlsx
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestManyFiles walks ../testdata and, for every file whose name ends in
// ".xlsx", opens it, lists its sheets, and iterates every row of every sheet.
// Files that cannot be opened are skipped (and logged); any other failure
// aborts the walk and fails the test.
func TestManyFiles(t *testing.T) {
	err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk passes a non-nil err when it could not stat or read p;
			// info may be nil in that case, so it must not be touched.
			return err
		}
		if info.IsDir() || !strings.HasSuffix(info.Name(), ".xlsx") {
			return nil
		}

		wb, err := Open(p)
		if err != nil {
			// Best effort: files that fail to open are skipped rather than
			// failing the test, but leave a trace in the test log.
			t.Logf("skipping %s: %v", p, err)
			return nil
		}

		sheets, err := wb.List()
		if err != nil {
			_ = wb.Close() // the List error is the one worth reporting
			return err
		}
		for _, s := range sheets {
			sheet, err := wb.Get(s)
			if err != nil {
				_ = wb.Close() // the Get error is the one worth reporting
				return err
			}
			for sheet.Next() {
				sheet.Strings()
			}
		}
		return wb.Close()
	})
	if err != nil {
		t.Fatal(err)
	}
}

View File

@ -6,19 +6,39 @@ import (
)
func noTestOpen(t *testing.T) {
_, err := Open("test.xlsx")
if err != nil {
log.Fatal(err)
}
}
func TestOpen2(t *testing.T) {
wb, err := Open("test2.xlsx")
wb, err := Open("test.xlsx")
if err != nil {
log.Fatal(err)
}
for _, s := range wb.Sheets() {
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
t.Fatal(err)
}
for sheet.Next() {
sheet.Strings()
}
}
}
func TestOpen2(t *testing.T) {
wb, err := Open("test2.xlsx")
if err != nil {
log.Fatal(err)
}
sheets, err := wb.List()
if err != nil {
t.Fatal(err)
}
for _, s := range sheets {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {

View File

@ -19,6 +19,7 @@ var _ = grate.Register("xlsx", 5, Open)
// Document contains an Office Open XML document.
type Document struct {
filename string
f *os.File
r *zip.Reader
primaryDoc string
@ -30,6 +31,16 @@ type Document struct {
fmt commonxl.Formatter
}
// Close drops the document's xfs, strings and sheets slices and closes the
// underlying file, returning the error from that close.
func (d *Document) Close() error {
	d.xfs, d.strings, d.sheets = nil, nil, nil
	return d.f.Close()
}
func Open(filename string) (grate.Source, error) {
f, err := os.Open(filename)
if err != nil {
@ -45,6 +56,7 @@ func Open(filename string) (grate.Source, error) {
}
d := &Document{
filename: filename,
f: f,
r: z,
}