mirror of
https://github.com/pbnjay/grate.git
synced 2026-05-21 02:19:04 +02:00
More tweaks to memory usage, in xls this time
Did not reduce total allocations much (bytes.Reader is more efficient than I thought), but reduced wall time from 99s to 55s for a large collection.
This commit is contained in:
@@ -42,11 +42,9 @@ func main() {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
runtime.GC() // get up-to-date statistics
|
||||
if err := pprof.WriteHeapProfile(f); err != nil {
|
||||
log.Fatal("could not write memory profile: ", err)
|
||||
}
|
||||
f.Close() // error handling omitted for example
|
||||
runtime.GC()
|
||||
pprof.WriteHeapProfile(f)
|
||||
f.Close()
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
@@ -55,6 +55,9 @@ func addCommas(ff FmtFunc) FmtFunc {
|
||||
}
|
||||
|
||||
func identFunc(x *Formatter, v interface{}) string {
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
return fmt.Sprint(v)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,9 @@ type Decryptor interface {
|
||||
// Write implements the io.Writer interface.
|
||||
Write(p []byte) (n int, err error)
|
||||
|
||||
// Bytes returns the decrypted data.
|
||||
Bytes() []byte
|
||||
|
||||
// Flush tells the decryptor to decrypt the latest block.
|
||||
Flush()
|
||||
|
||||
|
||||
@@ -92,6 +92,10 @@ type rc4Writer struct {
|
||||
Password []rune
|
||||
}
|
||||
|
||||
func (d *rc4Writer) Bytes() []byte {
|
||||
return d.buf.Bytes()
|
||||
}
|
||||
|
||||
func (d *rc4Writer) Verify(everifier, everifierHash []byte) error {
|
||||
d.Reset()
|
||||
d.startBlock()
|
||||
|
||||
+54
-46
@@ -4,48 +4,53 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
|
||||
var x uint64
|
||||
binary.Read(r, binary.LittleEndian, &x) // skip and discard classid
|
||||
binary.Read(r, binary.LittleEndian, &x)
|
||||
|
||||
var flags, slen uint32
|
||||
binary.Read(r, binary.LittleEndian, &slen)
|
||||
func decodeHyperlinks(raw []byte) (displayText, linkText string, err error) {
|
||||
raw = raw[16:] // skip classid
|
||||
slen := binary.LittleEndian.Uint32(raw[:4])
|
||||
if slen != 2 {
|
||||
return "", "", errors.New("xls: unknown hyperlink version")
|
||||
}
|
||||
|
||||
binary.Read(r, binary.LittleEndian, &flags)
|
||||
flags := binary.LittleEndian.Uint32(raw[4:8])
|
||||
raw = raw[8:]
|
||||
if (flags & hlstmfHasDisplayName) != 0 {
|
||||
binary.Read(r, binary.LittleEndian, &slen)
|
||||
slen = binary.LittleEndian.Uint32(raw[:4])
|
||||
raw = raw[4:]
|
||||
us := make([]uint16, slen)
|
||||
binary.Read(r, binary.LittleEndian, us)
|
||||
for i := 0; i < int(slen); i++ {
|
||||
us[i] = binary.LittleEndian.Uint16(raw)
|
||||
raw = raw[2:]
|
||||
}
|
||||
displayText = string(utf16.Decode(us))
|
||||
}
|
||||
|
||||
if (flags & hlstmfHasFrameName) != 0 {
|
||||
// skip a HyperlinkString containing target Frame
|
||||
binary.Read(r, binary.LittleEndian, &slen)
|
||||
io.CopyN(ioutil.Discard, r, int64(slen*2))
|
||||
slen = binary.LittleEndian.Uint32(raw[:4])
|
||||
raw = raw[4+(slen*2):]
|
||||
}
|
||||
|
||||
if (flags & hlstmfHasMoniker) != 0 {
|
||||
if (flags & hlstmfMonikerSavedAsStr) != 0 {
|
||||
// read HyperlinkString containing the URL
|
||||
binary.Read(r, binary.LittleEndian, &slen)
|
||||
slen = binary.LittleEndian.Uint32(raw[:4])
|
||||
raw = raw[4:]
|
||||
us := make([]uint16, slen)
|
||||
binary.Read(r, binary.LittleEndian, us)
|
||||
for i := 0; i < int(slen); i++ {
|
||||
us[i] = binary.LittleEndian.Uint16(raw)
|
||||
raw = raw[2:]
|
||||
}
|
||||
linkText = string(utf16.Decode(us))
|
||||
|
||||
} else {
|
||||
n := 0
|
||||
var err error
|
||||
linkText, err = parseHyperlinkMoniker(r)
|
||||
linkText, n, err = parseHyperlinkMoniker(raw)
|
||||
raw = raw[n:]
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
@@ -53,9 +58,13 @@ func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
|
||||
}
|
||||
|
||||
if (flags & hlstmfHasLocationStr) != 0 {
|
||||
binary.Read(r, binary.LittleEndian, &slen)
|
||||
slen = binary.LittleEndian.Uint32(raw[:4])
|
||||
raw = raw[4:]
|
||||
us := make([]uint16, slen)
|
||||
binary.Read(r, binary.LittleEndian, us)
|
||||
for i := 0; i < int(slen); i++ {
|
||||
us[i] = binary.LittleEndian.Uint16(raw)
|
||||
raw = raw[2:]
|
||||
}
|
||||
linkText = string(utf16.Decode(us))
|
||||
}
|
||||
|
||||
@@ -64,15 +73,9 @@ func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func parseHyperlinkMoniker(r io.Reader) (string, error) {
|
||||
var classid [16]byte
|
||||
n, err := r.Read(classid[:])
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if n != 16 {
|
||||
return "", io.ErrShortBuffer
|
||||
}
|
||||
func parseHyperlinkMoniker(raw []byte) (string, int, error) {
|
||||
classid := raw[:16]
|
||||
no := 16
|
||||
|
||||
isURLMoniker := true
|
||||
isFileMoniker := true
|
||||
@@ -87,40 +90,45 @@ func parseHyperlinkMoniker(r io.Reader) (string, error) {
|
||||
}
|
||||
}
|
||||
if isURLMoniker {
|
||||
var length uint32
|
||||
binary.Read(r, binary.LittleEndian, &length)
|
||||
length := binary.LittleEndian.Uint32(raw[no:])
|
||||
no += 4
|
||||
length /= 2
|
||||
buf := make([]uint16, length)
|
||||
binary.Read(r, binary.LittleEndian, &buf)
|
||||
for i := 0; i < int(length); i++ {
|
||||
buf[i] = binary.LittleEndian.Uint16(raw[no:])
|
||||
no += 2
|
||||
}
|
||||
if length > 12 && buf[length-13] == 0 {
|
||||
buf = buf[:length-12]
|
||||
}
|
||||
return string(utf16.Decode(buf)), nil
|
||||
return string(utf16.Decode(buf)), no, nil
|
||||
}
|
||||
if isFileMoniker {
|
||||
var x uint16
|
||||
var length uint32
|
||||
binary.Read(r, binary.LittleEndian, &x) //cAnti
|
||||
binary.Read(r, binary.LittleEndian, &length) //ansiLength
|
||||
buf := make([]byte, length)
|
||||
binary.Read(r, binary.LittleEndian, &buf)
|
||||
//x := binary.LittleEndian.Uint16(raw[no:]) //cAnti
|
||||
length := binary.LittleEndian.Uint32(raw[no+2:]) //ansiLength
|
||||
no += 6
|
||||
buf := raw[no : no+int(length)]
|
||||
|
||||
// skip 24 bytes for misc fixed properties
|
||||
io.CopyN(ioutil.Discard, r, 24)
|
||||
// skip 24 more bytes for misc fixed properties
|
||||
no += int(length) + 24
|
||||
|
||||
binary.Read(r, binary.LittleEndian, &length) // cbUnicodePathSize
|
||||
length = binary.LittleEndian.Uint32(raw[no:]) // cbUnicodePathSize
|
||||
no += 4
|
||||
if length > 0 {
|
||||
io.CopyN(ioutil.Discard, r, 6)
|
||||
no += 6
|
||||
length -= 6
|
||||
buf2 := make([]uint16, length/2)
|
||||
binary.Read(r, binary.LittleEndian, &buf2)
|
||||
return string(utf16.Decode(buf2)), nil
|
||||
for i := 0; i < int(length/2); i++ {
|
||||
buf2[i] = binary.LittleEndian.Uint16(raw[no:])
|
||||
no += 2
|
||||
}
|
||||
return string(utf16.Decode(buf2)), no, nil
|
||||
}
|
||||
|
||||
return string(buf), nil
|
||||
return string(buf), no, nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("xls: unknown moniker classid")
|
||||
return "", 0, fmt.Errorf("xls: unknown moniker classid")
|
||||
}
|
||||
|
||||
// HLink flags
|
||||
|
||||
+75
-90
@@ -1,7 +1,6 @@
|
||||
package xls
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -91,9 +90,6 @@ func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
|
||||
// ensure we always have a complete matrix
|
||||
for len(s.rows) <= rowIndex {
|
||||
emptyRow := make([]interface{}, s.maxCol+1)
|
||||
for i := 0; i <= s.maxCol; i++ {
|
||||
emptyRow[i] = staticBlank
|
||||
}
|
||||
s.rows = append(s.rows, &row{emptyRow})
|
||||
}
|
||||
|
||||
@@ -126,15 +122,11 @@ func (s *WorkSheet) parse() error {
|
||||
}
|
||||
|
||||
case RecTypeDimensions:
|
||||
bb := bytes.NewReader(r.Data)
|
||||
var minRow, maxRow uint32
|
||||
var minCol, maxCol uint16
|
||||
|
||||
// max = 0-based index of the row AFTER the last valid index
|
||||
binary.Read(bb, binary.LittleEndian, &minRow)
|
||||
binary.Read(bb, binary.LittleEndian, &maxRow) // max = 0x010000
|
||||
binary.Read(bb, binary.LittleEndian, &minCol)
|
||||
binary.Read(bb, binary.LittleEndian, &maxCol) // max = 0x000100
|
||||
minRow := binary.LittleEndian.Uint32(r.Data[:4])
|
||||
maxRow := binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
|
||||
minCol := binary.LittleEndian.Uint16(r.Data[8:10])
|
||||
maxCol := binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
|
||||
if grate.Debug {
|
||||
log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)",
|
||||
minCol, minRow, maxCol, maxRow)
|
||||
@@ -153,18 +145,8 @@ func (s *WorkSheet) parse() error {
|
||||
s.empty = true
|
||||
} else {
|
||||
// pre-allocate cells
|
||||
s.placeValue(s.maxRow, s.maxCol, staticBlank)
|
||||
s.placeValue(s.maxRow, s.maxCol, nil)
|
||||
}
|
||||
|
||||
case RecTypeRow:
|
||||
bb := bytes.NewReader(r.Data)
|
||||
row := &shRow{}
|
||||
binary.Read(bb, binary.LittleEndian, row)
|
||||
if (row.Reserved & 0xFFFF) != 0 {
|
||||
log.Println("invalid Row spec")
|
||||
continue
|
||||
}
|
||||
//log.Printf("row spec: %+v", *row)
|
||||
}
|
||||
}
|
||||
inSubstream = 0
|
||||
@@ -180,8 +162,6 @@ func (s *WorkSheet) parse() error {
|
||||
continue
|
||||
}
|
||||
|
||||
bb := bytes.NewReader(r.Data)
|
||||
|
||||
// sec 2.1.7.20.6 Common Productions ABNF:
|
||||
/*
|
||||
CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2
|
||||
@@ -199,17 +179,11 @@ func (s *WorkSheet) parse() error {
|
||||
inSubstream++
|
||||
continue
|
||||
}
|
||||
case RecTypeBlank:
|
||||
var rowIndex, colIndex uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
//log.Printf("blank spec: %d %d", rowIndex, colIndex)
|
||||
|
||||
case RecTypeBoolErr:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
|
||||
if r.Data[7] == 0 {
|
||||
bv := false
|
||||
if r.Data[6] == 1 {
|
||||
@@ -226,22 +200,17 @@ func (s *WorkSheet) parse() error {
|
||||
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
|
||||
}
|
||||
|
||||
case RecTypeMulBlank:
|
||||
var rowIndex, firstCol uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &firstCol)
|
||||
// nrk := int((r.RecSize - 6) / 6)
|
||||
// log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
|
||||
|
||||
case RecTypeMulRk:
|
||||
mr := &shMulRK{}
|
||||
nrk := int((r.RecSize - 6) / 6)
|
||||
binary.Read(bb, binary.LittleEndian, &mr.RowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &mr.FirstCol)
|
||||
mr.RowIndex = binary.LittleEndian.Uint16(r.Data[:2])
|
||||
mr.FirstCol = binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
mr.Values = make([]RkRec, nrk)
|
||||
for i := 0; i < nrk; i++ {
|
||||
off := 4 + i*6
|
||||
rr := RkRec{}
|
||||
binary.Read(bb, binary.LittleEndian, &rr)
|
||||
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[off:])
|
||||
// Each 6-byte entry is a 2-byte IXFE followed by the 4-byte RK value, so the
// value starts at off+2 (same layout as the RecTypeRK case: [4:] then [6:]).
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[off+2:]))
|
||||
mr.Values[i] = rr
|
||||
|
||||
var rval interface{}
|
||||
@@ -254,16 +223,14 @@ func (s *WorkSheet) parse() error {
|
||||
}
|
||||
s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
|
||||
}
|
||||
binary.Read(bb, binary.LittleEndian, &mr.LastCol)
|
||||
//log.Printf("mulrow spec: %+v", *mr)
|
||||
|
||||
case RecTypeNumber:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
var xnum uint64
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
binary.Read(bb, binary.LittleEndian, &xnum)
|
||||
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
|
||||
xnum := binary.LittleEndian.Uint64(r.Data[6:])
|
||||
|
||||
value := math.Float64frombits(xnum)
|
||||
fno := s.b.xfs[ixfe]
|
||||
rval, _ := s.b.nfmt.Apply(fno, value)
|
||||
@@ -272,11 +239,11 @@ func (s *WorkSheet) parse() error {
|
||||
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
|
||||
|
||||
case RecTypeRK:
|
||||
var rowIndex, colIndex uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
rr := RkRec{}
|
||||
binary.Read(bb, binary.LittleEndian, &rr)
|
||||
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[4:])
|
||||
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[6:]))
|
||||
|
||||
var rval interface{}
|
||||
if rr.Value.IsInteger() {
|
||||
@@ -290,10 +257,9 @@ func (s *WorkSheet) parse() error {
|
||||
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
|
||||
|
||||
case RecTypeFormula:
|
||||
var ixfe uint16
|
||||
binary.Read(bb, binary.LittleEndian, &formulaRow)
|
||||
binary.Read(bb, binary.LittleEndian, &formulaCol)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
formulaRow = binary.LittleEndian.Uint16(r.Data[:2])
|
||||
formulaCol = binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
|
||||
fdata := r.Data[6:]
|
||||
// fdata is r.Data[6:], the 8-byte formula value. Both marker bytes must be
// read through fdata: r.Data[7] is fdata[1], not the second 0xFF marker byte.
if fdata[6] == 0xFF && fdata[7] == 0xFF {
|
||||
switch fdata[0] {
|
||||
@@ -319,8 +285,7 @@ func (s *WorkSheet) parse() error {
|
||||
log.Println("unknown formula value type")
|
||||
}
|
||||
} else {
|
||||
var xnum uint64
|
||||
binary.Read(bb, binary.LittleEndian, &xnum)
|
||||
xnum := binary.LittleEndian.Uint64(r.Data[6:])
|
||||
value := math.Float64frombits(xnum)
|
||||
fno := s.b.xfs[ixfe]
|
||||
rval, _ := s.b.nfmt.Apply(fno, value)
|
||||
@@ -329,16 +294,18 @@ func (s *WorkSheet) parse() error {
|
||||
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
|
||||
|
||||
case RecTypeString:
|
||||
var charCount uint16
|
||||
var flags byte
|
||||
binary.Read(bb, binary.LittleEndian, &charCount)
|
||||
binary.Read(bb, binary.LittleEndian, &flags)
|
||||
charCount := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
flags := r.Data[2]
|
||||
fstr := ""
|
||||
if (flags & 1) == 0 {
|
||||
fstr = string(r.Data[3:])
|
||||
} else {
|
||||
raw := r.Data[3:]
|
||||
us := make([]uint16, charCount)
|
||||
binary.Read(bb, binary.LittleEndian, us)
|
||||
for i := 0; i < int(charCount); i++ {
|
||||
us[i] = binary.LittleEndian.Uint16(raw)
|
||||
raw = raw[2:]
|
||||
}
|
||||
fstr = string(utf16.Decode(us))
|
||||
}
|
||||
|
||||
@@ -353,9 +320,13 @@ func (s *WorkSheet) parse() error {
|
||||
if (r2.Data[0] & 1) == 0 {
|
||||
fstr += string(r2.Data[1:])
|
||||
} else {
|
||||
bb2 := bytes.NewReader(r2.Data[1:])
|
||||
us := make([]uint16, len(r2.Data)-1)
|
||||
binary.Read(bb2, binary.LittleEndian, us)
|
||||
raw := r2.Data[1:]
|
||||
slen := len(raw) / 2
|
||||
us := make([]uint16, slen)
|
||||
for i := 0; i < slen; i++ {
|
||||
us[i] = binary.LittleEndian.Uint16(raw)
|
||||
raw = raw[2:]
|
||||
}
|
||||
fstr += string(utf16.Decode(us))
|
||||
}
|
||||
ridx2++
|
||||
@@ -365,12 +336,10 @@ func (s *WorkSheet) parse() error {
|
||||
s.placeValue(int(formulaRow), int(formulaCol), fstr)
|
||||
|
||||
case RecTypeLabelSst:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
var sstIndex uint32
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
binary.Read(bb, binary.LittleEndian, &sstIndex)
|
||||
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
|
||||
sstIndex := binary.LittleEndian.Uint32(r.Data[6:])
|
||||
if int(sstIndex) > len(s.b.strings) {
|
||||
return errors.New("xls: invalid sst index")
|
||||
}
|
||||
@@ -379,7 +348,10 @@ func (s *WorkSheet) parse() error {
|
||||
|
||||
case RecTypeHLink:
|
||||
loc := &shRef8{}
|
||||
binary.Read(bb, binary.LittleEndian, loc)
|
||||
loc.FirstRow = binary.LittleEndian.Uint16(r.Data[:2])
|
||||
loc.LastRow = binary.LittleEndian.Uint16(r.Data[2:4])
|
||||
loc.FirstCol = binary.LittleEndian.Uint16(r.Data[4:6])
|
||||
loc.LastCol = binary.LittleEndian.Uint16(r.Data[6:])
|
||||
if int(loc.FirstCol) > s.maxCol {
|
||||
//log.Println("invalid hyperlink column")
|
||||
continue
|
||||
@@ -395,7 +367,7 @@ func (s *WorkSheet) parse() error {
|
||||
loc.LastCol = uint16(s.maxCol)
|
||||
}
|
||||
|
||||
displayText, linkText, err := decodeHyperlinks(bb)
|
||||
displayText, linkText, err := decodeHyperlinks(r.Data[8:])
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
@@ -423,11 +395,16 @@ func (s *WorkSheet) parse() error {
|
||||
}
|
||||
|
||||
case RecTypeMergeCells:
|
||||
var cmcs uint16
|
||||
binary.Read(bb, binary.LittleEndian, &cmcs)
|
||||
mcRefs := make([]shRef8, cmcs)
|
||||
binary.Read(bb, binary.LittleEndian, &mcRefs)
|
||||
for _, loc := range mcRefs {
|
||||
cmcs := binary.LittleEndian.Uint16(r.Data[:2])
|
||||
raw := r.Data[2:]
|
||||
loc := shRef8{}
|
||||
for i := 0; i < int(cmcs); i++ {
|
||||
loc.FirstRow = binary.LittleEndian.Uint16(raw[:2])
|
||||
loc.LastRow = binary.LittleEndian.Uint16(raw[2:4])
|
||||
loc.FirstCol = binary.LittleEndian.Uint16(raw[4:6])
|
||||
loc.LastCol = binary.LittleEndian.Uint16(raw[6:])
|
||||
raw = raw[8:]
|
||||
|
||||
if loc.LastRow == 0xFFFF {
|
||||
loc.LastRow = uint16(s.maxRow)
|
||||
}
|
||||
@@ -454,16 +431,21 @@ func (s *WorkSheet) parse() error {
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
case RecTypeBlank, RecTypeMulBlank:
|
||||
// cells default value is blank, no need for these
|
||||
|
||||
case RecTypeContinue:
|
||||
// the only situation so far is when used in RecTypeString above
|
||||
case RecTypeContinue:
|
||||
// the only situation so far is when used in RecTypeString above
|
||||
|
||||
case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool:
|
||||
// handled in initial pass
|
||||
default:
|
||||
if grate.Debug {
|
||||
log.Println(" Unhandled sheet record type:", r.RecType, ridx)
|
||||
}
|
||||
case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool:
|
||||
// handled in initial pass
|
||||
|
||||
default:
|
||||
if grate.Debug {
|
||||
log.Println(" Unhandled sheet record type:", r.RecType, ridx)
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -485,6 +467,9 @@ func (s *WorkSheet) Strings() []string {
|
||||
currow := s.rows[s.iterRow]
|
||||
res := make([]string, len(currow.cols))
|
||||
for i, col := range currow.cols {
|
||||
if col == nil || col == "" {
|
||||
continue
|
||||
}
|
||||
res[i] = fmt.Sprint(col)
|
||||
}
|
||||
return res
|
||||
|
||||
+26
-35
@@ -9,64 +9,55 @@ import (
|
||||
)
|
||||
|
||||
// 2.5.240
|
||||
func decodeShortXLUnicodeString(r io.Reader) (string, error) {
|
||||
var cch, flags uint8
|
||||
err := binary.Read(r, binary.LittleEndian, &cch)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
err = binary.Read(r, binary.LittleEndian, &flags)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
func decodeShortXLUnicodeString(raw []byte) (string, int, error) {
|
||||
// identical to decodeXLUnicodeString except for cch=8bits instead of 16
|
||||
cch := int(raw[0])
|
||||
flags := raw[1]
|
||||
raw = raw[2:]
|
||||
|
||||
content := make([]uint16, cch)
|
||||
if (flags & 0x1) == 0 {
|
||||
// 16-bit characters but only the bottom 8bits
|
||||
contentBytes := make([]byte, cch)
|
||||
n, err2 := io.ReadFull(r, contentBytes)
|
||||
if n == 0 && err2 != io.ErrUnexpectedEOF {
|
||||
err = err2
|
||||
}
|
||||
contentBytes := raw[:cch]
|
||||
for i, x := range contentBytes {
|
||||
content[i] = uint16(x)
|
||||
}
|
||||
cch += 2 // to return the offset
|
||||
} else {
|
||||
// 16-bit characters
|
||||
err = binary.Read(r, binary.LittleEndian, content)
|
||||
for i := 0; i < cch; i++ {
|
||||
content[i] = binary.LittleEndian.Uint16(raw[:2])
|
||||
raw = raw[2:]
|
||||
}
|
||||
cch += cch + 2 // to return the offset
|
||||
}
|
||||
return string(utf16.Decode(content)), nil
|
||||
return string(utf16.Decode(content)), cch, nil
|
||||
}
|
||||
|
||||
// 2.5.294
|
||||
func decodeXLUnicodeString(r io.Reader) (string, error) {
|
||||
var cch uint16
|
||||
var flags uint8
|
||||
err := binary.Read(r, binary.LittleEndian, &cch)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
err = binary.Read(r, binary.LittleEndian, &flags)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
func decodeXLUnicodeString(raw []byte) (string, int, error) {
|
||||
// identical to decodeShortXLUnicodeString except for cch=16bits instead of 8
|
||||
cch := int(binary.LittleEndian.Uint16(raw[:2]))
|
||||
flags := raw[2]
|
||||
raw = raw[3:]
|
||||
|
||||
content := make([]uint16, cch)
|
||||
if (flags & 0x1) == 0 {
|
||||
// 16-bit characters but only the bottom 8bits
|
||||
contentBytes := make([]byte, cch)
|
||||
n, err2 := io.ReadFull(r, contentBytes)
|
||||
if n == 0 && err2 != io.ErrUnexpectedEOF {
|
||||
err = err2
|
||||
}
|
||||
contentBytes := raw[:cch]
|
||||
for i, x := range contentBytes {
|
||||
content[i] = uint16(x)
|
||||
}
|
||||
cch += 3 // to return the offset
|
||||
} else {
|
||||
// 16-bit characters
|
||||
err = binary.Read(r, binary.LittleEndian, content)
|
||||
for i := 0; i < cch; i++ {
|
||||
content[i] = binary.LittleEndian.Uint16(raw[:2])
|
||||
raw = raw[2:]
|
||||
}
|
||||
cch += cch + 3 // to return the offset
|
||||
}
|
||||
return string(utf16.Decode(content)), nil
|
||||
return string(utf16.Decode(content)), cch, nil
|
||||
}
|
||||
|
||||
// 2.5.293
|
||||
|
||||
+58
-105
@@ -7,7 +7,6 @@ package xls
|
||||
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
@@ -66,28 +65,29 @@ func Open(filename string) (grate.Source, error) {
|
||||
if err != nil {
|
||||
return nil, grate.WrapErr(err, grate.ErrNotInFormat)
|
||||
}
|
||||
err = b.loadFromStream(rdr)
|
||||
raw, err := io.ReadAll(rdr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = b.loadFromStream(raw)
|
||||
return b, err
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStream(r io.ReadSeeker) error {
|
||||
return b.loadFromStream2(r, false)
|
||||
func (b *WorkBook) loadFromStream(raw []byte) error {
|
||||
return b.loadFromStream2(raw, false)
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decryptor) error {
|
||||
func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor) error {
|
||||
if grate.Debug {
|
||||
log.Println(" Decrypting xls stream with standard RC4")
|
||||
}
|
||||
_, err := r.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
log.Println("xls: dec-seek1 failed")
|
||||
return err
|
||||
}
|
||||
|
||||
pos := 0
|
||||
zeros := [8224]byte{}
|
||||
|
||||
type overlay struct {
|
||||
Pos int64
|
||||
Pos int
|
||||
|
||||
RecType recordType
|
||||
DataBytes uint16
|
||||
@@ -95,25 +95,13 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
|
||||
}
|
||||
replaceBlocks := []overlay{}
|
||||
|
||||
obuf := &bytes.Buffer{}
|
||||
for err == nil {
|
||||
var err error
|
||||
for err == nil && len(raw[pos:]) > 4 {
|
||||
o := overlay{}
|
||||
o.Pos, _ = r.Seek(0, io.SeekCurrent)
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &o.RecType)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
continue
|
||||
}
|
||||
log.Println("xls: dec-read1 failed")
|
||||
return err
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &o.DataBytes)
|
||||
if err != nil {
|
||||
log.Println("xls: dec-read2 failed")
|
||||
return err
|
||||
}
|
||||
o.Pos = pos
|
||||
o.RecType = recordType(binary.LittleEndian.Uint16(raw[pos : pos+2]))
|
||||
o.DataBytes = binary.LittleEndian.Uint16(raw[pos+2 : pos+4])
|
||||
pos += 4
|
||||
|
||||
// copy to output and decryption stream
|
||||
binary.Write(dec, binary.LittleEndian, o.RecType)
|
||||
@@ -122,35 +110,29 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
|
||||
|
||||
switch o.RecType {
|
||||
case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
|
||||
// copy original data into output
|
||||
o.Data = make([]byte, o.DataBytes)
|
||||
_, err = io.ReadFull(r, o.Data)
|
||||
if err != nil {
|
||||
log.Println("FAIL err", err)
|
||||
}
|
||||
// untouched data goes directly into output
|
||||
o.Data = raw[pos : pos+int(o.DataBytes)]
|
||||
pos += int(o.DataBytes)
|
||||
dec.Write(zeros[:int(o.DataBytes)])
|
||||
tocopy = 0
|
||||
|
||||
case RecTypeBoundSheet8:
|
||||
// copy 32-bit position to output
|
||||
o.Data = make([]byte, 4)
|
||||
_, err = io.ReadFull(r, o.Data)
|
||||
if err != nil {
|
||||
log.Println("FAIL err", err)
|
||||
}
|
||||
o.Data = raw[pos : pos+4]
|
||||
pos += 4
|
||||
dec.Write(zeros[:4])
|
||||
tocopy -= 4
|
||||
}
|
||||
|
||||
if tocopy > 0 {
|
||||
_, err = io.CopyN(dec, r, int64(tocopy))
|
||||
_, err = dec.Write(raw[pos : pos+tocopy])
|
||||
pos += tocopy
|
||||
}
|
||||
replaceBlocks = append(replaceBlocks, o)
|
||||
}
|
||||
dec.Flush()
|
||||
io.Copy(obuf, dec)
|
||||
|
||||
alldata := obuf.Bytes()
|
||||
alldata := dec.Bytes()
|
||||
for _, o := range replaceBlocks {
|
||||
offs := int(o.Pos)
|
||||
binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
|
||||
@@ -161,18 +143,21 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
|
||||
}
|
||||
}
|
||||
|
||||
return b.loadFromStream2(bytes.NewReader(alldata), true)
|
||||
return b.loadFromStream2(alldata, true)
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
|
||||
b.h = &header{}
|
||||
substr := -1
|
||||
nestedBOF := 0
|
||||
b.substreams = b.substreams[:0]
|
||||
b.pos2substream = make(map[int64]int, 10)
|
||||
b.fpos = 0
|
||||
nr, err := b.nextRecord(r)
|
||||
|
||||
rawfull := raw
|
||||
nr, no, err := b.nextRecord(raw)
|
||||
for err == nil {
|
||||
raw = raw[no:]
|
||||
switch nr.RecType {
|
||||
case RecTypeEOF:
|
||||
nestedBOF--
|
||||
@@ -196,7 +181,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
log.Println("xls: rc4 encryption failed to set up", err)
|
||||
return err
|
||||
}
|
||||
return b.loadFromStreamWithDecryptor(r, dec)
|
||||
return b.loadFromStreamWithDecryptor(rawfull, dec)
|
||||
case 2, 3, 4:
|
||||
log.Println("need Crypto API RC4 decryptor")
|
||||
return errors.New("xls: unsupported Crypto API encryption method")
|
||||
@@ -206,7 +191,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
}
|
||||
|
||||
b.substreams[substr] = append(b.substreams[substr], nr)
|
||||
nr, err = b.nextRecord(r)
|
||||
nr, no, err = b.nextRecord(raw)
|
||||
}
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
@@ -220,7 +205,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
log.Printf(" Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
|
||||
}
|
||||
for i, nr := range records {
|
||||
var bb io.Reader = bytes.NewReader(nr.Data)
|
||||
//var bb io.Reader = bytes.NewReader(nr.Data)
|
||||
|
||||
switch nr.RecType {
|
||||
case RecTypeSST:
|
||||
@@ -243,9 +228,12 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
// done
|
||||
|
||||
case RecTypeBOF:
|
||||
err = binary.Read(bb, binary.LittleEndian, b.h)
|
||||
if err != nil {
|
||||
return err
|
||||
b.h = &header{
|
||||
Version: binary.LittleEndian.Uint16(nr.Data[0:2]),
|
||||
DocType: binary.LittleEndian.Uint16(nr.Data[2:4]),
|
||||
RupBuild: binary.LittleEndian.Uint16(nr.Data[4:6]),
|
||||
RupYear: binary.LittleEndian.Uint16(nr.Data[6:8]),
|
||||
MiscBits: binary.LittleEndian.Uint64(nr.Data[8:16]),
|
||||
}
|
||||
|
||||
if b.h.Version != 0x0600 {
|
||||
@@ -261,21 +249,14 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
}
|
||||
|
||||
case RecTypeCodePage:
|
||||
err = binary.Read(bb, binary.LittleEndian, &b.codepage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.codepage = binary.LittleEndian.Uint16(nr.Data)
|
||||
|
||||
case RecTypeDate1904:
|
||||
err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.dateMode = binary.LittleEndian.Uint16(nr.Data)
|
||||
|
||||
case RecTypeFormat:
|
||||
var fmtNo uint16
|
||||
err = binary.Read(bb, binary.LittleEndian, &fmtNo)
|
||||
formatStr, err := decodeXLUnicodeString(bb)
|
||||
fmtNo := binary.LittleEndian.Uint16(nr.Data)
|
||||
formatStr, _, err := decodeXLUnicodeString(nr.Data[2:])
|
||||
if err != nil {
|
||||
log.Println("fail2", err)
|
||||
return err
|
||||
@@ -283,28 +264,17 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
b.nfmt.Add(fmtNo, formatStr)
|
||||
|
||||
case RecTypeXF:
|
||||
var x, fmtNo uint16
|
||||
err = binary.Read(bb, binary.LittleEndian, &x) // ignore font
|
||||
err = binary.Read(bb, binary.LittleEndian, &fmtNo)
|
||||
// ignore font id at nr.Data[0:2]
|
||||
fmtNo := binary.LittleEndian.Uint16(nr.Data[2:])
|
||||
b.xfs = append(b.xfs, fmtNo)
|
||||
|
||||
case RecTypeBoundSheet8:
|
||||
bs := &boundSheet{}
|
||||
err = binary.Read(bb, binary.LittleEndian, &bs.Position)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
bs.Position = binary.LittleEndian.Uint32(nr.Data[:4])
|
||||
bs.HiddenState = nr.Data[4]
|
||||
bs.SheetType = nr.Data[5]
|
||||
|
||||
err = binary.Read(bb, binary.LittleEndian, &bs.HiddenState)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = binary.Read(bb, binary.LittleEndian, &bs.SheetType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
bs.Name, err = decodeShortXLUnicodeString(bb)
|
||||
bs.Name, _, err = decodeShortXLUnicodeString(nr.Data[6:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -320,31 +290,14 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
|
||||
var rt recordType
|
||||
var rs uint16
|
||||
err := binary.Read(r, binary.LittleEndian, &rt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) {
|
||||
if len(raw) < 4 {
|
||||
return nil, 0, io.EOF
|
||||
}
|
||||
if rt == 0 {
|
||||
return nil, io.EOF
|
||||
rt := recordType(binary.LittleEndian.Uint16(raw[:2]))
|
||||
rs := binary.LittleEndian.Uint16(raw[2:4])
|
||||
if len(raw[4:]) < int(rs) {
|
||||
return nil, 4, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &rs)
|
||||
if rs > 8224 {
|
||||
return nil, errors.New("xls: invalid data format")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := make([]byte, rs)
|
||||
_, err = io.ReadFull(r, data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret := &rec{rt, rs, data}
|
||||
return ret, err
|
||||
return &rec{rt, rs, raw[4 : 4+rs]}, int(4 + rs), nil
|
||||
}
|
||||
|
||||
+3
-3
@@ -217,9 +217,6 @@ func (s *Sheet) placeValue(rowIndex, colIndex int, val interface{}) {
|
||||
// ensure we always have a complete matrix
|
||||
for len(s.rows) <= rowIndex {
|
||||
emptyRow := make([]interface{}, s.maxCol+1)
|
||||
for i := 0; i <= s.maxCol; i++ {
|
||||
emptyRow[i] = staticBlank
|
||||
}
|
||||
s.rows = append(s.rows, &row{emptyRow})
|
||||
}
|
||||
s.empty = false
|
||||
@@ -237,6 +234,9 @@ func (s *Sheet) Strings() []string {
|
||||
currow := s.rows[s.iterRow]
|
||||
res := make([]string, len(currow.cols))
|
||||
for i, col := range currow.cols {
|
||||
if col == nil || col == "" {
|
||||
continue
|
||||
}
|
||||
res[i] = fmt.Sprint(col)
|
||||
}
|
||||
return res
|
||||
|
||||
Reference in New Issue
Block a user