1
0
mirror of https://github.com/pbnjay/grate.git synced 2026-05-21 02:19:04 +02:00

more tweaks to memory usage in xls this time

did not reduce total allocations much (bytes.Reader is more efficient
than I thought), but reduced walltime from 99s to 55s for a large collection
This commit is contained in:
Jeremy Jay
2021-02-13 00:06:04 -05:00
parent f990af649d
commit a5be267bf7
9 changed files with 229 additions and 284 deletions
+3 -5
View File
@@ -42,11 +42,9 @@ func main() {
log.Fatal(err)
}
defer func() {
runtime.GC() // get up-to-date statistics
if err := pprof.WriteHeapProfile(f); err != nil {
log.Fatal("could not write memory profile: ", err)
}
f.Close() // error handling omitted for example
runtime.GC()
pprof.WriteHeapProfile(f)
f.Close()
}()
}
+3
View File
@@ -55,6 +55,9 @@ func addCommas(ff FmtFunc) FmtFunc {
}
// identFunc is the pass-through formatter: a value that is already a string
// is returned untouched, and anything else is rendered with fmt.Sprint.
// The Formatter argument is not consulted.
func identFunc(x *Formatter, v interface{}) string {
	switch val := v.(type) {
	case string:
		// Already text; hand it back without going through fmt.
		return val
	default:
		return fmt.Sprint(val)
	}
}
+3
View File
@@ -17,6 +17,9 @@ type Decryptor interface {
// Write implements the io.Writer interface.
Write(p []byte) (n int, err error)
// Bytes returns the decrypted data.
Bytes() []byte
// Flush tells the decryptor to decrypt the latest block.
Flush()
+4
View File
@@ -92,6 +92,10 @@ type rc4Writer struct {
Password []rune
}
// Bytes returns the bytes currently held in the writer's internal buffer,
// exactly as reported by that buffer's own Bytes method.
func (d *rc4Writer) Bytes() []byte {
	contents := d.buf.Bytes()
	return contents
}
func (d *rc4Writer) Verify(everifier, everifierHash []byte) error {
d.Reset()
d.startBlock()
+54 -46
View File
@@ -4,48 +4,53 @@ import (
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"strings"
"unicode/utf16"
)
func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
var x uint64
binary.Read(r, binary.LittleEndian, &x) // skip and discard classid
binary.Read(r, binary.LittleEndian, &x)
var flags, slen uint32
binary.Read(r, binary.LittleEndian, &slen)
func decodeHyperlinks(raw []byte) (displayText, linkText string, err error) {
raw = raw[16:] // skip classid
slen := binary.LittleEndian.Uint32(raw[:4])
if slen != 2 {
return "", "", errors.New("xls: unknown hyperlink version")
}
binary.Read(r, binary.LittleEndian, &flags)
flags := binary.LittleEndian.Uint32(raw[4:8])
raw = raw[8:]
if (flags & hlstmfHasDisplayName) != 0 {
binary.Read(r, binary.LittleEndian, &slen)
slen = binary.LittleEndian.Uint32(raw[:4])
raw = raw[4:]
us := make([]uint16, slen)
binary.Read(r, binary.LittleEndian, us)
for i := 0; i < int(slen); i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
}
displayText = string(utf16.Decode(us))
}
if (flags & hlstmfHasFrameName) != 0 {
// skip a HyperlinkString containing target Frame
binary.Read(r, binary.LittleEndian, &slen)
io.CopyN(ioutil.Discard, r, int64(slen*2))
slen = binary.LittleEndian.Uint32(raw[:4])
raw = raw[4+(slen*2):]
}
if (flags & hlstmfHasMoniker) != 0 {
if (flags & hlstmfMonikerSavedAsStr) != 0 {
// read HyperlinkString containing the URL
binary.Read(r, binary.LittleEndian, &slen)
slen = binary.LittleEndian.Uint32(raw[:4])
raw = raw[4:]
us := make([]uint16, slen)
binary.Read(r, binary.LittleEndian, us)
for i := 0; i < int(slen); i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
}
linkText = string(utf16.Decode(us))
} else {
n := 0
var err error
linkText, err = parseHyperlinkMoniker(r)
linkText, n, err = parseHyperlinkMoniker(raw)
raw = raw[n:]
if err != nil {
return "", "", err
}
@@ -53,9 +58,13 @@ func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
}
if (flags & hlstmfHasLocationStr) != 0 {
binary.Read(r, binary.LittleEndian, &slen)
slen = binary.LittleEndian.Uint32(raw[:4])
raw = raw[4:]
us := make([]uint16, slen)
binary.Read(r, binary.LittleEndian, us)
for i := 0; i < int(slen); i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
}
linkText = string(utf16.Decode(us))
}
@@ -64,15 +73,9 @@ func decodeHyperlinks(r io.Reader) (displayText, linkText string, err error) {
return
}
func parseHyperlinkMoniker(r io.Reader) (string, error) {
var classid [16]byte
n, err := r.Read(classid[:])
if err != nil {
return "", err
}
if n != 16 {
return "", io.ErrShortBuffer
}
func parseHyperlinkMoniker(raw []byte) (string, int, error) {
classid := raw[:16]
no := 16
isURLMoniker := true
isFileMoniker := true
@@ -87,40 +90,45 @@ func parseHyperlinkMoniker(r io.Reader) (string, error) {
}
}
if isURLMoniker {
var length uint32
binary.Read(r, binary.LittleEndian, &length)
length := binary.LittleEndian.Uint32(raw[no:])
no += 4
length /= 2
buf := make([]uint16, length)
binary.Read(r, binary.LittleEndian, &buf)
for i := 0; i < int(length); i++ {
buf[i] = binary.LittleEndian.Uint16(raw[no:])
no += 2
}
if length > 12 && buf[length-13] == 0 {
buf = buf[:length-12]
}
return string(utf16.Decode(buf)), nil
return string(utf16.Decode(buf)), no, nil
}
if isFileMoniker {
var x uint16
var length uint32
binary.Read(r, binary.LittleEndian, &x) //cAnti
binary.Read(r, binary.LittleEndian, &length) //ansiLength
buf := make([]byte, length)
binary.Read(r, binary.LittleEndian, &buf)
//x := binary.LittleEndian.Uint16(raw[no:]) //cAnti
length := binary.LittleEndian.Uint32(raw[no+2:]) //ansiLength
no += 6
buf := raw[no : no+int(length)]
// skip 24 bytes for misc fixed properties
io.CopyN(ioutil.Discard, r, 24)
// skip 24 more bytes for misc fixed properties
no += int(length) + 24
binary.Read(r, binary.LittleEndian, &length) // cbUnicodePathSize
length = binary.LittleEndian.Uint32(raw[no:]) // cbUnicodePathSize
no += 4
if length > 0 {
io.CopyN(ioutil.Discard, r, 6)
no += 6
length -= 6
buf2 := make([]uint16, length/2)
binary.Read(r, binary.LittleEndian, &buf2)
return string(utf16.Decode(buf2)), nil
for i := 0; i < int(length/2); i++ {
buf2[i] = binary.LittleEndian.Uint16(raw[no:])
no += 2
}
return string(utf16.Decode(buf2)), no, nil
}
return string(buf), nil
return string(buf), no, nil
}
return "", fmt.Errorf("xls: unknown moniker classid")
return "", 0, fmt.Errorf("xls: unknown moniker classid")
}
// HLink flags
+75 -90
View File
@@ -1,7 +1,6 @@
package xls
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
@@ -91,9 +90,6 @@ func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
// ensure we always have a complete matrix
for len(s.rows) <= rowIndex {
emptyRow := make([]interface{}, s.maxCol+1)
for i := 0; i <= s.maxCol; i++ {
emptyRow[i] = staticBlank
}
s.rows = append(s.rows, &row{emptyRow})
}
@@ -126,15 +122,11 @@ func (s *WorkSheet) parse() error {
}
case RecTypeDimensions:
bb := bytes.NewReader(r.Data)
var minRow, maxRow uint32
var minCol, maxCol uint16
// max = 0-based index of the row AFTER the last valid index
binary.Read(bb, binary.LittleEndian, &minRow)
binary.Read(bb, binary.LittleEndian, &maxRow) // max = 0x010000
binary.Read(bb, binary.LittleEndian, &minCol)
binary.Read(bb, binary.LittleEndian, &maxCol) // max = 0x000100
minRow := binary.LittleEndian.Uint32(r.Data[:4])
maxRow := binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
minCol := binary.LittleEndian.Uint16(r.Data[8:10])
maxCol := binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
if grate.Debug {
log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)",
minCol, minRow, maxCol, maxRow)
@@ -153,18 +145,8 @@ func (s *WorkSheet) parse() error {
s.empty = true
} else {
// pre-allocate cells
s.placeValue(s.maxRow, s.maxCol, staticBlank)
s.placeValue(s.maxRow, s.maxCol, nil)
}
case RecTypeRow:
bb := bytes.NewReader(r.Data)
row := &shRow{}
binary.Read(bb, binary.LittleEndian, row)
if (row.Reserved & 0xFFFF) != 0 {
log.Println("invalid Row spec")
continue
}
//log.Printf("row spec: %+v", *row)
}
}
inSubstream = 0
@@ -180,8 +162,6 @@ func (s *WorkSheet) parse() error {
continue
}
bb := bytes.NewReader(r.Data)
// sec 2.1.7.20.6 Common Productions ABNF:
/*
CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2
@@ -199,17 +179,11 @@ func (s *WorkSheet) parse() error {
inSubstream++
continue
}
case RecTypeBlank:
var rowIndex, colIndex uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
//log.Printf("blank spec: %d %d", rowIndex, colIndex)
case RecTypeBoolErr:
var rowIndex, colIndex, ixfe uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
binary.Read(bb, binary.LittleEndian, &ixfe)
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
if r.Data[7] == 0 {
bv := false
if r.Data[6] == 1 {
@@ -226,22 +200,17 @@ func (s *WorkSheet) parse() error {
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
}
case RecTypeMulBlank:
var rowIndex, firstCol uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &firstCol)
// nrk := int((r.RecSize - 6) / 6)
// log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
case RecTypeMulRk:
mr := &shMulRK{}
nrk := int((r.RecSize - 6) / 6)
binary.Read(bb, binary.LittleEndian, &mr.RowIndex)
binary.Read(bb, binary.LittleEndian, &mr.FirstCol)
mr.RowIndex = binary.LittleEndian.Uint16(r.Data[:2])
mr.FirstCol = binary.LittleEndian.Uint16(r.Data[2:4])
mr.Values = make([]RkRec, nrk)
for i := 0; i < nrk; i++ {
off := 4 + i*6
rr := RkRec{}
binary.Read(bb, binary.LittleEndian, &rr)
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[off:])
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[off:]))
mr.Values[i] = rr
var rval interface{}
@@ -254,16 +223,14 @@ func (s *WorkSheet) parse() error {
}
s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
}
binary.Read(bb, binary.LittleEndian, &mr.LastCol)
//log.Printf("mulrow spec: %+v", *mr)
case RecTypeNumber:
var rowIndex, colIndex, ixfe uint16
var xnum uint64
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
binary.Read(bb, binary.LittleEndian, &ixfe)
binary.Read(bb, binary.LittleEndian, &xnum)
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
xnum := binary.LittleEndian.Uint64(r.Data[6:])
value := math.Float64frombits(xnum)
fno := s.b.xfs[ixfe]
rval, _ := s.b.nfmt.Apply(fno, value)
@@ -272,11 +239,11 @@ func (s *WorkSheet) parse() error {
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
case RecTypeRK:
var rowIndex, colIndex uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
rr := RkRec{}
binary.Read(bb, binary.LittleEndian, &rr)
rr.IXFCell = binary.LittleEndian.Uint16(r.Data[4:])
rr.Value = RKNumber(binary.LittleEndian.Uint32(r.Data[6:]))
var rval interface{}
if rr.Value.IsInteger() {
@@ -290,10 +257,9 @@ func (s *WorkSheet) parse() error {
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
case RecTypeFormula:
var ixfe uint16
binary.Read(bb, binary.LittleEndian, &formulaRow)
binary.Read(bb, binary.LittleEndian, &formulaCol)
binary.Read(bb, binary.LittleEndian, &ixfe)
formulaRow = binary.LittleEndian.Uint16(r.Data[:2])
formulaCol = binary.LittleEndian.Uint16(r.Data[2:4])
ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
fdata := r.Data[6:]
if fdata[6] == 0xFF && r.Data[7] == 0xFF {
switch fdata[0] {
@@ -319,8 +285,7 @@ func (s *WorkSheet) parse() error {
log.Println("unknown formula value type")
}
} else {
var xnum uint64
binary.Read(bb, binary.LittleEndian, &xnum)
xnum := binary.LittleEndian.Uint64(r.Data[6:])
value := math.Float64frombits(xnum)
fno := s.b.xfs[ixfe]
rval, _ := s.b.nfmt.Apply(fno, value)
@@ -329,16 +294,18 @@ func (s *WorkSheet) parse() error {
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
case RecTypeString:
var charCount uint16
var flags byte
binary.Read(bb, binary.LittleEndian, &charCount)
binary.Read(bb, binary.LittleEndian, &flags)
charCount := binary.LittleEndian.Uint16(r.Data[:2])
flags := r.Data[2]
fstr := ""
if (flags & 1) == 0 {
fstr = string(r.Data[3:])
} else {
raw := r.Data[3:]
us := make([]uint16, charCount)
binary.Read(bb, binary.LittleEndian, us)
for i := 0; i < int(charCount); i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
}
fstr = string(utf16.Decode(us))
}
@@ -353,9 +320,13 @@ func (s *WorkSheet) parse() error {
if (r2.Data[0] & 1) == 0 {
fstr += string(r2.Data[1:])
} else {
bb2 := bytes.NewReader(r2.Data[1:])
us := make([]uint16, len(r2.Data)-1)
binary.Read(bb2, binary.LittleEndian, us)
raw := r2.Data[1:]
slen := len(raw) / 2
us := make([]uint16, slen)
for i := 0; i < slen; i++ {
us[i] = binary.LittleEndian.Uint16(raw)
raw = raw[2:]
}
fstr += string(utf16.Decode(us))
}
ridx2++
@@ -365,12 +336,10 @@ func (s *WorkSheet) parse() error {
s.placeValue(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst:
var rowIndex, colIndex, ixfe uint16
var sstIndex uint32
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
binary.Read(bb, binary.LittleEndian, &ixfe)
binary.Read(bb, binary.LittleEndian, &sstIndex)
rowIndex := binary.LittleEndian.Uint16(r.Data[:2])
colIndex := binary.LittleEndian.Uint16(r.Data[2:4])
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
sstIndex := binary.LittleEndian.Uint32(r.Data[6:])
if int(sstIndex) > len(s.b.strings) {
return errors.New("xls: invalid sst index")
}
@@ -379,7 +348,10 @@ func (s *WorkSheet) parse() error {
case RecTypeHLink:
loc := &shRef8{}
binary.Read(bb, binary.LittleEndian, loc)
loc.FirstRow = binary.LittleEndian.Uint16(r.Data[:2])
loc.LastRow = binary.LittleEndian.Uint16(r.Data[2:4])
loc.FirstCol = binary.LittleEndian.Uint16(r.Data[4:6])
loc.LastCol = binary.LittleEndian.Uint16(r.Data[6:])
if int(loc.FirstCol) > s.maxCol {
//log.Println("invalid hyperlink column")
continue
@@ -395,7 +367,7 @@ func (s *WorkSheet) parse() error {
loc.LastCol = uint16(s.maxCol)
}
displayText, linkText, err := decodeHyperlinks(bb)
displayText, linkText, err := decodeHyperlinks(r.Data[8:])
if err != nil {
log.Println(err)
continue
@@ -423,11 +395,16 @@ func (s *WorkSheet) parse() error {
}
case RecTypeMergeCells:
var cmcs uint16
binary.Read(bb, binary.LittleEndian, &cmcs)
mcRefs := make([]shRef8, cmcs)
binary.Read(bb, binary.LittleEndian, &mcRefs)
for _, loc := range mcRefs {
cmcs := binary.LittleEndian.Uint16(r.Data[:2])
raw := r.Data[2:]
loc := shRef8{}
for i := 0; i < int(cmcs); i++ {
loc.FirstRow = binary.LittleEndian.Uint16(raw[:2])
loc.LastRow = binary.LittleEndian.Uint16(raw[2:4])
loc.FirstCol = binary.LittleEndian.Uint16(raw[4:6])
loc.LastCol = binary.LittleEndian.Uint16(raw[6:])
raw = raw[8:]
if loc.LastRow == 0xFFFF {
loc.LastRow = uint16(s.maxRow)
}
@@ -454,16 +431,21 @@ func (s *WorkSheet) parse() error {
}
}
}
/*
case RecTypeBlank, RecTypeMulBlank:
// cells default value is blank, no need for these
case RecTypeContinue:
// the only situation so far is when used in RecTypeString above
case RecTypeContinue:
// the only situation so far is when used in RecTypeString above
case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool:
// handled in initial pass
default:
if grate.Debug {
log.Println(" Unhandled sheet record type:", r.RecType, ridx)
}
case RecTypeRow, RecTypeDimensions, RecTypeEOF, RecTypeWsBool:
// handled in initial pass
default:
if grate.Debug {
log.Println(" Unhandled sheet record type:", r.RecType, ridx)
}
*/
}
}
return nil
@@ -485,6 +467,9 @@ func (s *WorkSheet) Strings() []string {
currow := s.rows[s.iterRow]
res := make([]string, len(currow.cols))
for i, col := range currow.cols {
if col == nil || col == "" {
continue
}
res[i] = fmt.Sprint(col)
}
return res
+26 -35
View File
@@ -9,64 +9,55 @@ import (
)
// 2.5.240
func decodeShortXLUnicodeString(r io.Reader) (string, error) {
var cch, flags uint8
err := binary.Read(r, binary.LittleEndian, &cch)
if err != nil {
return "", err
}
err = binary.Read(r, binary.LittleEndian, &flags)
if err != nil {
return "", err
}
func decodeShortXLUnicodeString(raw []byte) (string, int, error) {
// identical to decodeXLUnicodeString except for cch=8bits instead of 16
cch := int(raw[0])
flags := raw[1]
raw = raw[2:]
content := make([]uint16, cch)
if (flags & 0x1) == 0 {
// 16-bit characters but only the bottom 8bits
contentBytes := make([]byte, cch)
n, err2 := io.ReadFull(r, contentBytes)
if n == 0 && err2 != io.ErrUnexpectedEOF {
err = err2
}
contentBytes := raw[:cch]
for i, x := range contentBytes {
content[i] = uint16(x)
}
cch += 2 // to return the offset
} else {
// 16-bit characters
err = binary.Read(r, binary.LittleEndian, content)
for i := 0; i < cch; i++ {
content[i] = binary.LittleEndian.Uint16(raw[:2])
raw = raw[2:]
}
cch += cch + 2 // to return the offset
}
return string(utf16.Decode(content)), nil
return string(utf16.Decode(content)), cch, nil
}
// 2.5.294
func decodeXLUnicodeString(r io.Reader) (string, error) {
var cch uint16
var flags uint8
err := binary.Read(r, binary.LittleEndian, &cch)
if err != nil {
return "", err
}
err = binary.Read(r, binary.LittleEndian, &flags)
if err != nil {
return "", err
}
func decodeXLUnicodeString(raw []byte) (string, int, error) {
// identical to decodeShortXLUnicodeString except for cch=16bits instead of 8
cch := int(binary.LittleEndian.Uint16(raw[:2]))
flags := raw[2]
raw = raw[3:]
content := make([]uint16, cch)
if (flags & 0x1) == 0 {
// 16-bit characters but only the bottom 8bits
contentBytes := make([]byte, cch)
n, err2 := io.ReadFull(r, contentBytes)
if n == 0 && err2 != io.ErrUnexpectedEOF {
err = err2
}
contentBytes := raw[:cch]
for i, x := range contentBytes {
content[i] = uint16(x)
}
cch += 3 // to return the offset
} else {
// 16-bit characters
err = binary.Read(r, binary.LittleEndian, content)
for i := 0; i < cch; i++ {
content[i] = binary.LittleEndian.Uint16(raw[:2])
raw = raw[2:]
}
cch += cch + 3 // to return the offset
}
return string(utf16.Decode(content)), nil
return string(utf16.Decode(content)), cch, nil
}
// 2.5.293
+58 -105
View File
@@ -7,7 +7,6 @@ package xls
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06
import (
"bytes"
"context"
"encoding/binary"
"errors"
@@ -66,28 +65,29 @@ func Open(filename string) (grate.Source, error) {
if err != nil {
return nil, grate.WrapErr(err, grate.ErrNotInFormat)
}
err = b.loadFromStream(rdr)
raw, err := io.ReadAll(rdr)
if err != nil {
return nil, err
}
err = b.loadFromStream(raw)
return b, err
}
func (b *WorkBook) loadFromStream(r io.ReadSeeker) error {
return b.loadFromStream2(r, false)
func (b *WorkBook) loadFromStream(raw []byte) error {
return b.loadFromStream2(raw, false)
}
func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decryptor) error {
func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor) error {
if grate.Debug {
log.Println(" Decrypting xls stream with standard RC4")
}
_, err := r.Seek(0, io.SeekStart)
if err != nil {
log.Println("xls: dec-seek1 failed")
return err
}
pos := 0
zeros := [8224]byte{}
type overlay struct {
Pos int64
Pos int
RecType recordType
DataBytes uint16
@@ -95,25 +95,13 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
}
replaceBlocks := []overlay{}
obuf := &bytes.Buffer{}
for err == nil {
var err error
for err == nil && len(raw[pos:]) > 4 {
o := overlay{}
o.Pos, _ = r.Seek(0, io.SeekCurrent)
err = binary.Read(r, binary.LittleEndian, &o.RecType)
if err != nil {
if err == io.EOF {
continue
}
log.Println("xls: dec-read1 failed")
return err
}
err = binary.Read(r, binary.LittleEndian, &o.DataBytes)
if err != nil {
log.Println("xls: dec-read2 failed")
return err
}
o.Pos = pos
o.RecType = recordType(binary.LittleEndian.Uint16(raw[pos : pos+2]))
o.DataBytes = binary.LittleEndian.Uint16(raw[pos+2 : pos+4])
pos += 4
// copy to output and decryption stream
binary.Write(dec, binary.LittleEndian, o.RecType)
@@ -122,35 +110,29 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
switch o.RecType {
case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
// copy original data into output
o.Data = make([]byte, o.DataBytes)
_, err = io.ReadFull(r, o.Data)
if err != nil {
log.Println("FAIL err", err)
}
// untouched data goes directly into output
o.Data = raw[pos : pos+int(o.DataBytes)]
pos += int(o.DataBytes)
dec.Write(zeros[:int(o.DataBytes)])
tocopy = 0
case RecTypeBoundSheet8:
// copy 32-bit position to output
o.Data = make([]byte, 4)
_, err = io.ReadFull(r, o.Data)
if err != nil {
log.Println("FAIL err", err)
}
o.Data = raw[pos : pos+4]
pos += 4
dec.Write(zeros[:4])
tocopy -= 4
}
if tocopy > 0 {
_, err = io.CopyN(dec, r, int64(tocopy))
_, err = dec.Write(raw[pos : pos+tocopy])
pos += tocopy
}
replaceBlocks = append(replaceBlocks, o)
}
dec.Flush()
io.Copy(obuf, dec)
alldata := obuf.Bytes()
alldata := dec.Bytes()
for _, o := range replaceBlocks {
offs := int(o.Pos)
binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
@@ -161,18 +143,21 @@ func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decry
}
}
return b.loadFromStream2(bytes.NewReader(alldata), true)
return b.loadFromStream2(alldata, true)
}
func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
b.h = &header{}
substr := -1
nestedBOF := 0
b.substreams = b.substreams[:0]
b.pos2substream = make(map[int64]int, 10)
b.fpos = 0
nr, err := b.nextRecord(r)
rawfull := raw
nr, no, err := b.nextRecord(raw)
for err == nil {
raw = raw[no:]
switch nr.RecType {
case RecTypeEOF:
nestedBOF--
@@ -196,7 +181,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
log.Println("xls: rc4 encryption failed to set up", err)
return err
}
return b.loadFromStreamWithDecryptor(r, dec)
return b.loadFromStreamWithDecryptor(rawfull, dec)
case 2, 3, 4:
log.Println("need Crypto API RC4 decryptor")
return errors.New("xls: unsupported Crypto API encryption method")
@@ -206,7 +191,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
}
b.substreams[substr] = append(b.substreams[substr], nr)
nr, err = b.nextRecord(r)
nr, no, err = b.nextRecord(raw)
}
if err == io.EOF {
err = nil
@@ -220,7 +205,7 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
log.Printf(" Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
}
for i, nr := range records {
var bb io.Reader = bytes.NewReader(nr.Data)
//var bb io.Reader = bytes.NewReader(nr.Data)
switch nr.RecType {
case RecTypeSST:
@@ -243,9 +228,12 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
// done
case RecTypeBOF:
err = binary.Read(bb, binary.LittleEndian, b.h)
if err != nil {
return err
b.h = &header{
Version: binary.LittleEndian.Uint16(nr.Data[0:2]),
DocType: binary.LittleEndian.Uint16(nr.Data[2:4]),
RupBuild: binary.LittleEndian.Uint16(nr.Data[4:6]),
RupYear: binary.LittleEndian.Uint16(nr.Data[6:8]),
MiscBits: binary.LittleEndian.Uint64(nr.Data[8:16]),
}
if b.h.Version != 0x0600 {
@@ -261,21 +249,14 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
}
case RecTypeCodePage:
err = binary.Read(bb, binary.LittleEndian, &b.codepage)
if err != nil {
return err
}
b.codepage = binary.LittleEndian.Uint16(nr.Data)
case RecTypeDate1904:
err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
if err != nil {
return err
}
b.dateMode = binary.LittleEndian.Uint16(nr.Data)
case RecTypeFormat:
var fmtNo uint16
err = binary.Read(bb, binary.LittleEndian, &fmtNo)
formatStr, err := decodeXLUnicodeString(bb)
fmtNo := binary.LittleEndian.Uint16(nr.Data)
formatStr, _, err := decodeXLUnicodeString(nr.Data[2:])
if err != nil {
log.Println("fail2", err)
return err
@@ -283,28 +264,17 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
b.nfmt.Add(fmtNo, formatStr)
case RecTypeXF:
var x, fmtNo uint16
err = binary.Read(bb, binary.LittleEndian, &x) // ignore font
err = binary.Read(bb, binary.LittleEndian, &fmtNo)
// ignore font id at nr.Data[0:2]
fmtNo := binary.LittleEndian.Uint16(nr.Data[2:])
b.xfs = append(b.xfs, fmtNo)
case RecTypeBoundSheet8:
bs := &boundSheet{}
err = binary.Read(bb, binary.LittleEndian, &bs.Position)
if err != nil {
return err
}
bs.Position = binary.LittleEndian.Uint32(nr.Data[:4])
bs.HiddenState = nr.Data[4]
bs.SheetType = nr.Data[5]
err = binary.Read(bb, binary.LittleEndian, &bs.HiddenState)
if err != nil {
return err
}
err = binary.Read(bb, binary.LittleEndian, &bs.SheetType)
if err != nil {
return err
}
bs.Name, err = decodeShortXLUnicodeString(bb)
bs.Name, _, err = decodeShortXLUnicodeString(nr.Data[6:])
if err != nil {
return err
}
@@ -320,31 +290,14 @@ func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
return err
}
func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
var rt recordType
var rs uint16
err := binary.Read(r, binary.LittleEndian, &rt)
if err != nil {
return nil, err
func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) {
if len(raw) < 4 {
return nil, 0, io.EOF
}
if rt == 0 {
return nil, io.EOF
rt := recordType(binary.LittleEndian.Uint16(raw[:2]))
rs := binary.LittleEndian.Uint16(raw[2:4])
if len(raw[4:]) < int(rs) {
return nil, 4, io.ErrUnexpectedEOF
}
err = binary.Read(r, binary.LittleEndian, &rs)
if rs > 8224 {
return nil, errors.New("xls: invalid data format")
}
if err != nil {
return nil, err
}
data := make([]byte, rs)
_, err = io.ReadFull(r, data)
if err != nil {
return nil, err
}
ret := &rec{rt, rs, data}
return ret, err
return &rec{rt, rs, raw[4 : 4+rs]}, int(4 + rs), nil
}
+3 -3
View File
@@ -217,9 +217,6 @@ func (s *Sheet) placeValue(rowIndex, colIndex int, val interface{}) {
// ensure we always have a complete matrix
for len(s.rows) <= rowIndex {
emptyRow := make([]interface{}, s.maxCol+1)
for i := 0; i <= s.maxCol; i++ {
emptyRow[i] = staticBlank
}
s.rows = append(s.rows, &row{emptyRow})
}
s.empty = false
@@ -237,6 +234,9 @@ func (s *Sheet) Strings() []string {
currow := s.rows[s.iterRow]
res := make([]string, len(currow.cols))
for i, col := range currow.cols {
if col == nil || col == "" {
continue
}
res[i] = fmt.Sprint(col)
}
return res