1
0
mirror of https://github.com/pbnjay/grate.git synced 2024-12-13 13:58:27 +02:00

many bugfixes and edge cases, impl most cell types

This commit is contained in:
Jeremy Jay 2021-02-08 11:02:37 -05:00
parent f794a5ef9b
commit 80c3b4cc81
8 changed files with 372 additions and 169 deletions

View File

@ -23,21 +23,27 @@ func main() {
sanitize := regexp.MustCompile("[^a-zA-Z0-9]+")
newlines := regexp.MustCompile("[ \n\r\t]+")
for _, fn := range flag.Args() {
log.Printf("Opening file '%s' ...", fn)
wb, err := xls.Open(context.Background(), fn)
if err != nil {
log.Fatal(err)
log.Println(err)
continue
}
log.Println(fn)
ext := filepath.Ext(fn)
fn2 := filepath.Base(strings.TrimSuffix(fn, ext))
for _, s := range wb.Sheets() {
log.Printf(" Opening Sheet '%s'...", s)
sheet, err := wb.Get(s)
if err != nil {
log.Println(err)
continue
}
if sheet.IsEmpty() {
log.Println(" Empty sheet. Skipping.")
continue
}
s2 := sanitize.ReplaceAllString(s, "_")
f, err := os.Create(fn2 + "." + s2 + ".tsv")
if err != nil {
@ -61,7 +67,6 @@ func main() {
}
if nonblank || !*skipBlanks {
fmt.Fprintln(f, strings.Join(row, "\t"))
f.Sync()
}
}
f.Close()

View File

@ -12,6 +12,7 @@ import (
"errors"
"io"
"io/ioutil"
"log"
"unicode/utf16"
)
@ -122,7 +123,8 @@ func (d *doc) load(rx io.ReadSeeker) error {
panic("unknown major version")
}
if h.MinorVersion != 0x3E {
panic("unknown minor version")
log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
//panic("unknown minor version")
}
for _, v := range h.Reserved1 {
@ -186,6 +188,9 @@ func (d *doc) load(rx io.ReadSeeker) error {
}
offs := int64(1+sid2) << int32(h.SectorShift)
if offs > int64(len(d.data)) {
return errors.New("xls/cfb: unable to load file")
}
sector := d.data[offs:]
for j := 0; j < numFATentries; j++ {
sid3 := le.Uint32(sector)
@ -246,7 +251,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error {
d.ministreamstart = uint32(dirent.StartingSectorLocation)
d.ministreamsize = uint32(dirent.StreamSize)
case typeStorage:
panic("got a storage? what to do now?")
//log.Println("got a storage? what to do now?")
case typeStream:
/*
var freader io.Reader

View File

@ -51,6 +51,10 @@ func (s *SliceReader) Seek(offset int64, whence int) (int64, error) {
}
// current offset in stream
trueOffset := int64(s.Offset) + s.CSize[int(s.Index)]
if offset == 0 && whence == io.SeekCurrent {
// just asking for current position
return trueOffset, nil
}
switch whence {
case io.SeekStart:

View File

@ -41,8 +41,10 @@ func (d *rc4Writer) Reset() {
func (d *rc4Writer) Flush() {
var zeros [1024]byte
endpad := 0
if d.offset < 1024 {
d.offset += copy(d.bytes[d.offset:], zeros[:])
endpad = copy(d.bytes[d.offset:], zeros[:])
d.offset += endpad
}
if d.offset != 1024 {
panic("invalid offset fill")
@ -51,7 +53,7 @@ func (d *rc4Writer) Flush() {
// decrypt and write results to output buffer
d.startBlock()
d.dec.XORKeyStream(d.bytes[:], d.bytes[:])
d.buf.Write(d.bytes[:])
d.buf.Write(d.bytes[:1024-endpad])
d.offset = 0
d.block++

View File

@ -12,9 +12,11 @@ import (
)
func (b *WorkBook) Sheets() []string {
res := make([]string, len(b.sheets))
for i, s := range b.sheets {
res[i] = s.Name
res := make([]string, 0, len(b.sheets))
for _, s := range b.sheets {
if (s.HiddenState & 0x03) == 0 {
res = append(res, s.Name)
}
}
return res
}
@ -40,6 +42,7 @@ type WorkSheet struct {
rows []*row
maxcol int
empty bool
iterRow int
}
@ -79,30 +82,91 @@ func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
s.rows[rowIndex].cols[colIndex] = val
}
// IsEmpty reports whether the sheet was marked empty while parsing, which
// happens when its Dimensions record spans zero rows and zero columns.
func (s *WorkSheet) IsEmpty() bool {
return s.empty
}
func (s *WorkSheet) parse() error {
var minRow, maxRow uint32
var minCol, maxCol uint16
for _, r := range s.b.substreams[s.ss] {
if r.RecType == RecTypeWsBool {
if (r.Data[1] & 0x10) != 0 {
// it's a dialog
return nil
}
}
}
var formulaRow, formulaCol uint16
for _, r := range s.b.substreams[s.ss] {
bb := bytes.NewReader(r.Data)
switch r.RecType {
case RecTypeWindow2:
opts := binary.LittleEndian.Uint16(r.Data)
// right-to-left = 0x40, selected = 0x400
log.Printf("sheet options: %x", opts)
//case RecTypeWindow2:
//opts := binary.LittleEndian.Uint16(r.Data)
// right-to-left = 0x40, selected = 0x400
//log.Printf("sheet options: %x", opts)
case RecTypeDimensions:
binary.Read(bb, binary.LittleEndian, &minRow)
binary.Read(bb, binary.LittleEndian, &maxRow)
binary.Read(bb, binary.LittleEndian, &minCol)
binary.Read(bb, binary.LittleEndian, &maxCol)
//log.Printf("dimensions: %d,%d + %dx%d", minRow&0x0000FFFF, minCol,
// (maxRow&0x0000FFFF)-(minRow&0x0000FFFF), maxCol-minCol)
if minRow > 0x0000FFFF || maxRow > 0x00010000 {
log.Println("invalid dimensions")
}
if minCol > 0x00FF || maxCol > 0x0100 {
log.Println("invalid dimensions")
}
if (maxRow-minRow) == 0 && (maxCol-minCol) == 0 {
s.empty = true
}
case RecTypeRow:
row := &shRow{}
binary.Read(bb, binary.LittleEndian, row)
log.Printf("row spec: %+v", *row)
if (row.Reserved & 0xFFFF) != 0 {
log.Println("invalid Row spec")
continue
}
//log.Printf("row spec: %+v", *row)
case RecTypeBlank:
var rowIndex, colIndex uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
log.Printf("blank spec: %d %d", rowIndex, colIndex)
//log.Printf("blank spec: %d %d", rowIndex, colIndex)
case RecTypeBoolErr:
var rowIndex, colIndex, ixfe uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
binary.Read(bb, binary.LittleEndian, &ixfe)
if r.Data[7] == 0 {
bv := false
if r.Data[6] == 1 {
bv = true
}
s.placeValue(int(rowIndex), int(colIndex), bv)
//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
} else {
be, ok := berrLookup[r.Data[6]]
if !ok {
be = "<unknown error>"
}
s.placeValue(int(rowIndex), int(colIndex), be)
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
}
case RecTypeMulBlank:
var rowIndex, firstCol uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &firstCol)
nrk := int((r.RecSize - 6) / 6)
log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
// nrk := int((r.RecSize - 6) / 6)
// log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
case RecTypeMulRk:
mr := &shMulRK{}
nrk := int((r.RecSize - 6) / 6)
@ -123,8 +187,7 @@ func (s *WorkSheet) parse() error {
s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
}
binary.Read(bb, binary.LittleEndian, &mr.LastCol)
log.Printf("mulrow spec: %+v", *mr)
//log.Printf("mulrow spec: %+v", *mr)
case RecTypeNumber:
var rowIndex, colIndex, ixfe uint16
@ -135,7 +198,7 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, &xnum)
value := math.Float64frombits(xnum)
s.placeValue(int(rowIndex), int(colIndex), value)
log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
case RecTypeRK:
var rowIndex, colIndex uint16
@ -151,28 +214,59 @@ func (s *WorkSheet) parse() error {
rval = rr.Value.Float64()
}
s.placeValue(int(rowIndex), int(colIndex), rval)
log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
case RecTypeFormula:
var rowIndex, colIndex uint16
binary.Read(bb, binary.LittleEndian, &rowIndex)
binary.Read(bb, binary.LittleEndian, &colIndex)
log.Printf("formula spec: %d %d ~~ %+v", rowIndex, colIndex, r.Data)
var ixfe uint16
binary.Read(bb, binary.LittleEndian, &formulaRow)
binary.Read(bb, binary.LittleEndian, &formulaCol)
binary.Read(bb, binary.LittleEndian, &ixfe)
fdata := r.Data[6:]
if fdata[6] == 0xFF && r.Data[7] == 0xFF {
switch fdata[0] {
case 0:
// string in next record
case 1:
// boolean
bv := false
if fdata[2] != 0 {
bv = true
}
s.placeValue(int(formulaRow), int(formulaCol), bv)
case 2:
// error value
be, ok := berrLookup[fdata[2]]
if !ok {
be = "<unknown error>"
}
s.placeValue(int(formulaRow), int(formulaCol), be)
case 3:
// blank string
default:
log.Println("unknown formula value type")
}
} else {
var xnum uint64
binary.Read(bb, binary.LittleEndian, &xnum)
value := math.Float64frombits(xnum)
s.placeValue(int(formulaRow), int(formulaCol), value)
}
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
case RecTypeString:
var charCount, flags uint16
var charCount uint16
var flags byte
binary.Read(bb, binary.LittleEndian, &charCount)
binary.Read(bb, binary.LittleEndian, &flags)
s := ""
fstr := ""
if (flags & 1) == 0 {
s = string(r.Data[4:])
fstr = string(r.Data[3:])
} else {
us := make([]uint16, charCount)
binary.Read(bb, binary.LittleEndian, us)
s = string(utf16.Decode(us))
fstr = string(utf16.Decode(us))
}
log.Printf("string spec: = %s", s)
s.placeValue(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst:
var rowIndex, colIndex, ixfe uint16
@ -181,8 +275,11 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, &colIndex)
binary.Read(bb, binary.LittleEndian, &ixfe)
binary.Read(bb, binary.LittleEndian, &sstIndex)
if int(sstIndex) > len(s.b.strings) {
panic("invalid sst")
}
s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
case RecTypeHLink:
loc := &shRef8{}
@ -214,13 +311,14 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, &cmcs)
mcRefs := make([]shRef8, cmcs)
binary.Read(bb, binary.LittleEndian, &mcRefs)
log.Printf("MergeCells spec: %d records", cmcs)
for j, mc := range mcRefs {
log.Printf(" %d: %+v", j, mc)
}
//log.Printf("MergeCells spec: %d records", cmcs)
// TODO: implement markers to annotate these in tabular output
// for j, mc := range mcRefs {
// log.Printf(" %d: %+v", j, mc)
// }
default:
log.Println("worksheet", r.RecType, r.RecSize)
//log.Println("worksheet", r.RecType, r.RecSize)
}
}
@ -249,13 +347,15 @@ func (s *WorkSheet) Strings() []string {
}
// Scan extracts values from the row into the provided arguments
// Arguments must be pointers to one of 4 supported types:
// int, float64, string, or time.Time
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
func (s *WorkSheet) Scan(args ...interface{}) error {
currow := s.rows[s.iterRow]
for i, a := range args {
switch v := a.(type) {
case *bool:
*v = currow.cols[i].(bool)
case *int:
*v = currow.cols[i].(int)
case *float64:
@ -272,4 +372,15 @@ func (s *WorkSheet) Scan(args ...interface{}) error {
}
// ErrInvalidType is returned by Scan for invalid arguments.
var ErrInvalidType = errors.New("xls: Scan only supports *int, *float64, *string, *time.Time arguments")
var ErrInvalidType = errors.New("xls: Scan only supports *bool, *int, *float64, *string, *time.Time arguments")
// berrLookup maps the error-code byte found in BoolErr records and in cached
// formula error results to the text placed in the cell. Callers substitute
// "<unknown error>" for codes that are not listed here.
var berrLookup = map[byte]string{
0x00: "#NULL!",
0x07: "#DIV/0!",
0x0F: "#VALUE!",
0x17: "#REF!",
0x1D: "#NAME?",
0x24: "#NUM!",
0x2A: "#N/A",
0x2B: "#GETTING_DATA",
}

View File

@ -14,14 +14,14 @@ func TestHeader(t *testing.T) {
log.Println(wb.filename)
for _, s := range wb.Sheets() {
log.Println(s)
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
t.Fatal(err)
}
for sheet.Next() {
log.Println(sheet.Strings())
sheet.Strings()
}
}
}
@ -32,6 +32,18 @@ func TestHeader2(t *testing.T) {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
t.Fatal(err)
}
for sheet.Next() {
sheet.Strings()
}
}
}
func TestHeader3(t *testing.T) {
@ -40,6 +52,18 @@ func TestHeader3(t *testing.T) {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
t.Fatal(err)
}
for sheet.Next() {
sheet.Strings()
}
}
}
func TestHeader4(t *testing.T) {
@ -49,4 +73,16 @@ func TestHeader4(t *testing.T) {
t.Fatal(err)
}
log.Println(wb.filename)
for _, s := range wb.Sheets() {
//log.Println(s)
sheet, err := wb.Get(s)
if err != nil {
t.Fatal(err)
}
for sheet.Next() {
sheet.Strings()
}
}
}

View File

@ -4,7 +4,6 @@ import (
"encoding/binary"
"io"
"io/ioutil"
"log"
"unicode/utf16"
)
@ -45,36 +44,29 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
var cbExtRs int32
err := binary.Read(r, binary.LittleEndian, &cch)
if err != nil {
log.Println("x1", err)
return "", err
}
err = binary.Read(r, binary.LittleEndian, &flags)
if err != nil {
log.Println("x2", err)
return "", err
}
if (flags & 0x8) != 0 {
log.Println("FORMATTING PRESENT")
// rich formatting data is present
err = binary.Read(r, binary.LittleEndian, &cRun)
if err != nil {
log.Println("x3", err)
return "", err
}
}
if (flags & 0x4) != 0 {
log.Println("PHONETIC PRESENT")
// phonetic string data is present
err = binary.Read(r, binary.LittleEndian, &cbExtRs)
if err != nil {
log.Println("x4", err)
return "", err
}
}
content := make([]uint16, cch)
if (flags & 0x1) == 0 {
log.Println("8BIT DATA", cch)
// 16-bit characters but only the bottom 8bits
contentBytes := make([]byte, cch)
n, err2 := io.ReadFull(r, contentBytes)
@ -91,30 +83,25 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
}
} else {
log.Println("16BIT DATA", cch)
// 16-bit characters
err = binary.Read(r, binary.LittleEndian, content)
}
if err != nil {
log.Println("x5", err)
return "", err
}
//////
if cRun > 0 {
log.Println("READING FORMATTING DATA")
// rich formatting data is present
_, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4)
if err != nil {
log.Println("x6", err)
return "", err
}
}
if cbExtRs > 0 {
log.Println("READING PHONETIC DATA")
// phonetic string data is present
n, err := io.CopyN(ioutil.Discard, r, int64(cbExtRs))
_, err = io.CopyN(ioutil.Discard, r, int64(cbExtRs))
if err != nil {
log.Println("x7", n, cbExtRs, err)
return "", err
}
}
@ -125,25 +112,16 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
// read in an array of XLUnicodeRichExtendedString s
func parseSST(recs []*rec) ([]string, error) {
totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
//totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])
// cell count limit is 65k x 256
if numStrings > 65536*256 {
log.Println("INVALID COUNTS total=", totalRefs, " -- n strings=", numStrings)
totalRefs = 0
numStrings = 65536 * 256
}
log.Println("total=", totalRefs, " -- n strings=", numStrings)
all := make([]string, 0, numStrings)
buf := recs[0].Data[8:]
for i := 0; i < len(recs); {
var blen int
var cRunBytes int
var flags byte
var current []byte
var current []uint16
var cbExtRs uint32
for len(buf) > 0 {
@ -152,12 +130,6 @@ func parseSST(recs []*rec) ([]string, error) {
flags = buf[0]
buf = buf[1:]
blen = int(slen)
if (flags & 0x1) != 0 {
// 16-bit characters
blen = int(slen) * 2
}
if (flags & 0x8) != 0 {
// rich formatting data is present
cRun := binary.LittleEndian.Uint16(buf)
@ -170,37 +142,47 @@ func parseSST(recs []*rec) ([]string, error) {
buf = buf[4:]
}
///////
blx := len(buf)
bly := len(buf) - 5
if blx > 5 {
blx = 5
}
if bly < 0 {
bly = 0
}
// this block will read the string data, but transparently
// handle continuing across records
current = make([]byte, blen)
n := copy(current, buf)
current = current[:n]
buf = buf[n:]
for len(current) < blen {
i++
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
n = int(blen) - len(current)
if n > len(buf) {
n = len(buf)
current = make([]uint16, slen)
for j := 0; j < int(slen); j++ {
if len(buf) == 0 {
i++
if (recs[i].Data[0] & 1) == 0 {
flags &= 0xFE
} else {
flags |= 1
}
buf = recs[i].Data[1:]
}
if (flags & 1) == 0 { //8-bit
current[j] = uint16(buf[0])
buf = buf[1:]
} else { //16-bit
current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
buf = buf[2:]
if len(buf) == 1 {
panic("off by one")
}
}
current = append(current, buf[:n]...)
buf = buf[n:]
}
if (flags & 1) == 0 {
s := string(current)
all = append(all, s)
} else {
x := make([]uint16, len(current)/2)
for y := 0; y < len(current); y += 2 {
x[y/2] = binary.LittleEndian.Uint16(current[y : y+2])
}
s := string(utf16.Decode(x))
all = append(all, s)
}
s := string(utf16.Decode(current))
all = append(all, s)
///////
//log.Println(len(all), all[len(all)-1])
for cRunBytes > 0 {
if len(buf) >= int(cRunBytes) {
buf = buf[cRunBytes:]
@ -208,7 +190,7 @@ func parseSST(recs []*rec) ([]string, error) {
} else {
cRunBytes -= len(buf)
i++
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
buf = recs[i].Data
}
}
@ -219,7 +201,7 @@ func parseSST(recs []*rec) ([]string, error) {
} else {
cbExtRs -= uint32(len(buf))
i++
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
buf = recs[i].Data
}
}
}

View File

@ -23,6 +23,7 @@ type WorkBook struct {
ctx context.Context
doc cfb.Document
prot bool
h *header
sheets []*boundSheet
codepage uint16
@ -34,8 +35,10 @@ type WorkBook struct {
fpos int64
pos2substream map[int64]int
}
decryptors map[int]crypto.Decryptor
// IsProtected returns the workbook's prot flag.
// NOTE(review): the code that sets prot is not visible here; presumably it
// indicates a protected/encrypted workbook - confirm against the loader.
func (b *WorkBook) IsProtected() bool {
return b.prot
}
func Open(ctx context.Context, filename string) (*WorkBook, error) {
@ -56,33 +59,134 @@ func Open(ctx context.Context, filename string) (*WorkBook, error) {
if err != nil {
return nil, err
}
//br := bufio.NewReader(rdr)
err = b.loadFromStream(rdr)
return b, err
}
func (b *WorkBook) loadFromStream(r io.Reader) error {
b.decryptors = make(map[int]crypto.Decryptor)
// loadFromStream parses the workbook record stream from r. It delegates to
// loadFromStream2 with isDecrypted=false, so a FilePass record encountered
// during the scan is still acted upon.
func (b *WorkBook) loadFromStream(r io.ReadSeeker) error {
return b.loadFromStream2(r, false)
}
// loadFromStreamWithDecryptor rewinds r, pushes the whole record stream
// through dec, and then parses the decrypted bytes with loadFromStream2
// (isDecrypted=true).
//
// Some parts of the stream are taken verbatim from the input instead of from
// the decryptor output: every 4-byte record header, the full body of BOF,
// FilePass, UsrExcl, FileLock, InterfaceHdr, RRDInfo and RRDHead records, and
// the first 4 bytes (the stream position) of BoundSheet8 records. For those
// spans zero bytes are fed to dec so that its output stays byte-aligned with
// the input, and the original bytes are copied back over the output afterwards.
//
// It returns an error when the initial seek or a record header read fails, or
// when a record declares a size that cannot be valid. A short read inside a
// record body ends the scan; whatever was collected up to that point is still
// parsed.
func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decryptor) error {
	_, err := r.Seek(0, io.SeekStart)
	if err != nil {
		log.Println("xls: dec-seek1 failed")
		return err
	}

	// Source of filler bytes for spans that must not be decrypted. Its size
	// is the largest record body accepted by nextRecord.
	zeros := [8224]byte{}

	type overlay struct {
		Pos       int64
		RecType   recordType
		DataBytes uint16
		Data      []byte // NB len() not necessarily = DataBytes
	}
	replaceBlocks := []overlay{}

	obuf := &bytes.Buffer{}
	for {
		o := overlay{}
		o.Pos, err = r.Seek(0, io.SeekCurrent)
		if err != nil {
			log.Println("xls: dec-seek2 failed")
			return err
		}

		err = binary.Read(r, binary.LittleEndian, &o.RecType)
		if err == io.EOF {
			// clean end of the record stream
			break
		}
		if err != nil {
			log.Println("xls: dec-read1 failed")
			return err
		}
		err = binary.Read(r, binary.LittleEndian, &o.DataBytes)
		if err != nil {
			log.Println("xls: dec-read2 failed")
			return err
		}
		if int(o.DataBytes) > len(zeros) {
			// Larger than any record nextRecord would accept; slicing
			// zeros below would panic.
			return errors.New("xls: invalid data format")
		}

		// copy to output and decryption stream
		binary.Write(dec, binary.LittleEndian, o.RecType)
		binary.Write(dec, binary.LittleEndian, o.DataBytes)
		tocopy := int(o.DataBytes)

		switch o.RecType {
		case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
			// copy original data into output
			o.Data = make([]byte, o.DataBytes)
			_, err = io.ReadFull(r, o.Data)
			if err != nil {
				log.Println("FAIL err", err)
			}
			dec.Write(zeros[:int(o.DataBytes)])
			tocopy = 0

		case RecTypeBoundSheet8:
			if tocopy < 4 {
				// Too short to hold the 32-bit position; reading 4 bytes
				// would consume part of the following record.
				return errors.New("xls: invalid data format")
			}
			// copy 32-bit position to output
			o.Data = make([]byte, 4)
			_, err = io.ReadFull(r, o.Data)
			if err != nil {
				log.Println("FAIL err", err)
			}
			dec.Write(zeros[:4])
			tocopy -= 4
		}

		if err == nil && tocopy > 0 {
			_, err = io.CopyN(dec, r, int64(tocopy))
		}
		replaceBlocks = append(replaceBlocks, o)
		if err != nil {
			// truncated record body: stop scanning, keep what we have
			break
		}
	}
	dec.Flush()

	io.Copy(obuf, dec)

	alldata := obuf.Bytes()
	for _, o := range replaceBlocks {
		offs := int(o.Pos)
		if offs < 0 || offs+4 > len(alldata) {
			// decryptor produced less output than was fed in
			return errors.New("xls: invalid data format")
		}
		binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
		binary.LittleEndian.PutUint16(alldata[offs+2:], uint16(o.DataBytes))
		if len(o.Data) > 0 {
			// copy stops at the end of alldata if the tail is short
			copy(alldata[offs+4:], o.Data)
		}
	}

	return b.loadFromStream2(bytes.NewReader(alldata), true)
}
func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
b.h = &header{}
substr := -1
nestedBOF := 0
b.substreams = b.substreams[:0]
b.pos2substream = make(map[int64]int, 10)
b.fpos = 0
nr, err := b.nextRecord(r)
for err == nil {
if nr.RecType == RecTypeBOF {
substr++
b.substreams = append(b.substreams, []*rec{})
b.pos2substream[b.fpos] = substr
switch nr.RecType {
case RecTypeEOF:
nestedBOF--
case RecTypeBOF:
// when substreams are nested, keep them in the same grouping
if nestedBOF == 0 {
substr = len(b.substreams)
b.substreams = append(b.substreams, []*rec{})
b.pos2substream[b.fpos] = substr
}
nestedBOF++
}
b.fpos += int64(4 + len(nr.Data))
if nr.RecType == RecTypeFilePass {
if nr.RecType == RecTypeFilePass && !isDecrypted {
etype := binary.LittleEndian.Uint16(nr.Data)
switch etype {
case 1:
b.decryptors[substr], err = crypto.NewBasicRC4(nr.Data[2:])
dec, err := crypto.NewBasicRC4(nr.Data[2:])
if err != nil {
log.Println("xls: rc4 encryption failed to set up", err)
return err
}
return b.loadFromStreamWithDecryptor(r, dec)
case 2, 3, 4:
log.Println("need Crypto API RC4 decryptor")
return errors.New("xls: unsupported Crypto API encryption method")
@ -101,60 +205,13 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
return err
}
for ss, records := range b.substreams {
log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
if dec, ok := b.decryptors[ss]; ok {
log.Printf("Decrypting substream...")
dec.Reset()
var head [4]byte
for _, nr := range records {
binary.LittleEndian.PutUint16(head[:], uint16(nr.RecType))
binary.LittleEndian.PutUint16(head[2:], nr.RecSize)
// send the record for decryption
dec.Write(head[:])
dec.Write(nr.Data)
}
dec.Flush()
newrecset := make([]*rec, 0, len(records))
for _, nr := range records {
dec.Read(head[:]) // discard 4 byte header
dr := &rec{
RecType: nr.RecType,
RecSize: nr.RecSize,
Data: make([]byte, int(nr.RecSize)),
}
dec.Read(dr.Data)
switch nr.RecType {
case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
// keep original data
copy(dr.Data, nr.Data)
case RecTypeBoundSheet8:
// copy the position un-decrypted
copy(dr.Data[:4], nr.Data)
default:
// apply decryption
}
newrecset = append(newrecset, dr)
}
b.substreams[ss] = newrecset
records = newrecset
}
for _, records := range b.substreams {
//log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
for i, nr := range records {
var bb io.Reader = bytes.NewReader(nr.Data)
switch nr.RecType {
case RecTypeSST:
//log.Println(i, nr.RecType)
recSet := []*rec{nr}
lastIndex := i
@ -162,6 +219,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
lastIndex++
recSet = append(recSet, records[lastIndex])
}
b.strings, err = parseSST(recSet)
if err != nil {
return err
@ -170,7 +228,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
case RecTypeContinue:
// no-op (used above)
case RecTypeEOF:
log.Println("End Of Stream")
// done
case RecTypeBOF:
err = binary.Read(bb, binary.LittleEndian, b.h)
@ -187,24 +245,21 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
// we only support the workbook or worksheet substreams
log.Println("xls: unsupported document type")
break
//break
}
case RecTypeCodePage:
//log.Println(i, nr.RecType)
err = binary.Read(bb, binary.LittleEndian, &b.codepage)
if err != nil {
return err
}
case RecTypeDate1904:
//log.Println(i, nr.RecType)
err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
if err != nil {
return err
}
case RecTypeBoundSheet8:
//log.Println(i, nr.RecType)
bs := &boundSheet{}
err = binary.Read(bb, binary.LittleEndian, &bs.Position)
if err != nil {
@ -229,7 +284,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
return err
}
b.sheets = append(b.sheets, bs)
log.Println("SHEET", bs.Name, "at pos", bs.Position)
default:
//log.Println(i, "SKIPPED", nr.RecType)
}
@ -239,8 +293,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
return err
}
var errSkipped = errors.New("xls: skipped record type")
func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
var rt recordType
var rs uint16
@ -248,8 +300,14 @@ func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
if err != nil {
return nil, err
}
if rt == 0 {
return nil, io.EOF
}
err = binary.Read(r, binary.LittleEndian, &rs)
if rs > 8224 {
return nil, errors.New("xls: invalid data format")
}
if err != nil {
return nil, err
}