many bugfixes and edge cases, impl most cell types

2026-05-16 17:16:40 +02:00 · 2021-02-08 11:02:37 -05:00
parent f794a5ef9b
commit 80c3b4cc81
8 changed files with 372 additions and 169 deletions
@@ -23,21 +23,27 @@ func main() {
 	sanitize := regexp.MustCompile("[^a-zA-Z0-9]+")
 	newlines := regexp.MustCompile("[ \n\r\t]+")
 	for _, fn := range flag.Args() {
+		log.Printf("Opening file '%s' ...", fn)
 		wb, err := xls.Open(context.Background(), fn)
 		if err != nil {
-			log.Fatal(err)
+			log.Println(err)
+			continue
 		}
-		log.Println(fn)

 		ext := filepath.Ext(fn)
 		fn2 := filepath.Base(strings.TrimSuffix(fn, ext))

 		for _, s := range wb.Sheets() {
+			log.Printf("  Opening Sheet '%s'...", s)
 			sheet, err := wb.Get(s)
 			if err != nil {
 				log.Println(err)
 				continue
 			}
+			if sheet.IsEmpty() {
+				log.Println("    Empty sheet. Skipping.")
+				continue
+			}
 			s2 := sanitize.ReplaceAllString(s, "_")
 			f, err := os.Create(fn2 + "." + s2 + ".tsv")
 			if err != nil {
@@ -61,7 +67,6 @@ func main() {
 				}
 				if nonblank || !*skipBlanks {
 					fmt.Fprintln(f, strings.Join(row, "\t"))
-					f.Sync()
 				}
 			}
 			f.Close()
@@ -12,6 +12,7 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"log"
 	"unicode/utf16"
 )

@@ -122,7 +123,8 @@ func (d *doc) load(rx io.ReadSeeker) error {
 			panic("unknown major version")
 		}
 		if h.MinorVersion != 0x3E {
-			panic("unknown minor version")
+			log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
+			//panic("unknown minor version")
 		}

 		for _, v := range h.Reserved1 {
@@ -186,6 +188,9 @@ func (d *doc) load(rx io.ReadSeeker) error {
 				}

 				offs := int64(1+sid2) << int32(h.SectorShift)
+				if offs > int64(len(d.data)) {
+					return errors.New("xls/cfb: unable to load file")
+				}
 				sector := d.data[offs:]
 				for j := 0; j < numFATentries; j++ {
 					sid3 := le.Uint32(sector)
@@ -246,7 +251,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error {
 			d.ministreamstart = uint32(dirent.StartingSectorLocation)
 			d.ministreamsize = uint32(dirent.StreamSize)
 		case typeStorage:
-			panic("got a storage? what to do now?")
+			//log.Println("got a storage? what to do now?")
 		case typeStream:
 			/*
 				var freader io.Reader
@@ -51,6 +51,10 @@ func (s *SliceReader) Seek(offset int64, whence int) (int64, error) {
 	}
 	// current offset in stream
 	trueOffset := int64(s.Offset) + s.CSize[int(s.Index)]
+	if offset == 0 && whence == io.SeekCurrent {
+		// just asking for current position
+		return trueOffset, nil
+	}

 	switch whence {
 	case io.SeekStart:
@@ -41,8 +41,10 @@ func (d *rc4Writer) Reset() {
 func (d *rc4Writer) Flush() {
 	var zeros [1024]byte

+	endpad := 0
 	if d.offset < 1024 {
-		d.offset += copy(d.bytes[d.offset:], zeros[:])
+		endpad = copy(d.bytes[d.offset:], zeros[:])
+		d.offset += endpad
 	}
 	if d.offset != 1024 {
 		panic("invalid offset fill")
@@ -51,7 +53,7 @@ func (d *rc4Writer) Flush() {
 	// decrypt and write results to output buffer
 	d.startBlock()
 	d.dec.XORKeyStream(d.bytes[:], d.bytes[:])
-	d.buf.Write(d.bytes[:])
+	d.buf.Write(d.bytes[:1024-endpad])

 	d.offset = 0
 	d.block++
@@ -12,9 +12,11 @@ import (
 )

 func (b *WorkBook) Sheets() []string {
-	res := make([]string, len(b.sheets))
-	for i, s := range b.sheets {
-		res[i] = s.Name
+	res := make([]string, 0, len(b.sheets))
+	for _, s := range b.sheets {
+		if (s.HiddenState & 0x03) == 0 {
+			res = append(res, s.Name)
+		}
 	}
 	return res
 }
@@ -40,6 +42,7 @@ type WorkSheet struct {

 	rows   []*row
 	maxcol int
+	empty  bool

 	iterRow int
 }
@@ -79,30 +82,91 @@ func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
 	s.rows[rowIndex].cols[colIndex] = val
 }

+// IsEmpty reports whether parse flagged the sheet as empty, which it does
+// when the sheet's Dimensions record spans zero rows and zero columns.
+func (s *WorkSheet) IsEmpty() bool {
+	return s.empty
+}
+
 func (s *WorkSheet) parse() error {
+	var minRow, maxRow uint32
+	var minCol, maxCol uint16
+	for _, r := range s.b.substreams[s.ss] {
+		if r.RecType == RecTypeWsBool {
+			if (r.Data[1] & 0x10) != 0 {
+				// it's a dialog
+				return nil
+			}
+		}
+	}
+
+	var formulaRow, formulaCol uint16
 	for _, r := range s.b.substreams[s.ss] {
 		bb := bytes.NewReader(r.Data)

 		switch r.RecType {
-		case RecTypeWindow2:
-			opts := binary.LittleEndian.Uint16(r.Data)
-			// right-to-left = 0x40, selected = 0x400
-			log.Printf("sheet options: %x", opts)
+		//case RecTypeWindow2:
+		//opts := binary.LittleEndian.Uint16(r.Data)
+		// right-to-left = 0x40, selected = 0x400
+		//log.Printf("sheet options: %x", opts)
+		case RecTypeDimensions:
+			binary.Read(bb, binary.LittleEndian, &minRow)
+			binary.Read(bb, binary.LittleEndian, &maxRow)
+			binary.Read(bb, binary.LittleEndian, &minCol)
+			binary.Read(bb, binary.LittleEndian, &maxCol)
+			//log.Printf("dimensions: %d,%d + %dx%d", minRow&0x0000FFFF, minCol,
+			//	(maxRow&0x0000FFFF)-(minRow&0x0000FFFF), maxCol-minCol)
+			if minRow > 0x0000FFFF || maxRow > 0x00010000 {
+				log.Println("invalid dimensions")
+			}
+			if minCol > 0x00FF || maxCol > 0x0100 {
+				log.Println("invalid dimensions")
+			}
+			if (maxRow-minRow) == 0 && (maxCol-minCol) == 0 {
+				s.empty = true
+			}
+
 		case RecTypeRow:
 			row := &shRow{}
 			binary.Read(bb, binary.LittleEndian, row)
-			log.Printf("row spec: %+v", *row)
+			if (row.Reserved & 0xFFFF) != 0 {
+				log.Println("invalid Row spec")
+				continue
+			}
+			//log.Printf("row spec: %+v", *row)
+
 		case RecTypeBlank:
 			var rowIndex, colIndex uint16
 			binary.Read(bb, binary.LittleEndian, &rowIndex)
 			binary.Read(bb, binary.LittleEndian, &colIndex)
-			log.Printf("blank spec: %d %d", rowIndex, colIndex)
+			//log.Printf("blank spec: %d %d", rowIndex, colIndex)
+
+		case RecTypeBoolErr:
+			var rowIndex, colIndex, ixfe uint16
+			binary.Read(bb, binary.LittleEndian, &rowIndex)
+			binary.Read(bb, binary.LittleEndian, &colIndex)
+			binary.Read(bb, binary.LittleEndian, &ixfe)
+			if r.Data[7] == 0 {
+				bv := false
+				if r.Data[6] == 1 {
+					bv = true
+				}
+				s.placeValue(int(rowIndex), int(colIndex), bv)
+				//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
+			} else {
+				be, ok := berrLookup[r.Data[6]]
+				if !ok {
+					be = "<unknown error>"
+				}
+				s.placeValue(int(rowIndex), int(colIndex), be)
+				//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
+			}
+
 		case RecTypeMulBlank:
 			var rowIndex, firstCol uint16
 			binary.Read(bb, binary.LittleEndian, &rowIndex)
 			binary.Read(bb, binary.LittleEndian, &firstCol)
-			nrk := int((r.RecSize - 6) / 6)
-			log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
+		//	nrk := int((r.RecSize - 6) / 6)
+		//	log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
+
 		case RecTypeMulRk:
 			mr := &shMulRK{}
 			nrk := int((r.RecSize - 6) / 6)
@@ -123,8 +187,7 @@ func (s *WorkSheet) parse() error {
 				s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
 			}
 			binary.Read(bb, binary.LittleEndian, &mr.LastCol)
-
-			log.Printf("mulrow spec: %+v", *mr)
+			//log.Printf("mulrow spec: %+v", *mr)

 		case RecTypeNumber:
 			var rowIndex, colIndex, ixfe uint16
@@ -135,7 +198,7 @@ func (s *WorkSheet) parse() error {
 			binary.Read(bb, binary.LittleEndian, &xnum)
 			value := math.Float64frombits(xnum)
 			s.placeValue(int(rowIndex), int(colIndex), value)
-			log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
+			//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)

 		case RecTypeRK:
 			var rowIndex, colIndex uint16
@@ -151,28 +214,59 @@ func (s *WorkSheet) parse() error {
 				rval = rr.Value.Float64()
 			}
 			s.placeValue(int(rowIndex), int(colIndex), rval)
-			log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
+			//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())

 		case RecTypeFormula:
-			var rowIndex, colIndex uint16
-			binary.Read(bb, binary.LittleEndian, &rowIndex)
-			binary.Read(bb, binary.LittleEndian, &colIndex)
-
-			log.Printf("formula spec: %d %d ~~ %+v", rowIndex, colIndex, r.Data)
+			var ixfe uint16
+			binary.Read(bb, binary.LittleEndian, &formulaRow)
+			binary.Read(bb, binary.LittleEndian, &formulaCol)
+			binary.Read(bb, binary.LittleEndian, &ixfe)
+			fdata := r.Data[6:]
+			if fdata[6] == 0xFF && r.Data[7] == 0xFF {
+				switch fdata[0] {
+				case 0:
+					// string in next record
+				case 1:
+					// boolean
+					bv := false
+					if fdata[2] != 0 {
+						bv = true
+					}
+					s.placeValue(int(formulaRow), int(formulaCol), bv)
+				case 2:
+					// error value
+					be, ok := berrLookup[fdata[2]]
+					if !ok {
+						be = "<unknown error>"
+					}
+					s.placeValue(int(formulaRow), int(formulaCol), be)
+				case 3:
+					// blank string
+				default:
+					log.Println("unknown formula value type")
+				}
+			} else {
+				var xnum uint64
+				binary.Read(bb, binary.LittleEndian, &xnum)
+				value := math.Float64frombits(xnum)
+				s.placeValue(int(formulaRow), int(formulaCol), value)
+			}
+			//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)

 		case RecTypeString:
-			var charCount, flags uint16
+			var charCount uint16
+			var flags byte
 			binary.Read(bb, binary.LittleEndian, &charCount)
 			binary.Read(bb, binary.LittleEndian, &flags)
-			s := ""
+			fstr := ""
 			if (flags & 1) == 0 {
-				s = string(r.Data[4:])
+				fstr = string(r.Data[3:])
 			} else {
 				us := make([]uint16, charCount)
 				binary.Read(bb, binary.LittleEndian, us)
-				s = string(utf16.Decode(us))
+				fstr = string(utf16.Decode(us))
 			}
-			log.Printf("string spec:  = %s", s)
+			s.placeValue(int(formulaRow), int(formulaCol), fstr)

 		case RecTypeLabelSst:
 			var rowIndex, colIndex, ixfe uint16
@@ -181,8 +275,11 @@ func (s *WorkSheet) parse() error {
 			binary.Read(bb, binary.LittleEndian, &colIndex)
 			binary.Read(bb, binary.LittleEndian, &ixfe)
 			binary.Read(bb, binary.LittleEndian, &sstIndex)
+			if int(sstIndex) > len(s.b.strings) {
+				panic("invalid sst")
+			}
 			s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
-			log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
+			//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])

 		case RecTypeHLink:
 			loc := &shRef8{}
@@ -214,13 +311,14 @@ func (s *WorkSheet) parse() error {
 			binary.Read(bb, binary.LittleEndian, &cmcs)
 			mcRefs := make([]shRef8, cmcs)
 			binary.Read(bb, binary.LittleEndian, &mcRefs)
-			log.Printf("MergeCells spec: %d records", cmcs)
-			for j, mc := range mcRefs {
-				log.Printf("    %d: %+v", j, mc)
-			}
+			//log.Printf("MergeCells spec: %d records", cmcs)
+			// TODO: implement markers to annotate these in tabular output
+			// for j, mc := range mcRefs {
+			// 	log.Printf("    %d: %+v", j, mc)
+			// }

 		default:
-			log.Println("worksheet", r.RecType, r.RecSize)
+			//log.Println("worksheet", r.RecType, r.RecSize)

 		}
 	}
@@ -249,13 +347,15 @@ func (s *WorkSheet) Strings() []string {
 }

 // Scan extracts values from the row into the provided arguments
-// Arguments must be pointers to one of 4 supported types:
-//     int, float64, string, or time.Time
+// Arguments must be pointers to one of 5 supported types:
+//     bool, int, float64, string, or time.Time
 func (s *WorkSheet) Scan(args ...interface{}) error {
 	currow := s.rows[s.iterRow]

 	for i, a := range args {
 		switch v := a.(type) {
+		case *bool:
+			*v = currow.cols[i].(bool)
 		case *int:
 			*v = currow.cols[i].(int)
 		case *float64:
@@ -272,4 +372,15 @@ func (s *WorkSheet) Scan(args ...interface{}) error {
 }

 // ErrInvalidType is returned by Scan for invalid arguments.
-var ErrInvalidType = errors.New("xls: Scan only supports *int, *float64, *string, *time.Time arguments")
+var ErrInvalidType = errors.New("xls: Scan only supports *bool, *int, *float64, *string, *time.Time arguments")
+
+// berrLookup maps the error-code byte of a BoolErr record or of a formula
+// result to the text stored in the cell. Codes that are not listed here are
+// stored by the callers as "<unknown error>".
+var berrLookup = map[byte]string{
+	0x00: "#NULL!",
+	0x07: "#DIV/0!",
+	0x0F: "#VALUE!",
+	0x17: "#REF!",
+	0x1D: "#NAME?",
+	0x24: "#NUM!",
+	0x2A: "#N/A",
+	0x2B: "#GETTING_DATA",
+}
@@ -14,14 +14,14 @@ func TestHeader(t *testing.T) {
 	log.Println(wb.filename)

 	for _, s := range wb.Sheets() {
-		log.Println(s)
+		//log.Println(s)
 		sheet, err := wb.Get(s)
 		if err != nil {
 			t.Fatal(err)
 		}

 		for sheet.Next() {
-			log.Println(sheet.Strings())
+			sheet.Strings()
 		}
 	}
 }
@@ -32,6 +32,18 @@ func TestHeader2(t *testing.T) {
 		t.Fatal(err)
 	}
 	log.Println(wb.filename)
+
+	for _, s := range wb.Sheets() {
+		//log.Println(s)
+		sheet, err := wb.Get(s)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		for sheet.Next() {
+			sheet.Strings()
+		}
+	}
 }

 func TestHeader3(t *testing.T) {
@@ -40,6 +52,18 @@ func TestHeader3(t *testing.T) {
 		t.Fatal(err)
 	}
 	log.Println(wb.filename)
+
+	for _, s := range wb.Sheets() {
+		//log.Println(s)
+		sheet, err := wb.Get(s)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		for sheet.Next() {
+			sheet.Strings()
+		}
+	}
 }

 func TestHeader4(t *testing.T) {
@@ -49,4 +73,16 @@ func TestHeader4(t *testing.T) {
 		t.Fatal(err)
 	}
 	log.Println(wb.filename)
+
+	for _, s := range wb.Sheets() {
+		//log.Println(s)
+		sheet, err := wb.Get(s)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		for sheet.Next() {
+			sheet.Strings()
+		}
+	}
 }
@@ -4,7 +4,6 @@ import (
 	"encoding/binary"
 	"io"
 	"io/ioutil"
-	"log"
 	"unicode/utf16"
 )

@@ -45,36 +44,29 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
 	var cbExtRs int32
 	err := binary.Read(r, binary.LittleEndian, &cch)
 	if err != nil {
-		log.Println("x1", err)
 		return "", err
 	}
 	err = binary.Read(r, binary.LittleEndian, &flags)
 	if err != nil {
-		log.Println("x2", err)
 		return "", err
 	}
 	if (flags & 0x8) != 0 {
-		log.Println("FORMATTING PRESENT")
 		// rich formatting data is present
 		err = binary.Read(r, binary.LittleEndian, &cRun)
 		if err != nil {
-			log.Println("x3", err)
 			return "", err
 		}
 	}
 	if (flags & 0x4) != 0 {
-		log.Println("PHONETIC PRESENT")
 		// phonetic string data is present
 		err = binary.Read(r, binary.LittleEndian, &cbExtRs)
 		if err != nil {
-			log.Println("x4", err)
 			return "", err
 		}
 	}

 	content := make([]uint16, cch)
 	if (flags & 0x1) == 0 {
-		log.Println("8BIT DATA", cch)
 		// 16-bit characters but only the bottom 8bits
 		contentBytes := make([]byte, cch)
 		n, err2 := io.ReadFull(r, contentBytes)
@@ -91,30 +83,25 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
 		}

 	} else {
-		log.Println("16BIT DATA", cch)
 		// 16-bit characters
 		err = binary.Read(r, binary.LittleEndian, content)
 	}
 	if err != nil {
-		log.Println("x5", err)
+		return "", err
 	}
 	//////

 	if cRun > 0 {
-		log.Println("READING FORMATTING DATA")
 		// rich formatting data is present
 		_, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4)
 		if err != nil {
-			log.Println("x6", err)
 			return "", err
 		}
 	}
 	if cbExtRs > 0 {
-		log.Println("READING PHONETIC DATA")
 		// phonetic string data is present
-		n, err := io.CopyN(ioutil.Discard, r, int64(cbExtRs))
+		_, err = io.CopyN(ioutil.Discard, r, int64(cbExtRs))
 		if err != nil {
-			log.Println("x7", n, cbExtRs, err)
 			return "", err
 		}
 	}
@@ -125,25 +112,16 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {

 // read in an array of XLUnicodeRichExtendedString s
 func parseSST(recs []*rec) ([]string, error) {
-	totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
+	//totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
 	numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])

-	// cell count limit is 65k x 256
-	if numStrings > 65536*256 {
-		log.Println("INVALID COUNTS total=", totalRefs, " -- n strings=", numStrings)
-		totalRefs = 0
-		numStrings = 65536 * 256
-	}
-
-	log.Println("total=", totalRefs, " -- n strings=", numStrings)
 	all := make([]string, 0, numStrings)

 	buf := recs[0].Data[8:]
 	for i := 0; i < len(recs); {
-		var blen int
 		var cRunBytes int
 		var flags byte
-		var current []byte
+		var current []uint16
 		var cbExtRs uint32

 		for len(buf) > 0 {
@@ -152,12 +130,6 @@ func parseSST(recs []*rec) ([]string, error) {
 			flags = buf[0]
 			buf = buf[1:]

-			blen = int(slen)
-			if (flags & 0x1) != 0 {
-				// 16-bit characters
-				blen = int(slen) * 2
-			}
-
 			if (flags & 0x8) != 0 {
 				// rich formatting data is present
 				cRun := binary.LittleEndian.Uint16(buf)
@@ -170,37 +142,47 @@ func parseSST(recs []*rec) ([]string, error) {
 				buf = buf[4:]
 			}

+			///////
+			blx := len(buf)
+			bly := len(buf) - 5
+			if blx > 5 {
+				blx = 5
+			}
+			if bly < 0 {
+				bly = 0
+			}
+
 			// this block will read the string data, but transparently
 			// handle continuing across records
-			current = make([]byte, blen)
-			n := copy(current, buf)
-			current = current[:n]
-			buf = buf[n:]
-			for len(current) < blen {
-				i++
-				buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
-
-				n = int(blen) - len(current)
-				if n > len(buf) {
-					n = len(buf)
+			current = make([]uint16, slen)
+			for j := 0; j < int(slen); j++ {
+				if len(buf) == 0 {
+					i++
+					if (recs[i].Data[0] & 1) == 0 {
+						flags &= 0xFE
+					} else {
+						flags |= 1
+					}
+					buf = recs[i].Data[1:]
+				}
+
+				if (flags & 1) == 0 { //8-bit
+					current[j] = uint16(buf[0])
+					buf = buf[1:]
+				} else { //16-bit
+					current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
+					buf = buf[2:]
+					if len(buf) == 1 {
+						panic("off by one")
+					}
 				}
-				current = append(current, buf[:n]...)
-				buf = buf[n:]
 			}

-			if (flags & 1) == 0 {
-				s := string(current)
-				all = append(all, s)
-			} else {
-				x := make([]uint16, len(current)/2)
-				for y := 0; y < len(current); y += 2 {
-					x[y/2] = binary.LittleEndian.Uint16(current[y : y+2])
-				}
-				s := string(utf16.Decode(x))
-				all = append(all, s)
-			}
+			s := string(utf16.Decode(current))
+			all = append(all, s)
+
+			///////

-			//log.Println(len(all), all[len(all)-1])
 			for cRunBytes > 0 {
 				if len(buf) >= int(cRunBytes) {
 					buf = buf[cRunBytes:]
@@ -208,7 +190,7 @@ func parseSST(recs []*rec) ([]string, error) {
 				} else {
 					cRunBytes -= len(buf)
 					i++
-					buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
+					buf = recs[i].Data
 				}
 			}

@@ -219,7 +201,7 @@ func parseSST(recs []*rec) ([]string, error) {
 				} else {
 					cbExtRs -= uint32(len(buf))
 					i++
-					buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
+					buf = recs[i].Data
 				}
 			}
 		}
@@ -23,6 +23,7 @@ type WorkBook struct {
 	ctx      context.Context
 	doc      cfb.Document

+	prot     bool
 	h        *header
 	sheets   []*boundSheet
 	codepage uint16
@@ -34,8 +35,10 @@ type WorkBook struct {

 	fpos          int64
 	pos2substream map[int64]int
+}

-	decryptors map[int]crypto.Decryptor
+// IsProtected returns the workbook's prot flag.
+// NOTE(review): no assignment to prot is visible in this change; confirm
+// where (and whether) it is set before relying on the result.
+func (b *WorkBook) IsProtected() bool {
+	return b.prot
 }

 func Open(ctx context.Context, filename string) (*WorkBook, error) {
@@ -56,33 +59,134 @@ func Open(ctx context.Context, filename string) (*WorkBook, error) {
 	if err != nil {
 		return nil, err
 	}
-	//br := bufio.NewReader(rdr)
 	err = b.loadFromStream(rdr)
 	return b, err
 }

-func (b *WorkBook) loadFromStream(r io.Reader) error {
-	b.decryptors = make(map[int]crypto.Decryptor)
+// loadFromStream parses the workbook records from r, treating the stream as
+// not yet decrypted. loadFromStream2 itself switches to the decrypting path
+// when it meets a FilePass record.
+func (b *WorkBook) loadFromStream(r io.ReadSeeker) error {
+	return b.loadFromStream2(r, false)
+}
+
+// loadFromStreamWithDecryptor rewinds r, feeds every record through dec and
+// then parses the decrypted stream with loadFromStream2.
+//
+// Each record's 4-byte header is written to dec and later restored from the
+// original bytes. The bodies of BOF, FilePass, UsrExcl, FileLock,
+// InterfaceHdr, RRDInfo and RRDHead records, and the first 4 bytes of
+// BoundSheet8 records, are likewise kept verbatim: zeros of the same length
+// are written to dec in their place so the decryptor still advances by the
+// same number of bytes, and the originals are copied back over its output.
+//
+// It returns an error when seeking or reading a record header fails, when a
+// record declares a size that cannot be valid, or when the decrypted stream
+// fails to parse. A body that is cut short ends the scan and whatever was
+// read is still decrypted and parsed (best effort).
+func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decryptor) error {
+	if _, err := r.Seek(0, io.SeekStart); err != nil {
+		log.Println("xls: dec-seek1 failed")
+		return err
+	}
+
+	// Same record-size limit that nextRecord enforces; also bounds the
+	// slices taken from zeros below.
+	const maxRecordData = 8224
+	var zeros [maxRecordData]byte
+
+	type overlay struct {
+		Pos int64
+
+		RecType   recordType
+		DataBytes uint16
+		Data      []byte // NB len() not necessarily = DataBytes
+	}
+	var replaceBlocks []overlay
+
+	var err error
+	for err == nil {
+		o := overlay{}
+		o.Pos, err = r.Seek(0, io.SeekCurrent)
+		if err != nil {
+			log.Println("xls: dec-seek2 failed")
+			return err
+		}
+
+		err = binary.Read(r, binary.LittleEndian, &o.RecType)
+		if err == io.EOF {
+			// clean end of stream: no further record header
+			break
+		}
+		if err != nil {
+			log.Println("xls: dec-read1 failed")
+			return err
+		}
+
+		err = binary.Read(r, binary.LittleEndian, &o.DataBytes)
+		if err != nil {
+			log.Println("xls: dec-read2 failed")
+			return err
+		}
+
+		// copy to output and decryption stream
+		binary.Write(dec, binary.LittleEndian, o.RecType)
+		binary.Write(dec, binary.LittleEndian, o.DataBytes)
+		tocopy := int(o.DataBytes)
+
+		switch o.RecType {
+		case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
+			// DataBytes is a uint16, so without this check a corrupt size
+			// would slice past the end of zeros and panic.
+			if tocopy > len(zeros) {
+				return errors.New("xls: invalid data format")
+			}
+			// copy original data into output
+			o.Data = make([]byte, tocopy)
+			_, err = io.ReadFull(r, o.Data)
+			if err != nil {
+				log.Println("FAIL err", err)
+			}
+			dec.Write(zeros[:tocopy])
+			tocopy = 0
+
+		case RecTypeBoundSheet8:
+			// A record shorter than its 4-byte position would make tocopy
+			// negative and swallow the start of the next record.
+			if tocopy < 4 {
+				return errors.New("xls: invalid data format")
+			}
+			// copy 32-bit position to output
+			o.Data = make([]byte, 4)
+			_, err = io.ReadFull(r, o.Data)
+			if err != nil {
+				log.Println("FAIL err", err)
+			}
+			dec.Write(zeros[:4])
+			tocopy -= 4
+		}
+
+		if tocopy > 0 {
+			_, err = io.CopyN(dec, r, int64(tocopy))
+		}
+		replaceBlocks = append(replaceBlocks, o)
+	}
+	dec.Flush()
+
+	obuf := &bytes.Buffer{}
+	if _, err = io.Copy(obuf, dec); err != nil {
+		return err
+	}
+
+	// Put the unencrypted headers and bodies back over the decryptor output.
+	alldata := obuf.Bytes()
+	for _, o := range replaceBlocks {
+		offs := int(o.Pos)
+		if offs+4 > len(alldata) {
+			// decryptor produced less output than was read from r
+			return errors.New("xls: invalid data format")
+		}
+		binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
+		binary.LittleEndian.PutUint16(alldata[offs+2:], uint16(o.DataBytes))
+		if len(o.Data) > 0 {
+			offs += 4
+			copy(alldata[offs:], o.Data)
+		}
+	}
+
+	return b.loadFromStream2(bytes.NewReader(alldata), true)
+}
+
+func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
 	b.h = &header{}
 	substr := -1
+	nestedBOF := 0
+	b.substreams = b.substreams[:0]
+	b.pos2substream = make(map[int64]int, 10)
+	b.fpos = 0
 	nr, err := b.nextRecord(r)
 	for err == nil {
-		if nr.RecType == RecTypeBOF {
-			substr++
-			b.substreams = append(b.substreams, []*rec{})
-			b.pos2substream[b.fpos] = substr
+		switch nr.RecType {
+		case RecTypeEOF:
+			nestedBOF--
+		case RecTypeBOF:
+			// when substreams are nested, keep them in the same grouping
+			if nestedBOF == 0 {
+				substr = len(b.substreams)
+				b.substreams = append(b.substreams, []*rec{})
+				b.pos2substream[b.fpos] = substr
+			}
+			nestedBOF++
 		}
 		b.fpos += int64(4 + len(nr.Data))

-		if nr.RecType == RecTypeFilePass {
+		if nr.RecType == RecTypeFilePass && !isDecrypted {
 			etype := binary.LittleEndian.Uint16(nr.Data)
 			switch etype {
 			case 1:
-				b.decryptors[substr], err = crypto.NewBasicRC4(nr.Data[2:])
+				dec, err := crypto.NewBasicRC4(nr.Data[2:])
 				if err != nil {
 					log.Println("xls: rc4 encryption failed to set up", err)
 					return err
 				}
+				return b.loadFromStreamWithDecryptor(r, dec)
 			case 2, 3, 4:
 				log.Println("need Crypto API RC4 decryptor")
 				return errors.New("xls: unsupported Crypto API encryption method")
@@ -101,60 +205,13 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 		return err
 	}

-	for ss, records := range b.substreams {
-		log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
-
-		if dec, ok := b.decryptors[ss]; ok {
-			log.Printf("Decrypting substream...")
-
-			dec.Reset()
-			var head [4]byte
-			for _, nr := range records {
-				binary.LittleEndian.PutUint16(head[:], uint16(nr.RecType))
-				binary.LittleEndian.PutUint16(head[2:], nr.RecSize)
-
-				// send the record for decryption
-				dec.Write(head[:])
-				dec.Write(nr.Data)
-			}
-			dec.Flush()
-
-			newrecset := make([]*rec, 0, len(records))
-			for _, nr := range records {
-				dec.Read(head[:]) // discard 4 byte header
-
-				dr := &rec{
-					RecType: nr.RecType,
-					RecSize: nr.RecSize,
-					Data:    make([]byte, int(nr.RecSize)),
-				}
-				dec.Read(dr.Data)
-
-				switch nr.RecType {
-				case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
-					// keep original data
-					copy(dr.Data, nr.Data)
-				case RecTypeBoundSheet8:
-					// copy the position un-decrypted
-					copy(dr.Data[:4], nr.Data)
-				default:
-					// apply decryption
-				}
-
-				newrecset = append(newrecset, dr)
-			}
-
-			b.substreams[ss] = newrecset
-			records = newrecset
-		}
-
+	for _, records := range b.substreams {
+		//log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
 		for i, nr := range records {
 			var bb io.Reader = bytes.NewReader(nr.Data)

 			switch nr.RecType {
 			case RecTypeSST:
-				//log.Println(i, nr.RecType)
-
 				recSet := []*rec{nr}

 				lastIndex := i
@@ -162,6 +219,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 					lastIndex++
 					recSet = append(recSet, records[lastIndex])
 				}
+
 				b.strings, err = parseSST(recSet)
 				if err != nil {
 					return err
@@ -170,7 +228,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 			case RecTypeContinue:
 				// no-op (used above)
 			case RecTypeEOF:
-				log.Println("End Of Stream")
+				// done

 			case RecTypeBOF:
 				err = binary.Read(bb, binary.LittleEndian, b.h)
@@ -187,24 +245,21 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 				if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
 					// we only support the workbook or worksheet substreams
 					log.Println("xls: unsupported document type")
-					break
+					//break
 				}

 			case RecTypeCodePage:
-				//log.Println(i, nr.RecType)
 				err = binary.Read(bb, binary.LittleEndian, &b.codepage)
 				if err != nil {
 					return err
 				}

 			case RecTypeDate1904:
-				//log.Println(i, nr.RecType)
 				err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
 				if err != nil {
 					return err
 				}
 			case RecTypeBoundSheet8:
-				//log.Println(i, nr.RecType)
 				bs := &boundSheet{}
 				err = binary.Read(bb, binary.LittleEndian, &bs.Position)
 				if err != nil {
@@ -229,7 +284,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 					return err
 				}
 				b.sheets = append(b.sheets, bs)
-				log.Println("SHEET", bs.Name, "at pos", bs.Position)
 			default:
 				//log.Println(i, "SKIPPED", nr.RecType)
 			}
@@ -239,8 +293,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
 	return err
 }

-var errSkipped = errors.New("xls: skipped record type")
-
 func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
 	var rt recordType
 	var rs uint16
@@ -248,8 +300,14 @@ func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
 	if err != nil {
 		return nil, err
 	}
+	if rt == 0 {
+		return nil, io.EOF
+	}

 	err = binary.Read(r, binary.LittleEndian, &rs)
+	if rs > 8224 {
+		return nil, errors.New("xls: invalid data format")
+	}
 	if err != nil {
 		return nil, err
 	}