mirror of
https://github.com/pbnjay/grate.git
synced 2024-12-13 13:58:27 +02:00
many bugfixes and edge cases, impl most cell types
This commit is contained in:
parent
f794a5ef9b
commit
80c3b4cc81
@ -23,21 +23,27 @@ func main() {
|
||||
sanitize := regexp.MustCompile("[^a-zA-Z0-9]+")
|
||||
newlines := regexp.MustCompile("[ \n\r\t]+")
|
||||
for _, fn := range flag.Args() {
|
||||
log.Printf("Opening file '%s' ...", fn)
|
||||
wb, err := xls.Open(context.Background(), fn)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
log.Println(fn)
|
||||
|
||||
ext := filepath.Ext(fn)
|
||||
fn2 := filepath.Base(strings.TrimSuffix(fn, ext))
|
||||
|
||||
for _, s := range wb.Sheets() {
|
||||
log.Printf(" Opening Sheet '%s'...", s)
|
||||
sheet, err := wb.Get(s)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
if sheet.IsEmpty() {
|
||||
log.Println(" Empty sheet. Skipping.")
|
||||
continue
|
||||
}
|
||||
s2 := sanitize.ReplaceAllString(s, "_")
|
||||
f, err := os.Create(fn2 + "." + s2 + ".tsv")
|
||||
if err != nil {
|
||||
@ -61,7 +67,6 @@ func main() {
|
||||
}
|
||||
if nonblank || !*skipBlanks {
|
||||
fmt.Fprintln(f, strings.Join(row, "\t"))
|
||||
f.Sync()
|
||||
}
|
||||
}
|
||||
f.Close()
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
@ -122,7 +123,8 @@ func (d *doc) load(rx io.ReadSeeker) error {
|
||||
panic("unknown major version")
|
||||
}
|
||||
if h.MinorVersion != 0x3E {
|
||||
panic("unknown minor version")
|
||||
log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
|
||||
//panic("unknown minor version")
|
||||
}
|
||||
|
||||
for _, v := range h.Reserved1 {
|
||||
@ -186,6 +188,9 @@ func (d *doc) load(rx io.ReadSeeker) error {
|
||||
}
|
||||
|
||||
offs := int64(1+sid2) << int32(h.SectorShift)
|
||||
if offs > int64(len(d.data)) {
|
||||
return errors.New("xls/cfb: unable to load file")
|
||||
}
|
||||
sector := d.data[offs:]
|
||||
for j := 0; j < numFATentries; j++ {
|
||||
sid3 := le.Uint32(sector)
|
||||
@ -246,7 +251,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error {
|
||||
d.ministreamstart = uint32(dirent.StartingSectorLocation)
|
||||
d.ministreamsize = uint32(dirent.StreamSize)
|
||||
case typeStorage:
|
||||
panic("got a storage? what to do now?")
|
||||
//log.Println("got a storage? what to do now?")
|
||||
case typeStream:
|
||||
/*
|
||||
var freader io.Reader
|
||||
|
@ -51,6 +51,10 @@ func (s *SliceReader) Seek(offset int64, whence int) (int64, error) {
|
||||
}
|
||||
// current offset in stream
|
||||
trueOffset := int64(s.Offset) + s.CSize[int(s.Index)]
|
||||
if offset == 0 && whence == io.SeekCurrent {
|
||||
// just asking for current position
|
||||
return trueOffset, nil
|
||||
}
|
||||
|
||||
switch whence {
|
||||
case io.SeekStart:
|
||||
|
@ -41,8 +41,10 @@ func (d *rc4Writer) Reset() {
|
||||
func (d *rc4Writer) Flush() {
|
||||
var zeros [1024]byte
|
||||
|
||||
endpad := 0
|
||||
if d.offset < 1024 {
|
||||
d.offset += copy(d.bytes[d.offset:], zeros[:])
|
||||
endpad = copy(d.bytes[d.offset:], zeros[:])
|
||||
d.offset += endpad
|
||||
}
|
||||
if d.offset != 1024 {
|
||||
panic("invalid offset fill")
|
||||
@ -51,7 +53,7 @@ func (d *rc4Writer) Flush() {
|
||||
// decrypt and write results to output buffer
|
||||
d.startBlock()
|
||||
d.dec.XORKeyStream(d.bytes[:], d.bytes[:])
|
||||
d.buf.Write(d.bytes[:])
|
||||
d.buf.Write(d.bytes[:1024-endpad])
|
||||
|
||||
d.offset = 0
|
||||
d.block++
|
||||
|
179
xls/sheets.go
179
xls/sheets.go
@ -12,9 +12,11 @@ import (
|
||||
)
|
||||
|
||||
func (b *WorkBook) Sheets() []string {
|
||||
res := make([]string, len(b.sheets))
|
||||
for i, s := range b.sheets {
|
||||
res[i] = s.Name
|
||||
res := make([]string, 0, len(b.sheets))
|
||||
for _, s := range b.sheets {
|
||||
if (s.HiddenState & 0x03) == 0 {
|
||||
res = append(res, s.Name)
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
@ -40,6 +42,7 @@ type WorkSheet struct {
|
||||
|
||||
rows []*row
|
||||
maxcol int
|
||||
empty bool
|
||||
|
||||
iterRow int
|
||||
}
|
||||
@ -79,30 +82,91 @@ func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
|
||||
s.rows[rowIndex].cols[colIndex] = val
|
||||
}
|
||||
|
||||
func (s *WorkSheet) IsEmpty() bool {
|
||||
return s.empty
|
||||
}
|
||||
|
||||
func (s *WorkSheet) parse() error {
|
||||
var minRow, maxRow uint32
|
||||
var minCol, maxCol uint16
|
||||
for _, r := range s.b.substreams[s.ss] {
|
||||
if r.RecType == RecTypeWsBool {
|
||||
if (r.Data[1] & 0x10) != 0 {
|
||||
// it's a dialog
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var formulaRow, formulaCol uint16
|
||||
for _, r := range s.b.substreams[s.ss] {
|
||||
bb := bytes.NewReader(r.Data)
|
||||
|
||||
switch r.RecType {
|
||||
case RecTypeWindow2:
|
||||
opts := binary.LittleEndian.Uint16(r.Data)
|
||||
// right-to-left = 0x40, selected = 0x400
|
||||
log.Printf("sheet options: %x", opts)
|
||||
//case RecTypeWindow2:
|
||||
//opts := binary.LittleEndian.Uint16(r.Data)
|
||||
// right-to-left = 0x40, selected = 0x400
|
||||
//log.Printf("sheet options: %x", opts)
|
||||
case RecTypeDimensions:
|
||||
binary.Read(bb, binary.LittleEndian, &minRow)
|
||||
binary.Read(bb, binary.LittleEndian, &maxRow)
|
||||
binary.Read(bb, binary.LittleEndian, &minCol)
|
||||
binary.Read(bb, binary.LittleEndian, &maxCol)
|
||||
//log.Printf("dimensions: %d,%d + %dx%d", minRow&0x0000FFFF, minCol,
|
||||
// (maxRow&0x0000FFFF)-(minRow&0x0000FFFF), maxCol-minCol)
|
||||
if minRow > 0x0000FFFF || maxRow > 0x00010000 {
|
||||
log.Println("invalid dimensions")
|
||||
}
|
||||
if minCol > 0x00FF || maxCol > 0x0100 {
|
||||
log.Println("invalid dimensions")
|
||||
}
|
||||
if (maxRow-minRow) == 0 && (maxCol-minCol) == 0 {
|
||||
s.empty = true
|
||||
}
|
||||
|
||||
case RecTypeRow:
|
||||
row := &shRow{}
|
||||
binary.Read(bb, binary.LittleEndian, row)
|
||||
log.Printf("row spec: %+v", *row)
|
||||
if (row.Reserved & 0xFFFF) != 0 {
|
||||
log.Println("invalid Row spec")
|
||||
continue
|
||||
}
|
||||
//log.Printf("row spec: %+v", *row)
|
||||
|
||||
case RecTypeBlank:
|
||||
var rowIndex, colIndex uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
log.Printf("blank spec: %d %d", rowIndex, colIndex)
|
||||
//log.Printf("blank spec: %d %d", rowIndex, colIndex)
|
||||
|
||||
case RecTypeBoolErr:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
if r.Data[7] == 0 {
|
||||
bv := false
|
||||
if r.Data[6] == 1 {
|
||||
bv = true
|
||||
}
|
||||
s.placeValue(int(rowIndex), int(colIndex), bv)
|
||||
//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
|
||||
} else {
|
||||
be, ok := berrLookup[r.Data[6]]
|
||||
if !ok {
|
||||
be = "<unknown error>"
|
||||
}
|
||||
s.placeValue(int(rowIndex), int(colIndex), be)
|
||||
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
|
||||
}
|
||||
|
||||
case RecTypeMulBlank:
|
||||
var rowIndex, firstCol uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &firstCol)
|
||||
nrk := int((r.RecSize - 6) / 6)
|
||||
log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
|
||||
// nrk := int((r.RecSize - 6) / 6)
|
||||
// log.Printf("row blanks spec: %d %d %d", rowIndex, firstCol, nrk)
|
||||
|
||||
case RecTypeMulRk:
|
||||
mr := &shMulRK{}
|
||||
nrk := int((r.RecSize - 6) / 6)
|
||||
@ -123,8 +187,7 @@ func (s *WorkSheet) parse() error {
|
||||
s.placeValue(int(mr.RowIndex), int(mr.FirstCol)+i, rval)
|
||||
}
|
||||
binary.Read(bb, binary.LittleEndian, &mr.LastCol)
|
||||
|
||||
log.Printf("mulrow spec: %+v", *mr)
|
||||
//log.Printf("mulrow spec: %+v", *mr)
|
||||
|
||||
case RecTypeNumber:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
@ -135,7 +198,7 @@ func (s *WorkSheet) parse() error {
|
||||
binary.Read(bb, binary.LittleEndian, &xnum)
|
||||
value := math.Float64frombits(xnum)
|
||||
s.placeValue(int(rowIndex), int(colIndex), value)
|
||||
log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
|
||||
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
|
||||
|
||||
case RecTypeRK:
|
||||
var rowIndex, colIndex uint16
|
||||
@ -151,28 +214,59 @@ func (s *WorkSheet) parse() error {
|
||||
rval = rr.Value.Float64()
|
||||
}
|
||||
s.placeValue(int(rowIndex), int(colIndex), rval)
|
||||
log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
|
||||
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
|
||||
|
||||
case RecTypeFormula:
|
||||
var rowIndex, colIndex uint16
|
||||
binary.Read(bb, binary.LittleEndian, &rowIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
|
||||
log.Printf("formula spec: %d %d ~~ %+v", rowIndex, colIndex, r.Data)
|
||||
var ixfe uint16
|
||||
binary.Read(bb, binary.LittleEndian, &formulaRow)
|
||||
binary.Read(bb, binary.LittleEndian, &formulaCol)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
fdata := r.Data[6:]
|
||||
if fdata[6] == 0xFF && r.Data[7] == 0xFF {
|
||||
switch fdata[0] {
|
||||
case 0:
|
||||
// string in next record
|
||||
case 1:
|
||||
// boolean
|
||||
bv := false
|
||||
if fdata[2] != 0 {
|
||||
bv = true
|
||||
}
|
||||
s.placeValue(int(formulaRow), int(formulaCol), bv)
|
||||
case 2:
|
||||
// error value
|
||||
be, ok := berrLookup[fdata[2]]
|
||||
if !ok {
|
||||
be = "<unknown error>"
|
||||
}
|
||||
s.placeValue(int(formulaRow), int(formulaCol), be)
|
||||
case 3:
|
||||
// blank string
|
||||
default:
|
||||
log.Println("unknown formula value type")
|
||||
}
|
||||
} else {
|
||||
var xnum uint64
|
||||
binary.Read(bb, binary.LittleEndian, &xnum)
|
||||
value := math.Float64frombits(xnum)
|
||||
s.placeValue(int(formulaRow), int(formulaCol), value)
|
||||
}
|
||||
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
|
||||
|
||||
case RecTypeString:
|
||||
var charCount, flags uint16
|
||||
var charCount uint16
|
||||
var flags byte
|
||||
binary.Read(bb, binary.LittleEndian, &charCount)
|
||||
binary.Read(bb, binary.LittleEndian, &flags)
|
||||
s := ""
|
||||
fstr := ""
|
||||
if (flags & 1) == 0 {
|
||||
s = string(r.Data[4:])
|
||||
fstr = string(r.Data[3:])
|
||||
} else {
|
||||
us := make([]uint16, charCount)
|
||||
binary.Read(bb, binary.LittleEndian, us)
|
||||
s = string(utf16.Decode(us))
|
||||
fstr = string(utf16.Decode(us))
|
||||
}
|
||||
log.Printf("string spec: = %s", s)
|
||||
s.placeValue(int(formulaRow), int(formulaCol), fstr)
|
||||
|
||||
case RecTypeLabelSst:
|
||||
var rowIndex, colIndex, ixfe uint16
|
||||
@ -181,8 +275,11 @@ func (s *WorkSheet) parse() error {
|
||||
binary.Read(bb, binary.LittleEndian, &colIndex)
|
||||
binary.Read(bb, binary.LittleEndian, &ixfe)
|
||||
binary.Read(bb, binary.LittleEndian, &sstIndex)
|
||||
if int(sstIndex) > len(s.b.strings) {
|
||||
panic("invalid sst")
|
||||
}
|
||||
s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
|
||||
log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
|
||||
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
|
||||
|
||||
case RecTypeHLink:
|
||||
loc := &shRef8{}
|
||||
@ -214,13 +311,14 @@ func (s *WorkSheet) parse() error {
|
||||
binary.Read(bb, binary.LittleEndian, &cmcs)
|
||||
mcRefs := make([]shRef8, cmcs)
|
||||
binary.Read(bb, binary.LittleEndian, &mcRefs)
|
||||
log.Printf("MergeCells spec: %d records", cmcs)
|
||||
for j, mc := range mcRefs {
|
||||
log.Printf(" %d: %+v", j, mc)
|
||||
}
|
||||
//log.Printf("MergeCells spec: %d records", cmcs)
|
||||
// TODO: implement markers to annotate these in tabular output
|
||||
// for j, mc := range mcRefs {
|
||||
// log.Printf(" %d: %+v", j, mc)
|
||||
// }
|
||||
|
||||
default:
|
||||
log.Println("worksheet", r.RecType, r.RecSize)
|
||||
//log.Println("worksheet", r.RecType, r.RecSize)
|
||||
|
||||
}
|
||||
}
|
||||
@ -249,13 +347,15 @@ func (s *WorkSheet) Strings() []string {
|
||||
}
|
||||
|
||||
// Scan extracts values from the row into the provided arguments
|
||||
// Arguments must be pointers to one of 4 supported types:
|
||||
// int, float64, string, or time.Time
|
||||
// Arguments must be pointers to one of 5 supported types:
|
||||
// bool, int, float64, string, or time.Time
|
||||
func (s *WorkSheet) Scan(args ...interface{}) error {
|
||||
currow := s.rows[s.iterRow]
|
||||
|
||||
for i, a := range args {
|
||||
switch v := a.(type) {
|
||||
case *bool:
|
||||
*v = currow.cols[i].(bool)
|
||||
case *int:
|
||||
*v = currow.cols[i].(int)
|
||||
case *float64:
|
||||
@ -272,4 +372,15 @@ func (s *WorkSheet) Scan(args ...interface{}) error {
|
||||
}
|
||||
|
||||
// ErrInvalidType is returned by Scan for invalid arguments.
|
||||
var ErrInvalidType = errors.New("xls: Scan only supports *int, *float64, *string, *time.Time arguments")
|
||||
var ErrInvalidType = errors.New("xls: Scan only supports *bool, *int, *float64, *string, *time.Time arguments")
|
||||
|
||||
var berrLookup = map[byte]string{
|
||||
0x00: "#NULL!",
|
||||
0x07: "#DIV/0!",
|
||||
0x0F: "#VALUE!",
|
||||
0x17: "#REF!",
|
||||
0x1D: "#NAME?",
|
||||
0x24: "#NUM!",
|
||||
0x2A: "#N/A",
|
||||
0x2B: "#GETTING_DATA",
|
||||
}
|
||||
|
@ -14,14 +14,14 @@ func TestHeader(t *testing.T) {
|
||||
log.Println(wb.filename)
|
||||
|
||||
for _, s := range wb.Sheets() {
|
||||
log.Println(s)
|
||||
//log.Println(s)
|
||||
sheet, err := wb.Get(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for sheet.Next() {
|
||||
log.Println(sheet.Strings())
|
||||
sheet.Strings()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -32,6 +32,18 @@ func TestHeader2(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
log.Println(wb.filename)
|
||||
|
||||
for _, s := range wb.Sheets() {
|
||||
//log.Println(s)
|
||||
sheet, err := wb.Get(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for sheet.Next() {
|
||||
sheet.Strings()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeader3(t *testing.T) {
|
||||
@ -40,6 +52,18 @@ func TestHeader3(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
log.Println(wb.filename)
|
||||
|
||||
for _, s := range wb.Sheets() {
|
||||
//log.Println(s)
|
||||
sheet, err := wb.Get(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for sheet.Next() {
|
||||
sheet.Strings()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeader4(t *testing.T) {
|
||||
@ -49,4 +73,16 @@ func TestHeader4(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
log.Println(wb.filename)
|
||||
|
||||
for _, s := range wb.Sheets() {
|
||||
//log.Println(s)
|
||||
sheet, err := wb.Get(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for sheet.Next() {
|
||||
sheet.Strings()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
100
xls/strings.go
100
xls/strings.go
@ -4,7 +4,6 @@ import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
@ -45,36 +44,29 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
|
||||
var cbExtRs int32
|
||||
err := binary.Read(r, binary.LittleEndian, &cch)
|
||||
if err != nil {
|
||||
log.Println("x1", err)
|
||||
return "", err
|
||||
}
|
||||
err = binary.Read(r, binary.LittleEndian, &flags)
|
||||
if err != nil {
|
||||
log.Println("x2", err)
|
||||
return "", err
|
||||
}
|
||||
if (flags & 0x8) != 0 {
|
||||
log.Println("FORMATTING PRESENT")
|
||||
// rich formating data is present
|
||||
err = binary.Read(r, binary.LittleEndian, &cRun)
|
||||
if err != nil {
|
||||
log.Println("x3", err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
if (flags & 0x4) != 0 {
|
||||
log.Println("PHONETIC PRESENT")
|
||||
// phonetic string data is present
|
||||
err = binary.Read(r, binary.LittleEndian, &cbExtRs)
|
||||
if err != nil {
|
||||
log.Println("x4", err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
content := make([]uint16, cch)
|
||||
if (flags & 0x1) == 0 {
|
||||
log.Println("8BIT DATA", cch)
|
||||
// 16-bit characters but only the bottom 8bits
|
||||
contentBytes := make([]byte, cch)
|
||||
n, err2 := io.ReadFull(r, contentBytes)
|
||||
@ -91,30 +83,25 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
|
||||
}
|
||||
|
||||
} else {
|
||||
log.Println("16BIT DATA", cch)
|
||||
// 16-bit characters
|
||||
err = binary.Read(r, binary.LittleEndian, content)
|
||||
}
|
||||
if err != nil {
|
||||
log.Println("x5", err)
|
||||
return "", err
|
||||
}
|
||||
//////
|
||||
|
||||
if cRun > 0 {
|
||||
log.Println("READING FORMATTING DATA")
|
||||
// rich formating data is present
|
||||
_, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4)
|
||||
if err != nil {
|
||||
log.Println("x6", err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
if cbExtRs > 0 {
|
||||
log.Println("READING PHONETIC DATA")
|
||||
// phonetic string data is present
|
||||
n, err := io.CopyN(ioutil.Discard, r, int64(cbExtRs))
|
||||
_, err = io.CopyN(ioutil.Discard, r, int64(cbExtRs))
|
||||
if err != nil {
|
||||
log.Println("x7", n, cbExtRs, err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
@ -125,25 +112,16 @@ func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) {
|
||||
|
||||
// read in an array of XLUnicodeRichExtendedString s
|
||||
func parseSST(recs []*rec) ([]string, error) {
|
||||
totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
|
||||
//totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4])
|
||||
numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8])
|
||||
|
||||
// cell count limit is 65k x 256
|
||||
if numStrings > 65536*256 {
|
||||
log.Println("INVALID COUNTS total=", totalRefs, " -- n strings=", numStrings)
|
||||
totalRefs = 0
|
||||
numStrings = 65536 * 256
|
||||
}
|
||||
|
||||
log.Println("total=", totalRefs, " -- n strings=", numStrings)
|
||||
all := make([]string, 0, numStrings)
|
||||
|
||||
buf := recs[0].Data[8:]
|
||||
for i := 0; i < len(recs); {
|
||||
var blen int
|
||||
var cRunBytes int
|
||||
var flags byte
|
||||
var current []byte
|
||||
var current []uint16
|
||||
var cbExtRs uint32
|
||||
|
||||
for len(buf) > 0 {
|
||||
@ -152,12 +130,6 @@ func parseSST(recs []*rec) ([]string, error) {
|
||||
flags = buf[0]
|
||||
buf = buf[1:]
|
||||
|
||||
blen = int(slen)
|
||||
if (flags & 0x1) != 0 {
|
||||
// 16-bit characters
|
||||
blen = int(slen) * 2
|
||||
}
|
||||
|
||||
if (flags & 0x8) != 0 {
|
||||
// rich formating data is present
|
||||
cRun := binary.LittleEndian.Uint16(buf)
|
||||
@ -170,37 +142,47 @@ func parseSST(recs []*rec) ([]string, error) {
|
||||
buf = buf[4:]
|
||||
}
|
||||
|
||||
///////
|
||||
blx := len(buf)
|
||||
bly := len(buf) - 5
|
||||
if blx > 5 {
|
||||
blx = 5
|
||||
}
|
||||
if bly < 0 {
|
||||
bly = 0
|
||||
}
|
||||
|
||||
// this block will read the string data, but transparently
|
||||
// handle continuing across records
|
||||
current = make([]byte, blen)
|
||||
n := copy(current, buf)
|
||||
current = current[:n]
|
||||
buf = buf[n:]
|
||||
for len(current) < blen {
|
||||
i++
|
||||
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
|
||||
|
||||
n = int(blen) - len(current)
|
||||
if n > len(buf) {
|
||||
n = len(buf)
|
||||
current = make([]uint16, slen)
|
||||
for j := 0; j < int(slen); j++ {
|
||||
if len(buf) == 0 {
|
||||
i++
|
||||
if (recs[i].Data[0] & 1) == 0 {
|
||||
flags &= 0xFE
|
||||
} else {
|
||||
flags |= 1
|
||||
}
|
||||
buf = recs[i].Data[1:]
|
||||
}
|
||||
|
||||
if (flags & 1) == 0 { //8-bit
|
||||
current[j] = uint16(buf[0])
|
||||
buf = buf[1:]
|
||||
} else { //16-bit
|
||||
current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
|
||||
buf = buf[2:]
|
||||
if len(buf) == 1 {
|
||||
panic("off by one")
|
||||
}
|
||||
}
|
||||
current = append(current, buf[:n]...)
|
||||
buf = buf[n:]
|
||||
}
|
||||
|
||||
if (flags & 1) == 0 {
|
||||
s := string(current)
|
||||
all = append(all, s)
|
||||
} else {
|
||||
x := make([]uint16, len(current)/2)
|
||||
for y := 0; y < len(current); y += 2 {
|
||||
x[y/2] = binary.LittleEndian.Uint16(current[y : y+2])
|
||||
}
|
||||
s := string(utf16.Decode(x))
|
||||
all = append(all, s)
|
||||
}
|
||||
s := string(utf16.Decode(current))
|
||||
all = append(all, s)
|
||||
|
||||
///////
|
||||
|
||||
//log.Println(len(all), all[len(all)-1])
|
||||
for cRunBytes > 0 {
|
||||
if len(buf) >= int(cRunBytes) {
|
||||
buf = buf[cRunBytes:]
|
||||
@ -208,7 +190,7 @@ func parseSST(recs []*rec) ([]string, error) {
|
||||
} else {
|
||||
cRunBytes -= len(buf)
|
||||
i++
|
||||
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
|
||||
buf = recs[i].Data
|
||||
}
|
||||
}
|
||||
|
||||
@ -219,7 +201,7 @@ func parseSST(recs []*rec) ([]string, error) {
|
||||
} else {
|
||||
cbExtRs -= uint32(len(buf))
|
||||
i++
|
||||
buf = recs[i].Data[1:] // skip flag TODO: verify always zero?
|
||||
buf = recs[i].Data
|
||||
}
|
||||
}
|
||||
}
|
||||
|
192
xls/xls.go
192
xls/xls.go
@ -23,6 +23,7 @@ type WorkBook struct {
|
||||
ctx context.Context
|
||||
doc cfb.Document
|
||||
|
||||
prot bool
|
||||
h *header
|
||||
sheets []*boundSheet
|
||||
codepage uint16
|
||||
@ -34,8 +35,10 @@ type WorkBook struct {
|
||||
|
||||
fpos int64
|
||||
pos2substream map[int64]int
|
||||
}
|
||||
|
||||
decryptors map[int]crypto.Decryptor
|
||||
func (b *WorkBook) IsProtected() bool {
|
||||
return b.prot
|
||||
}
|
||||
|
||||
func Open(ctx context.Context, filename string) (*WorkBook, error) {
|
||||
@ -56,33 +59,134 @@ func Open(ctx context.Context, filename string) (*WorkBook, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
//br := bufio.NewReader(rdr)
|
||||
err = b.loadFromStream(rdr)
|
||||
return b, err
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
b.decryptors = make(map[int]crypto.Decryptor)
|
||||
func (b *WorkBook) loadFromStream(r io.ReadSeeker) error {
|
||||
return b.loadFromStream2(r, false)
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStreamWithDecryptor(r io.ReadSeeker, dec crypto.Decryptor) error {
|
||||
_, err := r.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
log.Println("xls: dec-seek1 failed")
|
||||
return err
|
||||
}
|
||||
|
||||
zeros := [8224]byte{}
|
||||
|
||||
type overlay struct {
|
||||
Pos int64
|
||||
|
||||
RecType recordType
|
||||
DataBytes uint16
|
||||
Data []byte // NB len() not necessarily = DataBytes
|
||||
}
|
||||
replaceBlocks := []overlay{}
|
||||
|
||||
obuf := &bytes.Buffer{}
|
||||
for err == nil {
|
||||
o := overlay{}
|
||||
o.Pos, _ = r.Seek(0, io.SeekCurrent)
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &o.RecType)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
continue
|
||||
}
|
||||
log.Println("xls: dec-read1 failed")
|
||||
return err
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &o.DataBytes)
|
||||
if err != nil {
|
||||
log.Println("xls: dec-read2 failed")
|
||||
return err
|
||||
}
|
||||
|
||||
// copy to output and decryption stream
|
||||
binary.Write(dec, binary.LittleEndian, o.RecType)
|
||||
binary.Write(dec, binary.LittleEndian, o.DataBytes)
|
||||
tocopy := int(o.DataBytes)
|
||||
|
||||
switch o.RecType {
|
||||
case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
|
||||
// copy original data into output
|
||||
o.Data = make([]byte, o.DataBytes)
|
||||
_, err = io.ReadFull(r, o.Data)
|
||||
if err != nil {
|
||||
log.Println("FAIL err", err)
|
||||
}
|
||||
dec.Write(zeros[:int(o.DataBytes)])
|
||||
tocopy = 0
|
||||
|
||||
case RecTypeBoundSheet8:
|
||||
// copy 32-bit position to output
|
||||
o.Data = make([]byte, 4)
|
||||
_, err = io.ReadFull(r, o.Data)
|
||||
if err != nil {
|
||||
log.Println("FAIL err", err)
|
||||
}
|
||||
dec.Write(zeros[:4])
|
||||
tocopy -= 4
|
||||
}
|
||||
|
||||
if tocopy > 0 {
|
||||
_, err = io.CopyN(dec, r, int64(tocopy))
|
||||
}
|
||||
replaceBlocks = append(replaceBlocks, o)
|
||||
}
|
||||
dec.Flush()
|
||||
io.Copy(obuf, dec)
|
||||
|
||||
alldata := obuf.Bytes()
|
||||
for _, o := range replaceBlocks {
|
||||
offs := int(o.Pos)
|
||||
binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType))
|
||||
binary.LittleEndian.PutUint16(alldata[offs+2:], uint16(o.DataBytes))
|
||||
if len(o.Data) > 0 {
|
||||
offs += 4
|
||||
copy(alldata[offs:], o.Data)
|
||||
}
|
||||
}
|
||||
|
||||
return b.loadFromStream2(bytes.NewReader(alldata), true)
|
||||
}
|
||||
|
||||
func (b *WorkBook) loadFromStream2(r io.ReadSeeker, isDecrypted bool) error {
|
||||
b.h = &header{}
|
||||
substr := -1
|
||||
nestedBOF := 0
|
||||
b.substreams = b.substreams[:0]
|
||||
b.pos2substream = make(map[int64]int, 10)
|
||||
b.fpos = 0
|
||||
nr, err := b.nextRecord(r)
|
||||
for err == nil {
|
||||
if nr.RecType == RecTypeBOF {
|
||||
substr++
|
||||
b.substreams = append(b.substreams, []*rec{})
|
||||
b.pos2substream[b.fpos] = substr
|
||||
switch nr.RecType {
|
||||
case RecTypeEOF:
|
||||
nestedBOF--
|
||||
case RecTypeBOF:
|
||||
// when substreams are nested, keep them in the same grouping
|
||||
if nestedBOF == 0 {
|
||||
substr = len(b.substreams)
|
||||
b.substreams = append(b.substreams, []*rec{})
|
||||
b.pos2substream[b.fpos] = substr
|
||||
}
|
||||
nestedBOF++
|
||||
}
|
||||
b.fpos += int64(4 + len(nr.Data))
|
||||
|
||||
if nr.RecType == RecTypeFilePass {
|
||||
if nr.RecType == RecTypeFilePass && !isDecrypted {
|
||||
etype := binary.LittleEndian.Uint16(nr.Data)
|
||||
switch etype {
|
||||
case 1:
|
||||
b.decryptors[substr], err = crypto.NewBasicRC4(nr.Data[2:])
|
||||
dec, err := crypto.NewBasicRC4(nr.Data[2:])
|
||||
if err != nil {
|
||||
log.Println("xls: rc4 encryption failed to set up", err)
|
||||
return err
|
||||
}
|
||||
return b.loadFromStreamWithDecryptor(r, dec)
|
||||
case 2, 3, 4:
|
||||
log.Println("need Crypto API RC4 decryptor")
|
||||
return errors.New("xls: unsupported Crypto API encryption method")
|
||||
@ -101,60 +205,13 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
return err
|
||||
}
|
||||
|
||||
for ss, records := range b.substreams {
|
||||
log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
|
||||
|
||||
if dec, ok := b.decryptors[ss]; ok {
|
||||
log.Printf("Decrypting substream...")
|
||||
|
||||
dec.Reset()
|
||||
var head [4]byte
|
||||
for _, nr := range records {
|
||||
binary.LittleEndian.PutUint16(head[:], uint16(nr.RecType))
|
||||
binary.LittleEndian.PutUint16(head[2:], nr.RecSize)
|
||||
|
||||
// send the record for decryption
|
||||
dec.Write(head[:])
|
||||
dec.Write(nr.Data)
|
||||
}
|
||||
dec.Flush()
|
||||
|
||||
newrecset := make([]*rec, 0, len(records))
|
||||
for _, nr := range records {
|
||||
dec.Read(head[:]) // discard 4 byte header
|
||||
|
||||
dr := &rec{
|
||||
RecType: nr.RecType,
|
||||
RecSize: nr.RecSize,
|
||||
Data: make([]byte, int(nr.RecSize)),
|
||||
}
|
||||
dec.Read(dr.Data)
|
||||
|
||||
switch nr.RecType {
|
||||
case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead:
|
||||
// keep original data
|
||||
copy(dr.Data, nr.Data)
|
||||
case RecTypeBoundSheet8:
|
||||
// copy the position un-decrypted
|
||||
copy(dr.Data[:4], nr.Data)
|
||||
default:
|
||||
// apply decryption
|
||||
}
|
||||
|
||||
newrecset = append(newrecset, dr)
|
||||
}
|
||||
|
||||
b.substreams[ss] = newrecset
|
||||
records = newrecset
|
||||
}
|
||||
|
||||
for _, records := range b.substreams {
|
||||
//log.Printf("Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records))
|
||||
for i, nr := range records {
|
||||
var bb io.Reader = bytes.NewReader(nr.Data)
|
||||
|
||||
switch nr.RecType {
|
||||
case RecTypeSST:
|
||||
//log.Println(i, nr.RecType)
|
||||
|
||||
recSet := []*rec{nr}
|
||||
|
||||
lastIndex := i
|
||||
@ -162,6 +219,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
lastIndex++
|
||||
recSet = append(recSet, records[lastIndex])
|
||||
}
|
||||
|
||||
b.strings, err = parseSST(recSet)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -170,7 +228,7 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
case RecTypeContinue:
|
||||
// no-op (used above)
|
||||
case RecTypeEOF:
|
||||
log.Println("End Of Stream")
|
||||
// done
|
||||
|
||||
case RecTypeBOF:
|
||||
err = binary.Read(bb, binary.LittleEndian, b.h)
|
||||
@ -187,24 +245,21 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
|
||||
// we only support the workbook or worksheet substreams
|
||||
log.Println("xls: unsupported document type")
|
||||
break
|
||||
//break
|
||||
}
|
||||
|
||||
case RecTypeCodePage:
|
||||
//log.Println(i, nr.RecType)
|
||||
err = binary.Read(bb, binary.LittleEndian, &b.codepage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case RecTypeDate1904:
|
||||
//log.Println(i, nr.RecType)
|
||||
err = binary.Read(bb, binary.LittleEndian, &b.dateMode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case RecTypeBoundSheet8:
|
||||
//log.Println(i, nr.RecType)
|
||||
bs := &boundSheet{}
|
||||
err = binary.Read(bb, binary.LittleEndian, &bs.Position)
|
||||
if err != nil {
|
||||
@ -229,7 +284,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
return err
|
||||
}
|
||||
b.sheets = append(b.sheets, bs)
|
||||
log.Println("SHEET", bs.Name, "at pos", bs.Position)
|
||||
default:
|
||||
//log.Println(i, "SKIPPED", nr.RecType)
|
||||
}
|
||||
@ -239,8 +293,6 @@ func (b *WorkBook) loadFromStream(r io.Reader) error {
|
||||
return err
|
||||
}
|
||||
|
||||
var errSkipped = errors.New("xls: skipped record type")
|
||||
|
||||
func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
|
||||
var rt recordType
|
||||
var rs uint16
|
||||
@ -248,8 +300,14 @@ func (b *WorkBook) nextRecord(r io.Reader) (*rec, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if rt == 0 {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
err = binary.Read(r, binary.LittleEndian, &rs)
|
||||
if rs > 8224 {
|
||||
return nil, errors.New("xls: invalid data format")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user