1
0
mirror of https://github.com/pbnjay/grate.git synced 2025-03-04 16:16:03 +02:00

improve error handling/drop panics

This commit is contained in:
Jeremy Jay 2021-02-08 15:36:08 -05:00
parent 80c3b4cc81
commit bf6d144fa3
5 changed files with 93 additions and 29 deletions

View File

@ -4,6 +4,8 @@ import (
"context"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
@ -14,6 +16,7 @@ import (
)
func main() {
pretend := flag.Bool("p", false, "pretend to output .tsv")
//infoOnly := flag.Bool("i", false, "show info/stats ONLY")
removeNewlines := flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents")
trimSpaces := flag.Bool("w", true, "trim whitespace from cell contents")
@ -45,9 +48,14 @@ func main() {
continue
}
s2 := sanitize.ReplaceAllString(s, "_")
f, err := os.Create(fn2 + "." + s2 + ".tsv")
if err != nil {
log.Fatal(err)
var w io.Writer = ioutil.Discard
if !*pretend {
f, err := os.Create(fn2 + "." + s2 + ".tsv")
if err != nil {
log.Fatal(err)
}
defer f.Close()
w = f
}
for sheet.Next() {
@ -66,10 +74,12 @@ func main() {
}
}
if nonblank || !*skipBlanks {
fmt.Fprintln(f, strings.Join(row, "\t"))
fmt.Fprintln(w, strings.Join(row, "\t"))
}
}
f.Close()
if c, ok := w.(io.Closer); ok {
c.Close()
}
}
}
}

View File

@ -76,7 +76,7 @@ type directory struct {
func (d *directory) String() string {
if (d.NameByteLen&1) == 1 || d.NameByteLen > 64 {
panic("invalid utf16 string")
return "<invalid utf16 string>"
}
r16 := utf16.Decode(d.Name[:int(d.NameByteLen)/2])
// trim off null terminator
@ -117,39 +117,39 @@ func (d *doc) load(rx io.ReadSeeker) error {
}
if fullAssertions {
if h.ClassID[0] != 0 || h.ClassID[1] != 0 {
panic("invalid CLSID")
return errors.New("ole2: invalid CLSID")
}
if h.MajorVersion != 3 && h.MajorVersion != 4 {
panic("unknown major version")
return errors.New("ole2: unknown major version")
}
if h.MinorVersion != 0x3E {
log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
//panic("unknown minor version")
//return errors.New("ole2: unknown minor version")
}
for _, v := range h.Reserved1 {
if v != 0 {
panic("reserved section is non-zero")
return errors.New("ole2: reserved section is non-zero")
}
}
if h.MajorVersion == 3 {
if h.SectorShift != 9 {
panic("invalid sector size")
return errors.New("ole2: invalid sector size")
}
if h.NumDirectorySectors != 0 {
panic("version 3 does not support directory sectors")
return errors.New("ole2: version 3 does not support directory sectors")
}
}
if h.MajorVersion == 4 {
if h.SectorShift != 12 {
panic("invalid sector size")
return errors.New("ole2: invalid sector size")
}
}
if h.MiniSectorShift != 6 {
panic("invalid mini sector size")
return errors.New("ole2: invalid mini sector size")
}
if h.MiniStreamCutoffSize != 0x00001000 {
panic("invalid mini sector cutoff")
return errors.New("ole2: invalid mini sector cutoff")
}
}
d.header = h
@ -166,6 +166,9 @@ func (d *doc) load(rx io.ReadSeeker) error {
break
}
offs := int64(1+sid) << int32(h.SectorShift)
if offs >= int64(len(d.data)) {
return errors.New("xls/cfb: unable to load file")
}
sector := d.data[offs:]
for j := 0; j < numFATentries; j++ {
sid2 := le.Uint32(sector)
@ -188,7 +191,7 @@ func (d *doc) load(rx io.ReadSeeker) error {
}
offs := int64(1+sid2) << int32(h.SectorShift)
if offs > int64(len(d.data)) {
if offs >= int64(len(d.data)) {
return errors.New("xls/cfb: unable to load file")
}
sector := d.data[offs:]
@ -209,18 +212,22 @@ func (d *doc) load(rx io.ReadSeeker) error {
sid := h.FirstMiniFATSectorLocation
for sid != secEndOfChain {
offs := int64(1+sid) << int32(h.SectorShift)
if offs >= int64(len(d.data)) {
return errors.New("xls/cfb: unable to load file")
}
sector := d.data[offs:]
for j := 0; j < numFATentries; j++ {
sid = le.Uint32(sector)
d.minifat = append(d.minifat, sid)
sector = sector[4:]
}
// chain the next mini FAT sector
sid = le.Uint32(sector)
if len(d.minifat) >= int(h.NumMiniFATSectors) {
break
}
// chain the next mini FAT sector
sid = le.Uint32(sector)
}
// step 3: read the Directory Entries
@ -270,7 +277,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error {
return nil
}
func (d *doc) getStreamReader(sid uint32, size uint64) io.ReadSeeker {
func (d *doc) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
// NB streamData is a slice of slices of the raw data, so this is the
// only allocation - for the (much smaller) list of sector slices
streamData := make([][]byte, 1+(size>>d.header.SectorShift))
@ -294,13 +301,13 @@ func (d *doc) getStreamReader(sid uint32, size uint64) io.ReadSeeker {
x++
}
if size != 0 {
panic("incomplete read")
return nil, errors.New("ole2: incomplete read")
}
return &SliceReader{Data: streamData}
return &SliceReader{Data: streamData}, nil
}
func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.ReadSeeker {
func (d *doc) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
// TODO: move into a separate cache so we don't recalculate it each time
fatStreamData := make([][]byte, 1+(d.ministreamsize>>d.header.SectorShift))
@ -346,5 +353,5 @@ func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.ReadSeeker {
sid = d.minifat[sid]
}
return &SliceReader{Data: streamData}
return &SliceReader{Data: streamData}, nil
}

View File

@ -45,9 +45,9 @@ func (d *doc) Open(name string) (io.ReadSeeker, error) {
for _, e := range d.dir {
if e.String() == name && e.ObjectType == typeStream {
if e.StreamSize < uint64(d.header.MiniStreamCutoffSize) {
return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize), nil
return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize)
} else if e.StreamSize != 0 {
return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize), nil
return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize)
}
}
}

View File

@ -99,8 +99,20 @@ func (s *WorkSheet) parse() error {
}
var formulaRow, formulaCol uint16
for _, r := range s.b.substreams[s.ss] {
for ridx, r := range s.b.substreams[s.ss] {
bb := bytes.NewReader(r.Data)
//log.Println(ridx, r.RecType)
// sec 2.1.7.20.6 Common Productions ABNF:
/*
CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2
CELL = FORMULA / Blank / MulBlank / RK / MulRk / BoolErr / Number / LabelSst
FORMULA = [Uncalced] Formula [Array / Table / ShrFmla / SUB] [String *Continue]
Not parsed from the list above:
DBCell, EntExU2, Uncalced, Array, Table,ShrFmla
NB: no idea what "SUB" is
*/
switch r.RecType {
//case RecTypeWindow2:
@ -266,6 +278,27 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, us)
fstr = string(utf16.Decode(us))
}
if (ridx + 1) < len(s.b.substreams[s.ss]) {
ridx2 := ridx + 1
nrecs := len(s.b.substreams[s.ss])
for ridx2 < nrecs {
r2 := s.b.substreams[s.ss][ridx2]
if r2.RecType != RecTypeContinue {
break
}
if (r2.Data[0] & 1) == 0 {
fstr += string(r2.Data[1:])
} else {
bb2 := bytes.NewReader(r2.Data[1:])
us := make([]uint16, len(r2.Data)-1)
binary.Read(bb2, binary.LittleEndian, us)
fstr += string(utf16.Decode(us))
}
ridx2++
}
}
s.placeValue(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst:
@ -276,7 +309,7 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, &ixfe)
binary.Read(bb, binary.LittleEndian, &sstIndex)
if int(sstIndex) > len(s.b.strings) {
panic("invalid sst")
return errors.New("xls: invalid sst index")
}
s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
@ -284,6 +317,8 @@ func (s *WorkSheet) parse() error {
case RecTypeHLink:
loc := &shRef8{}
binary.Read(bb, binary.LittleEndian, loc)
loc.FirstCol &= 0x00FF // spec doesn't say what to do when MUST is disregarded...
loc.LastCol &= 0x00FF
var x uint64
binary.Read(bb, binary.LittleEndian, &x) // skip and discard classid
binary.Read(bb, binary.LittleEndian, &x)
@ -301,10 +336,18 @@ func (s *WorkSheet) parse() error {
binary.Read(bb, binary.LittleEndian, us)
str = string(utf16.Decode(us))
}
//log.Printf("hyperlink spec: %+v = %s", loc, str)
if loc.FirstCol > maxCol {
//log.Println("invalid hyperlink column")
continue
}
if uint32(loc.FirstRow) > maxRow {
//log.Println("invalid hyperlink row")
continue
}
// TODO: apply merge cell rules
s.placeValue(int(loc.FirstRow), int(loc.FirstCol), str)
log.Printf("hyperlink spec: %+v = %s", loc, str)
case RecTypeMergeCells:
var cmcs uint16
@ -317,6 +360,9 @@ func (s *WorkSheet) parse() error {
// log.Printf(" %d: %+v", j, mc)
// }
case RecTypeContinue:
// the only situation so far is when used in RecTypeString above
default:
//log.Println("worksheet", r.RecType, r.RecSize)

View File

@ -2,6 +2,7 @@ package xls
import (
"encoding/binary"
"errors"
"io"
"io/ioutil"
"unicode/utf16"
@ -173,7 +174,7 @@ func parseSST(recs []*rec) ([]string, error) {
current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
buf = buf[2:]
if len(buf) == 1 {
panic("off by one")
return nil, errors.New("xls: off by one")
}
}
}