Mirror of https://github.com/pbnjay/grate.git (synced 2025-03-04 16:16:03 +02:00)
improve error handling/drop panics
This commit is contained in:
parent 80c3b4cc81
commit bf6d144fa3
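The pattern this commit applies, shown below as a minimal, self-contained Go sketch: validation failures that previously called panic now return a prefixed error the caller can handle. The function name, the checked value, and the error text here are illustrative assumptions, not the library's actual API.

package main

import (
    "errors"
    "fmt"
)

// checkSectorShift is a hypothetical validator following the commit's pattern:
// instead of panicking on a malformed value, it returns a descriptive error.
func checkSectorShift(shift uint16) error {
    if shift != 9 && shift != 12 {
        return errors.New("ole2: invalid sector size")
    }
    return nil
}

func main() {
    // The caller decides how to react, e.g. skip the bad file instead of crashing.
    if err := checkSectorShift(7); err != nil {
        fmt.Println("refusing file:", err)
    }
}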
@@ -4,6 +4,8 @@ import (
     "context"
     "flag"
     "fmt"
+    "io"
+    "io/ioutil"
     "log"
     "os"
     "path/filepath"
@@ -14,6 +16,7 @@ import (
 )
 
 func main() {
+    pretend := flag.Bool("p", false, "pretend to output .tsv")
     //infoOnly := flag.Bool("i", false, "show info/stats ONLY")
     removeNewlines := flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents")
     trimSpaces := flag.Bool("w", true, "trim whitespace from cell contents")
@@ -45,9 +48,14 @@ func main() {
                 continue
             }
             s2 := sanitize.ReplaceAllString(s, "_")
-            f, err := os.Create(fn2 + "." + s2 + ".tsv")
-            if err != nil {
-                log.Fatal(err)
+            var w io.Writer = ioutil.Discard
+            if !*pretend {
+                f, err := os.Create(fn2 + "." + s2 + ".tsv")
+                if err != nil {
+                    log.Fatal(err)
+                }
+                defer f.Close()
+                w = f
             }
 
             for sheet.Next() {
@@ -66,10 +74,12 @@ func main() {
                     }
                 }
                 if nonblank || !*skipBlanks {
-                    fmt.Fprintln(f, strings.Join(row, "\t"))
+                    fmt.Fprintln(w, strings.Join(row, "\t"))
                 }
             }
-            f.Close()
+            if c, ok := w.(io.Closer); ok {
+                c.Close()
+            }
         }
     }
 }

@@ -76,7 +76,7 @@ type directory struct {
 
 func (d *directory) String() string {
     if (d.NameByteLen&1) == 1 || d.NameByteLen > 64 {
-        panic("invalid utf16 string")
+        return "<invalid utf16 string>"
     }
     r16 := utf16.Decode(d.Name[:int(d.NameByteLen)/2])
     // trim off null terminator
@@ -117,39 +117,39 @@ func (d *doc) load(rx io.ReadSeeker) error {
     }
     if fullAssertions {
         if h.ClassID[0] != 0 || h.ClassID[1] != 0 {
-            panic("invalid CLSID")
+            return errors.New("ole2: invalid CLSID")
         }
         if h.MajorVersion != 3 && h.MajorVersion != 4 {
-            panic("unknown major version")
+            return errors.New("ole2: unknown major version")
         }
         if h.MinorVersion != 0x3E {
            log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
-            //panic("unknown minor version")
+            //return errors.New("ole2: unknown minor version")
         }
 
         for _, v := range h.Reserved1 {
             if v != 0 {
-                panic("reserved section is non-zero")
+                return errors.New("ole2: reserved section is non-zero")
             }
         }
         if h.MajorVersion == 3 {
             if h.SectorShift != 9 {
-                panic("invalid sector size")
+                return errors.New("ole2: invalid sector size")
             }
             if h.NumDirectorySectors != 0 {
-                panic("version 3 does not support directory sectors")
+                return errors.New("ole2: version 3 does not support directory sectors")
             }
         }
         if h.MajorVersion == 4 {
             if h.SectorShift != 12 {
-                panic("invalid sector size")
+                return errors.New("ole2: invalid sector size")
             }
         }
         if h.MiniSectorShift != 6 {
-            panic("invalid mini sector size")
+            return errors.New("ole2: invalid mini sector size")
         }
         if h.MiniStreamCutoffSize != 0x00001000 {
-            panic("invalid mini sector cutoff")
+            return errors.New("ole2: invalid mini sector cutoff")
         }
     }
     d.header = h
@@ -166,6 +166,9 @@ func (d *doc) load(rx io.ReadSeeker) error {
             break
         }
         offs := int64(1+sid) << int32(h.SectorShift)
+        if offs >= int64(len(d.data)) {
+            return errors.New("xls/cfb: unable to load file")
+        }
         sector := d.data[offs:]
         for j := 0; j < numFATentries; j++ {
             sid2 := le.Uint32(sector)
@@ -188,7 +191,7 @@ func (d *doc) load(rx io.ReadSeeker) error {
             }
 
             offs := int64(1+sid2) << int32(h.SectorShift)
-            if offs > int64(len(d.data)) {
+            if offs >= int64(len(d.data)) {
                 return errors.New("xls/cfb: unable to load file")
             }
             sector := d.data[offs:]
@@ -209,18 +212,22 @@ func (d *doc) load(rx io.ReadSeeker) error {
     sid := h.FirstMiniFATSectorLocation
     for sid != secEndOfChain {
         offs := int64(1+sid) << int32(h.SectorShift)
+        if offs >= int64(len(d.data)) {
+            return errors.New("xls/cfb: unable to load file")
+        }
         sector := d.data[offs:]
         for j := 0; j < numFATentries; j++ {
             sid = le.Uint32(sector)
             d.minifat = append(d.minifat, sid)
             sector = sector[4:]
         }
-        // chain the next mini FAT sector
-        sid = le.Uint32(sector)
 
         if len(d.minifat) >= int(h.NumMiniFATSectors) {
             break
         }
+
+        // chain the next mini FAT sector
+        sid = le.Uint32(sector)
     }
 
     // step 3: read the Directory Entries
@@ -270,7 +277,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error {
     return nil
 }
 
-func (d *doc) getStreamReader(sid uint32, size uint64) io.ReadSeeker {
+func (d *doc) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
     // NB streamData is a slice of slices of the raw data, so this is the
     // only allocation - for the (much smaller) list of sector slices
     streamData := make([][]byte, 1+(size>>d.header.SectorShift))
@@ -294,13 +301,13 @@ func (d *doc) getStreamReader(sid uint32, size uint64) io.ReadSeeker {
         x++
     }
     if size != 0 {
-        panic("incomplete read")
+        return nil, errors.New("ole2: incomplete read")
     }
 
-    return &SliceReader{Data: streamData}
+    return &SliceReader{Data: streamData}, nil
 }
 
-func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.ReadSeeker {
+func (d *doc) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
     // TODO: move into a separate cache so we don't recalculate it each time
     fatStreamData := make([][]byte, 1+(d.ministreamsize>>d.header.SectorShift))
 
@@ -346,5 +353,5 @@ func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.ReadSeeker {
         sid = d.minifat[sid]
     }
 
-    return &SliceReader{Data: streamData}
+    return &SliceReader{Data: streamData}, nil
 }

@@ -45,9 +45,9 @@ func (d *doc) Open(name string) (io.ReadSeeker, error) {
     for _, e := range d.dir {
         if e.String() == name && e.ObjectType == typeStream {
             if e.StreamSize < uint64(d.header.MiniStreamCutoffSize) {
-                return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize), nil
+                return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize)
             } else if e.StreamSize != 0 {
-                return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize), nil
+                return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize)
             }
         }
     }

@@ -99,8 +99,20 @@ func (s *WorkSheet) parse() error {
     }
 
     var formulaRow, formulaCol uint16
-    for _, r := range s.b.substreams[s.ss] {
+    for ridx, r := range s.b.substreams[s.ss] {
         bb := bytes.NewReader(r.Data)
+        //log.Println(ridx, r.RecType)
 
+        // sec 2.1.7.20.6 Common Productions ABNF:
+        /*
+            CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2
+            CELL = FORMULA / Blank / MulBlank / RK / MulRk / BoolErr / Number / LabelSst
+            FORMULA = [Uncalced] Formula [Array / Table / ShrFmla / SUB] [String *Continue]
+
+            Not parsed from the list above:
+                DBCell, EntExU2, Uncalced, Array, Table, ShrFmla
+            NB: no idea what "SUB" is
+        */
+
         switch r.RecType {
         //case RecTypeWindow2:
@@ -266,6 +278,27 @@ func (s *WorkSheet) parse() error {
                 binary.Read(bb, binary.LittleEndian, us)
                 fstr = string(utf16.Decode(us))
             }
+
+            if (ridx + 1) < len(s.b.substreams[s.ss]) {
+                ridx2 := ridx + 1
+                nrecs := len(s.b.substreams[s.ss])
+                for ridx2 < nrecs {
+                    r2 := s.b.substreams[s.ss][ridx2]
+                    if r2.RecType != RecTypeContinue {
+                        break
+                    }
+                    if (r2.Data[0] & 1) == 0 {
+                        fstr += string(r2.Data[1:])
+                    } else {
+                        bb2 := bytes.NewReader(r2.Data[1:])
+                        us := make([]uint16, len(r2.Data)-1)
+                        binary.Read(bb2, binary.LittleEndian, us)
+                        fstr += string(utf16.Decode(us))
+                    }
+                    ridx2++
+                }
+            }
+
             s.placeValue(int(formulaRow), int(formulaCol), fstr)
 
         case RecTypeLabelSst:
@@ -276,7 +309,7 @@ func (s *WorkSheet) parse() error {
             binary.Read(bb, binary.LittleEndian, &ixfe)
             binary.Read(bb, binary.LittleEndian, &sstIndex)
             if int(sstIndex) > len(s.b.strings) {
-                panic("invalid sst")
+                return errors.New("xls: invalid sst index")
             }
             s.placeValue(int(rowIndex), int(colIndex), s.b.strings[sstIndex])
             //log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
@@ -284,6 +317,8 @@ func (s *WorkSheet) parse() error {
         case RecTypeHLink:
             loc := &shRef8{}
             binary.Read(bb, binary.LittleEndian, loc)
+            loc.FirstCol &= 0x00FF // spec doesn't say what to do when MUST is disregarded...
+            loc.LastCol &= 0x00FF
             var x uint64
             binary.Read(bb, binary.LittleEndian, &x) // skip and discard classid
             binary.Read(bb, binary.LittleEndian, &x)
@@ -301,10 +336,18 @@ func (s *WorkSheet) parse() error {
                 binary.Read(bb, binary.LittleEndian, us)
                 str = string(utf16.Decode(us))
             }
+            //log.Printf("hyperlink spec: %+v = %s", loc, str)
+            if loc.FirstCol > maxCol {
+                //log.Println("invalid hyperlink column")
+                continue
+            }
+            if uint32(loc.FirstRow) > maxRow {
+                //log.Println("invalid hyperlink row")
+                continue
+            }
 
             // TODO: apply merge cell rules
             s.placeValue(int(loc.FirstRow), int(loc.FirstCol), str)
-            log.Printf("hyperlink spec: %+v = %s", loc, str)
 
         case RecTypeMergeCells:
             var cmcs uint16
@@ -317,6 +360,9 @@ func (s *WorkSheet) parse() error {
             // log.Printf(" %d: %+v", j, mc)
             // }
 
+        case RecTypeContinue:
+            // the only situation so far is when used in RecTypeString above
+
         default:
             //log.Println("worksheet", r.RecType, r.RecSize)
 

@@ -2,6 +2,7 @@ package xls
 
 import (
     "encoding/binary"
+    "errors"
    "io"
    "io/ioutil"
    "unicode/utf16"
@@ -173,7 +174,7 @@ func parseSST(recs []*rec) ([]string, error) {
             current[j] = uint16(binary.LittleEndian.Uint16(buf[:2]))
             buf = buf[2:]
             if len(buf) == 1 {
-                panic("off by one")
+                return nil, errors.New("xls: off by one")
             }
         }
     }