From f794a5ef9b425db6d350c38c4814d3b7f075401c Mon Sep 17 00:00:00 2001 From: Jeremy Jay Date: Sun, 7 Feb 2021 23:52:37 -0500 Subject: [PATCH] allow seeking in SliceReader --- xls/cfb/cfb.go | 4 +-- xls/cfb/interface.go | 4 +-- xls/cfb/simple_test.go | 59 ++++++++++++++++++++++++++++++++++++ xls/cfb/slicereader.go | 69 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 4 deletions(-) diff --git a/xls/cfb/cfb.go b/xls/cfb/cfb.go index f9daf44..ab5bafe 100644 --- a/xls/cfb/cfb.go +++ b/xls/cfb/cfb.go @@ -265,7 +265,7 @@ func (d *doc) buildDirs(br *bytes.Reader) error { return nil } -func (d *doc) getStreamReader(sid uint32, size uint64) io.Reader { +func (d *doc) getStreamReader(sid uint32, size uint64) io.ReadSeeker { // NB streamData is a slice of slices of the raw data, so this is the // only allocation - for the (much smaller) list of sector slices streamData := make([][]byte, 1+(size>>d.header.SectorShift)) @@ -295,7 +295,7 @@ func (d *doc) getStreamReader(sid uint32, size uint64) io.Reader { return &SliceReader{Data: streamData} } -func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.Reader { +func (d *doc) getMiniStreamReader(sid uint32, size uint64) io.ReadSeeker { // TODO: move into a separate cache so we don't recalculate it each time fatStreamData := make([][]byte, 1+(d.ministreamsize>>d.header.SectorShift)) diff --git a/xls/cfb/interface.go b/xls/cfb/interface.go index caa04d8..6fe87a9 100644 --- a/xls/cfb/interface.go +++ b/xls/cfb/interface.go @@ -12,7 +12,7 @@ type Document interface { List() ([]string, error) // Open the named stream contained in the document. - Open(name string) (io.Reader, error) + Open(name string) (io.ReadSeeker, error) } // Open a Compound File Binary Format document. @@ -41,7 +41,7 @@ func (d *doc) List() ([]string, error) { } // Open the named stream contained in the document. 
-func (d *doc) Open(name string) (io.Reader, error) { +func (d *doc) Open(name string) (io.ReadSeeker, error) { for _, e := range d.dir { if e.String() == name && e.ObjectType == typeStream { if e.StreamSize < uint64(d.header.MiniStreamCutoffSize) { diff --git a/xls/cfb/simple_test.go b/xls/cfb/simple_test.go index 3256e39..7635e24 100644 --- a/xls/cfb/simple_test.go +++ b/xls/cfb/simple_test.go @@ -1,6 +1,7 @@ package cfb import ( + "io" "io/ioutil" "log" "os" @@ -64,3 +65,61 @@ func TestHeader4(t *testing.T) { } log.Println(len(data)) } + +var testSlices = [][]byte{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34, 35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44, 45, 46, 47, 48, 49}, +} + +func TestSliceReader(t *testing.T) { + sr := &SliceReader{ + Data: testSlices, + } + var uno, old [1]byte + _, err := sr.Read(uno[:]) + for err == nil { + old[0] = uno[0] + _, err = sr.Read(uno[:]) + if err == nil && uno[0] != (old[0]+1) { + log.Printf("read data out of order new=%d, old=%d", uno[0], old[0]) + t.Fail() + } + } + sr.Seek(0, io.SeekStart) + _, err = sr.Read(uno[:]) + for err == nil { + old[0] = uno[0] + _, err = sr.Read(uno[:]) + if err == nil && uno[0] != (old[0]+1) { + log.Printf("read data out of order new=%d, old=%d", uno[0], old[0]) + t.Fail() + } + } + sr.Seek(10, io.SeekStart) + _, err = sr.Read(uno[:]) + if uno[0] != 10 { + log.Printf("unexpected element %d (expected %d)", uno[0], 10) + t.Fail() + } + sr.Seek(35, io.SeekStart) + _, err = sr.Read(uno[:]) + if uno[0] != 35 { + log.Printf("unexpected element %d (expected %d)", uno[0], 35) + t.Fail() + } + sr.Seek(7, io.SeekCurrent) + _, err = sr.Read(uno[:]) + if uno[0] != 43 { + log.Printf("unexpected element %d (expected %d)", uno[0], 43) + t.Fail() + } + sr.Seek(-9, io.SeekCurrent) + _, err = sr.Read(uno[:]) + if uno[0] != 35 { + log.Printf("unexpected element %d (expected %d)", uno[0], 35) + t.Fail() + } +} diff
--git a/xls/cfb/slicereader.go b/xls/cfb/slicereader.go index 6f637e4..282ddf2 100644 --- a/xls/cfb/slicereader.go +++ b/xls/cfb/slicereader.go @@ -1,15 +1,20 @@ package cfb import ( + "errors" "io" ) +// SliceReader wraps a list of slices as an io.ReadSeeker that +// can transparently merge them into a single coherent stream. type SliceReader struct { + CSize []int64 Data [][]byte Index uint Offset uint } +// Read implements the io.Reader interface. func (s *SliceReader) Read(b []byte) (int, error) { if s.Index >= uint(len(s.Data)) { return 0, io.EOF } @@ -26,3 +31,67 @@ func (s *SliceReader) Read(b []byte) (int, error) { return 0, io.EOF } + +var _ io.Seeker = (*SliceReader)(nil) + +// Seek implements the io.Seeker interface. +func (s *SliceReader) Seek(offset int64, whence int) (int64, error) { + if len(s.CSize) != len(s.Data) { + // calculate the cumulative block size cache + s.CSize = make([]int64, len(s.Data)) + sz := int64(0) + for i, d := range s.Data { + s.CSize[i] = sz + sz += int64(len(d)) + } + } + if s.Index >= uint(len(s.CSize)) { + s.Index = uint(len(s.CSize) - 1) + s.Offset = uint(len(s.Data[s.Index])) + } + // current offset in stream + trueOffset := int64(s.Offset) + s.CSize[int(s.Index)] + + switch whence { + case io.SeekStart: + if offset < 0 { + return -1, errors.New("xls: invalid seek offset") + } + s.Index = 0 + s.Offset = 0 + trueOffset = 0 + + case io.SeekEnd: + if offset > 0 { + return -1, errors.New("xls: invalid seek offset") + } + + s.Index = uint(len(s.Data) - 1) + s.Offset = uint(len(s.Data[s.Index])) + trueOffset = int64(s.Offset) + s.CSize[s.Index] + + default: + // current position already defined + } + + wantOffset := offset + trueOffset + for trueOffset != wantOffset { + loOffset := s.CSize[int(s.Index)] + hiOffset := s.CSize[int(s.Index)] + int64(len(s.Data[s.Index])) + if wantOffset > loOffset && wantOffset < hiOffset { + s.Offset = uint(wantOffset - loOffset) + return wantOffset, nil + } + 
 + if trueOffset > wantOffset { + s.Index-- + s.Offset = 0 + 
trueOffset = s.CSize[int(s.Index)] + } else if trueOffset < wantOffset { + s.Index++ + s.Offset = 0 + trueOffset = s.CSize[int(s.Index)] + } + } + return wantOffset, nil +}