1
0
mirror of https://github.com/pbnjay/grate.git synced 2024-12-12 13:35:18 +02:00

refactor sheets and formatting so we can use for type detection

This commit is contained in:
Jeremy Jay 2021-02-21 23:25:18 -05:00
parent 167574603f
commit 35c8ec73bc
12 changed files with 686 additions and 359 deletions

281
commonxl/cell.go Normal file
View File

@ -0,0 +1,281 @@
package commonxl
import (
"fmt"
"math"
"net/url"
"strconv"
"time"
"unicode/utf16"
)
// CellType annotates the type of data extracted in the cell.
// It is the value stored in the second element of a populated Cell.
type CellType uint16
// CellType annotations for various cell value types.
// Values are assigned by iota in declaration order, so BlankCell is the
// zero value of CellType.
const (
// BlankCell marks a cell with no content (NewCell stores a nil value).
BlankCell CellType = iota
// IntegerCell marks a value that NewCell stores as an int64.
IntegerCell
// FloatCell marks a value that NewCell stores as a float64.
FloatCell
// StringCell marks a value that NewCell stores as a string.
StringCell
// BooleanCell marks a value that NewCell stores as a bool.
BooleanCell
// DateCell marks a value that NewCell stores as a time.Time.
DateCell
HyperlinkStringCell // internal type to separate URLs
StaticCell // placeholder, internal use only
)
// Cell represents a single cell value.
//
// Internally it is a slice holding one of the following layouts:
//   [Value, CellType]
//   [Value, CellType, FormatNumber]
//   [Value, CellType, FormatNumber, Link]
// where FormatNumber is a uint16 and Link is the hyperlink string appended
// by SetURL. A Cell with fewer than two elements is reported as BlankCell
// by Type, and a Cell with no elements yields "" from Value.
type Cell []interface{}
// Value returns the raw contents of the cell as a generic interface{}.
// A cell with no elements at all yields the empty string.
func (c Cell) Value() interface{} {
	if len(c) > 0 {
		return c[0]
	}
	return ""
}
// SetURL adds a URL hyperlink to the cell.
//
// The cell is re-tagged as HyperlinkStringCell and the link is stored in the
// fourth slot, which is the slot URL reads. A missing format number slot is
// filled with uint16(0). Calling SetURL again replaces the previous link
// rather than appending another element, and a cell that has not been
// populated yet (fewer than two elements) is padded instead of panicking.
func (c *Cell) SetURL(link string) {
	cell := *c
	if len(cell) < 2 {
		// Keep whatever value is already present; an empty cell has none.
		var v interface{}
		if len(cell) == 1 {
			v = cell[0]
		}
		cell = Cell{v, BlankCell}
	}
	cell[1] = HyperlinkStringCell

	switch len(cell) {
	case 2:
		cell = append(cell, uint16(0), link)
	case 3:
		cell = append(cell, link)
	default:
		// A link slot already exists: overwrite it.
		cell[3] = link
	}
	*c = cell
}
// URL returns the parsed hyperlink attached to the cell.
// The boolean is false when the cell is not a HyperlinkStringCell, when it
// carries no link element, or when the stored link fails to parse.
func (c Cell) URL() (*url.URL, bool) {
	if c.Type() != HyperlinkStringCell || len(c) < 4 {
		return nil, false
	}
	parsed, err := url.Parse(c[3].(string))
	return parsed, err == nil
}
// Type returns the CellType of the value.
// Cells with fewer than two elements are reported as BlankCell.
func (c Cell) Type() CellType {
	if len(c) >= 2 {
		return c[1].(CellType)
	}
	return BlankCell
}
// FormatNo returns the NumberFormat used for display.
//
// The format number lives in the third slot of the cell. It is returned for
// any cell that has that slot, including cells that also carry a hyperlink
// in a fourth slot. Cells without a format slot report 0.
func (c Cell) FormatNo() uint16 {
	if len(c) < 3 {
		return 0
	}
	if f, ok := c[2].(uint16); ok {
		return f
	}
	return 0
}
///////
// boolStrings maps the textual spellings recognised as booleans to their
// value. Only all-lowercase and all-uppercase spellings are listed; lookups
// of any other string yield the map's zero value (false).
var boolStrings = map[string]bool{
	// spellings of true
	"yes":  true,
	"YES":  true,
	"true": true,
	"TRUE": true,
	"t":    true,
	"T":    true,
	"y":    true,
	"Y":    true,
	"on":   true,
	"ON":   true,
	"1":    true,
	"1.0":  true,

	// spellings of false
	"no":    false,
	"NO":    false,
	"false": false,
	"FALSE": false,
	"f":     false,
	"F":     false,
	"n":     false,
	"N":     false,
	"off":   false,
	"OFF":   false,
	"0":     false,
	"0.0":   false,
}
// NewCellWithType creates a new cell value with the given type, coercing as necessary.
//
// The value is first converted with NewCell. If the natural type already
// matches t it is returned unchanged. Otherwise these coercions apply:
//
//   - bool    -> integer (0/1), float (0/1) or string ("TRUE"/"FALSE")
//   - float   -> integer (truncated) or bool (non-zero is true)
//   - integer -> float or bool (non-zero is true)
//   - string  -> integer, float or bool; text that does not parse becomes
//     the zero value of the target (0, 0.0 or false)
//   - anything non-blank -> string via fmt.Sprint
//
// Blank cells are never coerced: there is no value to convert, and
// formatting the stored nil would produce the literal text "<nil>".
// Conversions to DateCell are not implemented; the cell keeps its natural
// type in that case, as it does for any other unsupported combination.
func NewCellWithType(value interface{}, t CellType) Cell {
	c := NewCell(value)
	if c[1] == t {
		// fast path if it was already typed correctly
		return c
	}

	switch c[1] {
	case BlankCell:
		return c

	case BooleanCell:
		b := c[0].(bool)
		switch t {
		case IntegerCell:
			var n int64
			if b {
				n = 1
			}
			c[0], c[1] = n, IntegerCell
		case FloatCell:
			var f float64
			if b {
				f = 1.0
			}
			c[0], c[1] = f, FloatCell
		case StringCell:
			s := "FALSE"
			if b {
				s = "TRUE"
			}
			c[0], c[1] = s, StringCell
		}

	case FloatCell:
		f := c[0].(float64)
		switch t {
		case IntegerCell:
			c[0], c[1] = int64(f), IntegerCell
		case BooleanCell:
			c[0], c[1] = f != 0.0, BooleanCell
		}

	case IntegerCell:
		n := c[0].(int64)
		switch t {
		case FloatCell:
			c[0], c[1] = float64(n), FloatCell
		case BooleanCell:
			c[0], c[1] = n != 0, BooleanCell
		}

	case StringCell:
		s := c[0].(string)
		switch t {
		case IntegerCell:
			// Parse errors are deliberately ignored: the cell is forced to
			// the requested type and unparsable text becomes 0.
			n, _ := strconv.ParseInt(s, 10, 64)
			c[0], c[1] = n, IntegerCell
		case FloatCell:
			// As above; ParseFloat's value is stored even on error.
			f, _ := strconv.ParseFloat(s, 64)
			c[0], c[1] = f, FloatCell
		case BooleanCell:
			// Unknown spellings fall back to false.
			c[0], c[1] = boolStrings[s], BooleanCell
		}
	}

	if t == StringCell && c[1] != StringCell {
		// Generic fallback for the remaining source types (numbers, dates).
		c[0] = fmt.Sprint(c[0])
		c[1] = StringCell
	}

	// TODO: conversion to DateCell is not implemented yet.
	return c
}
// NewCell creates a new cell value from any builtin type.
//
// The result always has exactly two elements, [Value, CellType]:
//
//   - bool becomes a BooleanCell
//   - signed and unsigned integers become an IntegerCell holding an int64;
//     unsigned values larger than math.MaxInt64 become a FloatCell instead
//   - float32 and float64 become a FloatCell holding a float64
//   - string, []byte, []rune, []uint16 (decoded as UTF-16) and fmt.Stringer
//     become a StringCell, or a BlankCell when the text is empty
//   - time.Time becomes a DateCell
//   - nil becomes a BlankCell
//
// Any other type is a programming error and panics.
func NewCell(value interface{}) Cell {
	switch v := value.(type) {
	case nil:
		return Cell{nil, BlankCell}
	case bool:
		return Cell{v, BooleanCell}
	case int:
		return Cell{int64(v), IntegerCell}
	case int8:
		return Cell{int64(v), IntegerCell}
	case int16:
		return Cell{int64(v), IntegerCell}
	case int32:
		return Cell{int64(v), IntegerCell}
	case int64:
		return Cell{v, IntegerCell}
	case uint8:
		return Cell{int64(v), IntegerCell}
	case uint16:
		return Cell{int64(v), IntegerCell}
	case uint32:
		return Cell{int64(v), IntegerCell}
	case uint:
		// Widen first: comparing a uint directly against math.MaxInt64 does
		// not compile on platforms where uint is 32 bits wide.
		return newUnsignedCell(uint64(v))
	case uint64:
		return newUnsignedCell(v)
	case float32:
		return Cell{float64(v), FloatCell}
	case float64:
		return Cell{v, FloatCell}
	case string:
		return newTextCell(v)
	case []byte:
		return newTextCell(string(v))
	case []uint16:
		return newTextCell(string(utf16.Decode(v)))
	case []rune:
		return newTextCell(string(v))
	case time.Time:
		// Must precede fmt.Stringer, which time.Time also implements.
		return Cell{v, DateCell}
	case fmt.Stringer:
		return newTextCell(v.String())
	default:
		panic("grate: data type not handled")
	}
}

// newUnsignedCell stores u as an int64 when it fits and as a float64 otherwise.
func newUnsignedCell(u uint64) Cell {
	if u > math.MaxInt64 {
		return Cell{float64(u), FloatCell}
	}
	return Cell{int64(u), IntegerCell}
}

// newTextCell builds a StringCell, or a BlankCell when s is empty.
func newTextCell(s string) Cell {
	if len(s) == 0 {
		return Cell{nil, BlankCell}
	}
	return Cell{s, StringCell}
}
// SetFormatNumber changes the number format stored with the cell.
//
// A format of 0 means "no format": a trailing format slot is removed. When
// the cell also carries a hyperlink after the format slot, the slot is
// zeroed in place instead so that the link is not discarded. A cell that
// has not been populated yet (fewer than two elements) is padded as a blank
// cell before a non-zero format is stored.
func (c *Cell) SetFormatNumber(f uint16) {
	cell := *c
	switch n := len(cell); {
	case n < 2:
		if f == 0 {
			// Nothing stored, nothing to clear.
			return
		}
		var v interface{}
		if n == 1 {
			v = cell[0]
		}
		*c = Cell{v, BlankCell, f}
	case n == 2:
		if f != 0 {
			*c = append(cell, f)
		}
	case n == 3:
		if f == 0 {
			*c = cell[:2]
		} else {
			cell[2] = f
		}
	default:
		// Elements after the format slot (the hyperlink) must survive.
		cell[2] = f
	}
}

View File

@ -59,6 +59,8 @@ func timeFmtFunc(f string) FmtFunc {
}
}
// same as above but replaces "AM" and "PM" with chinese translations.
// TODO: implement others
func cnTimeFmtFunc(f string) FmtFunc {
return func(x *Formatter, v interface{}) string {
t, ok := v.(time.Time)
@ -74,12 +76,3 @@ func cnTimeFmtFunc(f string) FmtFunc {
return strings.Replace(s, `PM`, `下午`, 1)
}
}
// 0x0001 = date 0b0010 = time 0b0011 = date+time
var builtInDateFormats = map[uint16]byte{
14: 1, 15: 1, 16: 1, 17: 1, 18: 2, 19: 2, 20: 2, 21: 2, 22: 3,
45: 2, 46: 2, 47: 2, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 2,
33: 2, 34: 2, 35: 2, 36: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1,
55: 2, 56: 2, 57: 1, 58: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 2,
76: 2, 77: 3, 78: 2, 79: 2, 80: 2, 81: 1,
}

View File

@ -124,6 +124,7 @@ func convertToFloat64(v interface{}) (float64, bool) {
}
}
// replaces a zero with a dash
func zeroDashFunc(ff FmtFunc) FmtFunc {
return func(x *Formatter, v interface{}) string {
fval, ok := convertToFloat64(v)
@ -152,7 +153,8 @@ func fracFmtFunc(n int) FmtFunc {
}
}
// handle (up to) all four format cases
// handle (up to) all four format cases:
// positive;negative;zero;other
func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc {
stringFF := identFunc
zeroFF := pos

View File

@ -97,7 +97,10 @@ func TestDateFormats(t *testing.T) {
fx := &Formatter{}
for _, t := range testDates {
for fid := range builtInDateFormats {
for fid, ctype := range builtInFormatTypes {
if ctype != DateCell {
continue
}
ff, _ := goFormatters[fid]
// mainly testing these don't crash...
log.Println(ff(fx, t))
@ -105,7 +108,7 @@ func TestDateFormats(t *testing.T) {
}
}
func TestBoolFormats(t *testing.T) {
ff := makeFormatter(`"yes";"yes";"no"`)
ff, _ := makeFormatter(`"yes";"yes";"no"`)
if "no" != ff(nil, false) {
t.Fatal(`false should be "no"`)

View File

@ -9,8 +9,9 @@ import (
// Formatter contains formatting methods common to Excel spreadsheets.
type Formatter struct {
flags uint64
customCodes map[uint16]FmtFunc
flags uint64
customCodes map[uint16]FmtFunc
customCodeTypes map[uint16]CellType
}
const (
@ -31,6 +32,7 @@ func (x *Formatter) Mode1904(enabled bool) {
func (x *Formatter) Add(fmtID uint16, formatCode string) error {
if x.customCodes == nil {
x.customCodes = make(map[uint16]FmtFunc)
x.customCodeTypes = make(map[uint16]CellType)
}
if strings.ToLower(formatCode) == "general" {
x.customCodes[fmtID] = goFormatters[0]
@ -46,10 +48,21 @@ func (x *Formatter) Add(fmtID uint16, formatCode string) error {
return errors.New("grate/commonxl: cannot replace existing number formats")
}
x.customCodes[fmtID] = makeFormatter(formatCode)
x.customCodes[fmtID], x.customCodeTypes[fmtID] = makeFormatter(formatCode)
return nil
}
// getCellType reports the CellType associated with a number format ID.
// Built-in formats are consulted first; custom codes registered on x are
// only used when the ID is not a built-in one. The boolean is false when
// the ID is unknown to both tables.
func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) {
	ct, found := builtInFormatTypes[fmtID]
	if !found && x.customCodeTypes != nil {
		ct, found = x.customCodeTypes[fmtID]
	}
	return ct, found
}
var (
minsMatch = regexp.MustCompile("h.*m.*s")
nonEsc = regexp.MustCompile(`([^"]|^)"`)
@ -60,18 +73,18 @@ var (
formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`)
)
func makeFormatter(s string) FmtFunc {
func makeFormatter(s string) (FmtFunc, CellType) {
//log.Printf("makeFormatter('%s')", s)
// remove any coloring marks
s = formatMatchBrackets.ReplaceAllString(s, "")
if strings.Contains(s, ";") {
parts := strings.Split(s, ";")
posFF := makeFormatter(parts[0])
posFF, ctypePos := makeFormatter(parts[0])
rem := make([]FmtFunc, len(parts)-1)
for i, ps := range parts[1:] {
rem[i] = makeFormatter(ps)
rem[i], _ = makeFormatter(ps)
}
return switchFmtFunc(posFF, rem...)
return switchFmtFunc(posFF, rem...), ctypePos
}
// escaped characters, and quoted text
@ -111,10 +124,11 @@ func makeFormatter(s string) FmtFunc {
s = fixEsc.ReplaceAllString(s, `$1`)
//log.Printf(" made time formatter '%s'", s)
return timeFmtFunc(s)
return timeFmtFunc(s), DateCell
}
var ff FmtFunc
var ctype CellType
if strings.ContainsAny(s, ".Ee") {
verb := "f"
if strings.ContainsAny(s, "Ee") {
@ -132,6 +146,7 @@ func makeFormatter(s string) FmtFunc {
sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb)
//log.Printf(" made float formatter '%s'", sf)
ff = sprintfFunc(sf, mul)
ctype = FloatCell
} else {
s2 := strings.ReplaceAll(s, ",", "")
i1 := strings.IndexAny(s2, "0")
@ -146,6 +161,7 @@ func makeFormatter(s string) FmtFunc {
}
//log.Printf(" made int formatter '%s'", sf)
ff = sprintfFunc(sf, mul)
ctype = IntegerCell
}
if strings.Contains(s, ",") {
@ -162,10 +178,10 @@ func makeFormatter(s string) FmtFunc {
}
if len(prepost) == 1 {
if prepost[0] == "@" {
return identFunc
return identFunc, StringCell
}
//log.Printf(" added static ('%s')", prepost[0])
return staticFmtFunc(prepost[0])
return staticFmtFunc(prepost[0]), StringCell
}
if len(prepost[0]) > 0 || len(prepost[1]) > 0 {
prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`)
@ -176,7 +192,7 @@ func makeFormatter(s string) FmtFunc {
//log.Printf(" added surround ('%s' ... '%s')", prepost[0], prepost[1])
}
return ff
return ff, ctype
}
// Get the number format func to use for formatting values,
@ -288,3 +304,78 @@ var builtInFormats = map[uint16]string{
80: `mm:ss.0`, // `นน:ทท.0`,
81: `d/m/bb`, // `d/m/bb`,
}
// builtInFormatTypes are the underlying datatypes for built-in number formats in XLS/XLSX.
// Entries are listed in ascending format-ID order; IDs that are absent have
// no associated type.
var builtInFormatTypes = map[uint16]CellType{
	// 0 has no defined type
	1: IntegerCell, 2: FloatCell, 3: IntegerCell, 4: FloatCell,

	9: FloatCell, 10: FloatCell, 11: FloatCell, 12: FloatCell, 13: FloatCell,

	14: DateCell, 15: DateCell, 16: DateCell, 17: DateCell, 18: DateCell,
	19: DateCell, 20: DateCell, 21: DateCell, 22: DateCell,

	27: DateCell, 28: DateCell, 29: DateCell, 30: DateCell, 31: DateCell,
	32: DateCell, 33: DateCell, 34: DateCell, 35: DateCell, 36: DateCell,

	37: IntegerCell, 38: IntegerCell, 39: FloatCell, 40: FloatCell,
	41: IntegerCell, 42: IntegerCell, 43: FloatCell, 44: FloatCell,

	45: DateCell, // Durations?
	46: DateCell,
	47: DateCell,

	48: FloatCell,
	49: StringCell,

	50: DateCell, 51: DateCell, 52: DateCell, 53: DateCell, 54: DateCell,
	55: DateCell, 56: DateCell, 57: DateCell, 58: DateCell,

	59: IntegerCell, 60: FloatCell, 61: IntegerCell, 62: FloatCell,

	67: FloatCell, 68: FloatCell, 69: FloatCell, 70: FloatCell,

	71: DateCell, 72: DateCell, 73: DateCell, 74: DateCell, 75: DateCell,
	76: DateCell, 77: DateCell, 78: DateCell, 79: DateCell, 80: DateCell,
	81: DateCell,
}

171
commonxl/sheet.go Normal file
View File

@ -0,0 +1,171 @@
package commonxl
import (
"fmt"
"log"
"time"
)
// Sheet holds raw and rendered values for a spreadsheet.
// Cells are stored row-major in Rows and are written through Put, Set and
// SetURL; records are read back with Next, Strings and Scan.
type Sheet struct {
// Formatter supplies the cell type for a format number (Put) and renders
// values as text (Strings).
Formatter *Formatter
// NumRows is the row bound enforced by Put; it is set by Resize.
NumRows int
// NumCols is the column bound enforced by Put; it is set by Resize.
NumCols int
// Rows holds the cells, indexed as Rows[row][col].
Rows [][]Cell
// CurRow is the index into Rows of the record read by Strings and Scan.
// It is reset to 0 by Resize and advanced by Next.
CurRow int
}
// Resize the sheet for the number of rows and cols given.
// Newly added cells default to blank.
//
// Both dimensions are incremented by one before use (some sheets report
// sizes that are off by one) and are clamped to a minimum of 1. The read
// cursor is reset to the first row. Existing rows are never shortened, and
// every existing row narrower than the new column count is widened.
func (s *Sheet) Resize(rows, cols int) {
	// some sheets are off by one
	rows++
	cols++

	if rows <= 0 {
		rows = 1
	}
	if cols <= 0 {
		cols = 1
	}
	s.CurRow = 0
	s.NumRows = rows
	s.NumCols = cols

	for rows >= len(s.Rows) {
		s.Rows = append(s.Rows, make([]Cell, cols))
	}

	// Widen every short row. Stopping at the first wide-enough row, as the
	// previous loop did, would index past the end of s.Rows whenever all
	// rows needed widening.
	for i, r := range s.Rows {
		if missing := cols - len(r); missing > 0 {
			s.Rows[i] = append(r, make([]Cell, missing)...)
		}
	}
}
// Put the value at the cell location given.
//
// The value is stored with its natural type unless fmtNum is a non-zero
// format number with a known cell type, in which case the value is coerced
// to that type. The format number is recorded on the cell. Locations
// outside the sheet, including negative indexes, are logged and ignored.
func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) {
	if row < 0 || col < 0 || row >= s.NumRows || col >= s.NumCols {
		log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d",
			row, s.NumRows, col, s.NumCols)
		return
	}

	var (
		ct    CellType
		typed bool
	)
	// Format 0 never implies a type, and a sheet without a Formatter cannot
	// look one up.
	if fmtNum != 0 && s.Formatter != nil {
		ct, typed = s.Formatter.getCellType(fmtNum)
	}

	var cell Cell
	if typed {
		cell = NewCellWithType(value, ct)
	} else {
		cell = NewCell(value)
	}
	cell.SetFormatNumber(fmtNum)
	s.Rows[row][col] = cell
}
// Set changes the value in an existing cell location.
// NB Currently only used for populating string results for formulas.
//
// The cell is re-tagged as a StringCell; any format number already stored
// on it is kept. A location that has never been populated receives a fresh
// two-element cell. Locations outside the sheet, including negative
// indexes, are logged and ignored (same bounds as Put).
func (s *Sheet) Set(row, col int, value interface{}) {
	if row < 0 || col < 0 || row >= s.NumRows || col >= s.NumCols {
		log.Println("grate: cell out of bounds")
		return
	}

	cell := s.Rows[row][col]
	if len(cell) < 2 {
		// No slots to overwrite yet.
		s.Rows[row][col] = Cell{value, StringCell}
		return
	}
	cell[0] = value
	cell[1] = StringCell
}
// SetURL adds a hyperlink to an existing cell location.
// Locations outside the sheet, including negative indexes, are logged and
// ignored (same bounds as Put).
func (s *Sheet) SetURL(row, col int, link string) {
	if row < 0 || col < 0 || row >= s.NumRows || col >= s.NumCols {
		log.Println("grate: cell out of bounds")
		return
	}
	s.Rows[row][col].SetURL(link)
}
// Next advances to the next record of content.
// It MUST be called prior to any Scan().
// It returns false, leaving the cursor untouched, when no further row exists.
//
// NOTE(review): Resize resets CurRow to 0 and this method increments before
// returning, so the record at index 0 is not reachable through Next after a
// Resize - confirm this is intended.
func (s *Sheet) Next() bool {
	following := s.CurRow + 1
	if following < len(s.Rows) {
		s.CurRow = following
		return true
	}
	return false
}
// Strings extracts values from the current record into a list of strings.
// Blank cells yield "". Other cells are rendered by the sheet's Formatter
// using the cell's format number, falling back to fmt.Sprint when the
// Formatter reports that it could not format the value.
func (s *Sheet) Strings() []string {
	out := make([]string, s.NumCols)
	record := s.Rows[s.CurRow]
	for col := range record {
		cell := record[col]
		if cell.Type() == BlankCell {
			out[col] = ""
			continue
		}
		raw := cell.Value()
		if text, formatted := s.Formatter.Apply(cell.FormatNo(), raw); formatted {
			out[col] = text
		} else {
			out[col] = fmt.Sprint(raw)
		}
	}
	return out
}
// Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of 5 supported types:
// bool, int64, float64, string, or time.Time
//
// Argument i receives the value of column i. An error is returned, and
// scanning stops, when a destination is not a pointer, is of an unsupported
// type, does not match the type of the stored value, or has no
// corresponding column in the record. Destinations scanned before the
// failing one have already been written.
func (s *Sheet) Scan(args ...interface{}) error {
	row := s.Rows[s.CurRow]
	for i, a := range args {
		if i >= len(row) {
			return fmt.Errorf("scan destination %d has no matching column (record has %d)", i, len(row))
		}
		val := row[i].Value()

		switch v := a.(type) {
		case bool, int64, float64, string, time.Time:
			return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i)
		case *bool:
			x, ok := val.(bool)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val)
			}
			*v = x
		case *int64:
			x, ok := val.(int64)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val)
			}
			*v = x
		case *float64:
			x, ok := val.(float64)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val)
			}
			*v = x
		case *string:
			x, ok := val.(string)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *string", i, val)
			}
			*v = x
		case *time.Time:
			x, ok := val.(time.Time)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val)
			}
			*v = x
		default:
			return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a)
		}
	}
	return nil
}
// IsEmpty returns true if there are no data values, i.e. when neither
// dimension of the sheet exceeds one.
func (s *Sheet) IsEmpty() bool {
	if s.NumCols > 1 || s.NumRows > 1 {
		return false
	}
	return true
}
// Err returns the last error that occurred.
// This implementation records no errors, so the result is always nil.
func (s *Sheet) Err() error {
	var none error
	return none
}

View File

@ -31,7 +31,7 @@ type Collection interface {
// Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
// bool, int64, float64, string, or time.Time
// If invalid, returns ErrInvalidScanType
Scan(args ...interface{}) error
@ -82,3 +82,15 @@ func Register(name string, priority int, opener OpenFunc) error {
})
return nil
}
// Marker strings written into the cells covered by a merged range, other
// than the range's first (top-left) cell.
const (
// ContinueColumnMerged marks a continuation column within a merged cell.
ContinueColumnMerged = "→"
// EndColumnMerged marks the last column of a merged cell.
EndColumnMerged = "⇥"
// ContinueRowMerged marks a continuation row within a merged cell.
ContinueRowMerged = "↓"
// EndRowMerged marks the last row of a merged cell.
EndRowMerged = "⤓"
)

View File

@ -3,13 +3,12 @@ package xls
import (
"encoding/binary"
"errors"
"fmt"
"log"
"math"
"time"
"unicode/utf16"
"github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
)
// List (visible) sheet names from the workbook.
@ -39,94 +38,24 @@ func (b *WorkBook) Get(sheetName string) (grate.Collection, error) {
for _, s := range b.sheets {
if s.Name == sheetName {
ss := b.pos2substream[int64(s.Position)]
ws := &WorkSheet{
b: b, s: s, ss: ss,
iterRow: -1,
}
return ws, ws.parse()
return b.parseSheet(s, ss)
}
}
return nil, errors.New("xls: sheet not found")
}
// WorkSheet holds various metadata about a sheet in a Workbook.
type WorkSheet struct {
b *WorkBook
s *boundSheet
ss int
err error
minRow int
maxRow int // maximum valid row index (0xFFFF)
minCol int
maxCol int // maximum valid column index (0xFF)
rows []*row
empty bool
iterRow int
iterMC int
}
type staticCellType rune
const (
staticBlank staticCellType = 0
// marks a continuation column within a merged cell.
continueColumnMerged staticCellType = '→'
// marks the last column of a merged cell.
endColumnMerged staticCellType = '⇥'
// marks a continuation row within a merged cell.
continueRowMerged staticCellType = '↓'
// marks the last row of a merged cell.
endRowMerged staticCellType = '⤓'
)
func (s staticCellType) String() string {
if s == 0 {
return ""
func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) {
res := &commonxl.Sheet{
Formatter: &b.nfmt,
}
return string([]rune{rune(s)})
}
var minRow, maxRow uint32
var minCol, maxCol uint16
type row struct {
// each value must be one of: int, float64, string, or time.Time
cols []interface{}
}
func (s *WorkSheet) makeCells() {
// ensure we always have a complete matrix
for len(s.rows) <= s.maxRow {
emptyRow := make([]interface{}, s.maxCol+1)
s.rows = append(s.rows, &row{emptyRow})
}
}
func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
if colIndex > s.maxCol || rowIndex > s.maxRow {
// invalid
return
}
// ensure we always have a complete matrix
for len(s.rows) <= rowIndex {
emptyRow := make([]interface{}, s.maxCol+1)
s.rows = append(s.rows, &row{emptyRow})
}
s.rows[rowIndex].cols[colIndex] = val
}
func (s *WorkSheet) IsEmpty() bool {
return s.empty
}
func (s *WorkSheet) parse() error {
// temporary string buffer
us := make([]uint16, 8224)
inSubstream := 0
for idx, r := range s.b.substreams[s.ss] {
for idx, r := range b.substreams[ss] {
if inSubstream > 0 {
if r.RecType == RecTypeEOF {
inSubstream--
@ -145,15 +74,15 @@ func (s *WorkSheet) parse() error {
case RecTypeWsBool:
if (r.Data[1] & 0x10) != 0 {
// it's a dialog
return nil
return nil, nil
}
case RecTypeDimensions:
// max = 0-based index of the row AFTER the last valid index
minRow := binary.LittleEndian.Uint32(r.Data[:4])
maxRow := binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
minCol := binary.LittleEndian.Uint16(r.Data[8:10])
maxCol := binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
minRow = binary.LittleEndian.Uint32(r.Data[:4])
maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
minCol = binary.LittleEndian.Uint16(r.Data[8:10])
maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
if grate.Debug {
log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)",
minCol, minRow, maxCol, maxRow)
@ -164,21 +93,15 @@ func (s *WorkSheet) parse() error {
if minCol > 0x00FF || maxCol > 0x0100 {
log.Println("invalid dimensions")
}
s.minRow = int(uint64(minRow) & 0x0FFFF)
s.maxRow = int(uint64(maxRow)&0x1FFFF) - 1 // translate to last valid index
s.minCol = int(uint64(minCol) & 0x000FF)
s.maxCol = int(uint64(maxCol)&0x001FF) - 1 // translate to last valid index
if (maxRow-minRow) == 0 || (maxCol-minCol) == 0 {
s.empty = true
}
// pre-allocate cells
s.makeCells()
res.Resize(int(maxRow), int(maxCol))
}
}
inSubstream = 0
var formulaRow, formulaCol uint16
for ridx, r := range s.b.substreams[s.ss] {
for ridx, r := range b.substreams[ss] {
if inSubstream > 0 {
if r.RecType == RecTypeEOF {
inSubstream--
@ -216,15 +139,11 @@ func (s *WorkSheet) parse() error {
if r.Data[6] == 1 {
bv = true
}
var rval interface{} = bv
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
if fval, ok := s.b.nfmt.Apply(fno, bv); ok {
rval = fval
}
s.placeValue(rowIndex, colIndex, rval)
res.Put(rowIndex, colIndex, bv, fno)
//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
} else {
// it's an error, load the label
@ -232,7 +151,7 @@ func (s *WorkSheet) parse() error {
if !ok {
be = "<unknown error>"
}
s.placeValue(rowIndex, colIndex, be)
res.Put(rowIndex, colIndex, be, 0)
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
}
@ -253,11 +172,10 @@ func (s *WorkSheet) parse() error {
rval = value.Float64()
}
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
rval, _ = s.b.nfmt.Apply(fno, rval)
s.placeValue(rowIndex, colIndex+i, rval)
res.Put(rowIndex, colIndex+i, rval, fno)
}
//log.Printf("mulrow spec: %+v", *mr)
@ -269,12 +187,10 @@ func (s *WorkSheet) parse() error {
value := math.Float64frombits(xnum)
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
rval, _ := s.b.nfmt.Apply(fno, value)
s.placeValue(rowIndex, colIndex, rval)
res.Put(rowIndex, colIndex, value, fno)
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
case RecTypeRK:
@ -290,11 +206,10 @@ func (s *WorkSheet) parse() error {
rval = value.Float64()
}
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
rval, _ = s.b.nfmt.Apply(fno, rval)
s.placeValue(rowIndex, colIndex, rval)
res.Put(rowIndex, colIndex, rval, fno)
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
case RecTypeFormula:
@ -302,32 +217,30 @@ func (s *WorkSheet) parse() error {
formulaCol = binary.LittleEndian.Uint16(r.Data[2:4])
ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
fdata := r.Data[6:]
var fno uint16
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
if fdata[6] == 0xFF && r.Data[7] == 0xFF {
switch fdata[0] {
case 0:
// string in next record
// put placeholder now to record the numFmt
res.Put(int(formulaRow), int(formulaCol), "", fno)
case 1:
// boolean
bv := false
if fdata[2] != 0 {
bv = true
}
var rval interface{} = bv
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
}
if fval, ok := s.b.nfmt.Apply(fno, bv); ok {
rval = fval
}
s.placeValue(int(formulaRow), int(formulaCol), rval)
res.Put(int(formulaRow), int(formulaCol), bv, fno)
case 2:
// error value
be, ok := berrLookup[fdata[2]]
if !ok {
be = "<unknown error>"
}
s.placeValue(int(formulaRow), int(formulaCol), be)
res.Put(int(formulaRow), int(formulaCol), be, 0)
case 3:
// blank string
default:
@ -336,12 +249,7 @@ func (s *WorkSheet) parse() error {
} else {
xnum := binary.LittleEndian.Uint64(r.Data[6:])
value := math.Float64frombits(xnum)
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
}
rval, _ := s.b.nfmt.Apply(fno, value)
s.placeValue(int(formulaRow), int(formulaCol), rval)
res.Put(int(formulaRow), int(formulaCol), value, fno)
}
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
@ -370,11 +278,11 @@ func (s *WorkSheet) parse() error {
fstr = string(utf16.Decode(us))
}
if (ridx + 1) < len(s.b.substreams[s.ss]) {
if (ridx + 1) < len(b.substreams[ss]) {
ridx2 := ridx + 1
nrecs := len(s.b.substreams[s.ss])
nrecs := len(b.substreams[ss])
for ridx2 < nrecs {
r2 := s.b.substreams[s.ss][ridx2]
r2 := b.substreams[ss][ridx2]
if r2.RecType != RecTypeContinue {
break
}
@ -393,20 +301,22 @@ func (s *WorkSheet) parse() error {
ridx2++
}
}
// TODO: does formula record formatted dates as pre-computed strings?
s.placeValue(int(formulaRow), int(formulaCol), fstr)
res.Set(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst:
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6])
ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:]))
if sstIndex > len(s.b.strings) {
return errors.New("xls: invalid sst index")
if sstIndex > len(b.strings) {
return nil, errors.New("xls: invalid sst index")
}
// FIXME: double check that ixfe doesn't modify output
if s.b.strings[sstIndex] != "" {
s.placeValue(rowIndex, colIndex, s.b.strings[sstIndex])
var fno uint16
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
if b.strings[sstIndex] != "" {
res.Put(rowIndex, colIndex, b.strings[sstIndex], fno)
}
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
@ -415,19 +325,19 @@ func (s *WorkSheet) parse() error {
lastRow := binary.LittleEndian.Uint16(r.Data[2:4])
firstCol := binary.LittleEndian.Uint16(r.Data[4:6])
lastCol := binary.LittleEndian.Uint16(r.Data[6:])
if int(firstCol) > s.maxCol {
if int(firstCol) > int(maxCol) {
//log.Println("invalid hyperlink column")
continue
}
if int(firstRow) > s.maxRow {
if int(firstRow) > int(maxRow) {
//log.Println("invalid hyperlink row")
continue
}
if lastRow == 0xFFFF { // placeholder value indicate "last"
lastRow = uint16(s.maxRow)
lastRow = uint16(maxRow) - 1
}
if lastCol == 0xFF { // placeholder value indicate "last"
lastCol = uint16(s.maxCol)
lastCol = uint16(maxCol) - 1
}
// decode the hyperlink datastructure and try to find the
@ -443,19 +353,19 @@ func (s *WorkSheet) parse() error {
for cn := int(firstCol); cn <= int(lastCol); cn++ {
if rn == int(firstRow) && cn == int(firstCol) {
// TODO: provide custom hooks for how to handle links in output
s.placeValue(rn, cn, displayText+" <"+linkText+">")
res.Put(rn, cn, displayText+" <"+linkText+">", 0)
} else if cn == int(firstCol) {
// first and last column MAY be the same
if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged)
res.Put(rn, cn, grate.EndRowMerged, 0)
} else {
s.placeValue(rn, cn, continueRowMerged)
res.Put(rn, cn, grate.ContinueRowMerged, 0)
}
} else if cn == int(lastCol) {
// first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged)
res.Put(rn, cn, grate.EndColumnMerged, 0)
} else {
s.placeValue(rn, cn, continueColumnMerged)
res.Put(rn, cn, grate.ContinueColumnMerged, 0)
}
}
}
@ -482,10 +392,10 @@ func (s *WorkSheet) parse() error {
raw = raw[8:]
if lastRow == 0xFFFF { // placeholder value indicate "last"
lastRow = uint16(s.maxRow)
lastRow = uint16(maxRow) - 1
}
if lastCol == 0xFF { // placeholder value indicate "last"
lastCol = uint16(s.maxCol)
lastCol = uint16(maxCol) - 1
}
for rn := int(firstRow); rn <= int(lastRow); rn++ {
for cn := int(firstCol); cn <= int(lastCol); cn++ {
@ -494,15 +404,15 @@ func (s *WorkSheet) parse() error {
} else if cn == int(firstCol) {
// first and last column MAY be the same
if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged)
res.Put(rn, cn, grate.EndRowMerged, 0)
} else {
s.placeValue(rn, cn, continueRowMerged)
res.Put(rn, cn, grate.ContinueRowMerged, 0)
}
} else if cn == int(lastCol) {
// first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged)
res.Put(rn, cn, grate.EndColumnMerged, 0)
} else {
s.placeValue(rn, cn, continueColumnMerged)
res.Put(rn, cn, grate.ContinueColumnMerged, 0)
}
}
}
@ -524,64 +434,7 @@ func (s *WorkSheet) parse() error {
*/
}
}
return nil
}
// Err returns the last error that occured.
func (s *WorkSheet) Err() error {
return s.err
}
// Next advances to the next row of content.
// It MUST be called prior to any Scan().
func (s *WorkSheet) Next() bool {
s.iterRow++
return s.iterRow < len(s.rows)
}
// Strings returns the contents of the row as string types.
func (s *WorkSheet) Strings() []string {
currow := s.rows[s.iterRow]
res := make([]string, len(currow.cols))
for i, col := range currow.cols {
if col == nil || col == "" {
continue
}
switch v := col.(type) {
case string:
res[i] = v
case fmt.Stringer:
res[i] = v.String()
default:
res[i] = fmt.Sprint(col)
}
}
return res
}
// Scan extracts values from the row into the provided arguments
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
func (s *WorkSheet) Scan(args ...interface{}) error {
currow := s.rows[s.iterRow]
for i, a := range args {
switch v := a.(type) {
case *bool:
*v = currow.cols[i].(bool)
case *int:
*v = currow.cols[i].(int)
case *float64:
*v = currow.cols[i].(float64)
case *string:
*v = currow.cols[i].(string)
case *time.Time:
*v = currow.cols[i].(time.Time)
default:
return grate.ErrInvalidScanType
}
}
return nil
return res, nil
}
var berrLookup = map[byte]string{

View File

@ -282,11 +282,13 @@ func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD {
return errors.New("xls: unsupported biff version")
}
if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
// we only support the workbook or worksheet substreams
log.Println("xls: unsupported document type")
//break
}
/*
if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
// we only support the workbook or worksheet substreams
log.Println("xls: unsupported document type")
//break
}
*/
case RecTypeCodePage:
// BIFF8 is entirely UTF-16LE so this is actually ignored

View File

@ -3,13 +3,11 @@ package xlsx
import (
"encoding/xml"
"errors"
"fmt"
"io"
"log"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
@ -23,24 +21,15 @@ type Sheet struct {
err error
minRow int
maxRow int
minCol int
maxCol int
rows []*row
empty bool
iterRow int
wrapped *commonxl.Sheet
}
var errNotLoaded = errors.New("xlsx: sheet not loaded")
type row struct {
// each value must be one of: int, float64, string, or time.Time
cols []interface{}
}
func (s *Sheet) parseSheet() error {
s.wrapped = &commonxl.Sheet{
Formatter: &s.d.fmt,
}
linkmap := make(map[string]string)
base := filepath.Base(s.docname)
sub := strings.TrimSuffix(s.docname, base)
@ -68,7 +57,9 @@ func (s *Sheet) parseSheet() error {
currentCellType := BlankCellType
currentCell := ""
var numFormat commonxl.FmtFunc
var fno uint16
var maxCol, maxRow int
tok, err := dec.RawToken()
for ; err == nil; tok, err = dec.RawToken() {
switch v := tok.(type) {
@ -79,6 +70,7 @@ func (s *Sheet) parseSheet() error {
c, r := refToIndexes(currentCell)
if c >= 0 && r >= 0 {
var val interface{} = string(v)
switch currentCellType {
case BooleanCellType:
if v[0] == '1' {
@ -87,13 +79,12 @@ func (s *Sheet) parseSheet() error {
val = false
}
case DateCellType:
log.Println("CELL DATE", val, numFormat)
log.Println("CELL DATE", val, fno)
case NumberCellType:
fval, err := strconv.ParseFloat(string(v), 64)
if err == nil {
val = fval
}
val = numFormat(&s.d.fmt, fval)
//log.Println("CELL NUMBER", val, numFormat)
case SharedStringCellType:
//log.Println("CELL SHSTR", val, currentCellType, numFormat)
@ -106,9 +97,9 @@ func (s *Sheet) parseSheet() error {
case ErrorCellType, FormulaStringCellType, InlineStringCellType:
//log.Println("CELL ERR/FORM/INLINE", val, currentCellType)
default:
log.Println("CELL UNKNOWN", val, currentCellType, numFormat)
log.Println("CELL UNKNOWN", val, currentCellType, fno)
}
s.placeValue(r, c, val)
s.wrapped.Put(r, c, val, fno)
} else {
//log.Println("FAIL row/col: ", currentCell)
}
@ -117,20 +108,19 @@ func (s *Sheet) parseSheet() error {
case "dimension":
ax := getAttrs(v.Attr, "ref")
if ax[0] == "A1" {
maxCol, maxRow = 1, 1
// short-circuit empty sheet
s.minCol, s.minRow = 0, 0
s.maxCol, s.maxRow = 1, 1
s.empty = true
s.wrapped.Resize(1, 1)
continue
}
dims := strings.Split(ax[0], ":")
if len(dims) == 1 {
s.minCol, s.minRow = 0, 0
s.maxCol, s.maxRow = refToIndexes(dims[0])
maxCol, maxRow = refToIndexes(dims[0])
} else {
s.minCol, s.minRow = refToIndexes(dims[0])
s.maxCol, s.maxRow = refToIndexes(dims[1])
//minCol, minRow := refToIndexes(dims[0])
maxCol, maxRow = refToIndexes(dims[1])
}
s.wrapped.Resize(maxRow, maxCol)
//log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol)
case "row":
//currentRow = ax["r"] // unsigned int row index
@ -145,9 +135,9 @@ func (s *Sheet) parseSheet() error {
style := ax[2]
sid, _ := strconv.ParseInt(style, 10, 64)
if len(s.d.xfs) > int(sid) {
numFormat = s.d.xfs[sid] // unsigned integer lookup
fno = s.d.xfs[sid]
} else {
numFormat = s.d.xfs[0]
fno = 0
}
//log.Println("CELL", currentCell, sid, numFormat, currentCellType)
case "v":
@ -161,6 +151,12 @@ func (s *Sheet) parseSheet() error {
if len(dims) > 1 {
endCol, endRow = refToIndexes(dims[1])
}
if endRow > maxRow {
endRow = maxRow
}
if endCol > maxCol {
endCol = maxCol
}
for r := startRow; r <= endRow; r++ {
for c := startCol; c <= endCol; c++ {
if r == startRow && c == startCol {
@ -168,15 +164,15 @@ func (s *Sheet) parseSheet() error {
} else if c == startCol {
// first and last column MAY be the same
if r == endRow {
s.placeValue(r, c, endRowMerged)
s.wrapped.Put(r, c, grate.EndRowMerged, 0)
} else {
s.placeValue(r, c, continueRowMerged)
s.wrapped.Put(r, c, grate.ContinueRowMerged, 0)
}
} else if c == endCol {
// first and last column are NOT the same
s.placeValue(r, c, endColumnMerged)
s.wrapped.Put(r, c, grate.EndColumnMerged, 0)
} else {
s.placeValue(r, c, continueColumnMerged)
s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0)
}
}
}
@ -185,12 +181,8 @@ func (s *Sheet) parseSheet() error {
ax := getAttrs(v.Attr, "ref", "id")
col, row := refToIndexes(ax[0])
link := linkmap[ax[1]]
if len(s.rows) > row && len(s.rows[row].cols) > col {
if sstr, ok := s.rows[row].cols[col].(string); ok {
link = sstr + " <" + link + ">"
}
}
s.placeValue(row, col, link)
s.wrapped.Put(row, col, link, 0)
s.wrapped.SetURL(row, col, link)
case "worksheet", "mergeCells", "hyperlinks":
// containers
@ -220,71 +212,3 @@ func (s *Sheet) parseSheet() error {
}
return err
}
// placeValue stores val at (rowIndex, colIndex) in the sheet's row matrix.
//
// Coordinates that are negative or beyond s.maxRow / s.maxCol are silently
// ignored. Negative values must be rejected here because not every caller
// validates parsed cell references before placing a value, and indexing
// s.rows with a negative number would panic.
//
// Rows are appended on demand, each sized s.maxCol+1, so every row up to
// rowIndex exists before the value is written. Storing a value also clears
// the sheet's empty flag.
func (s *Sheet) placeValue(rowIndex, colIndex int, val interface{}) {
	if rowIndex < 0 || colIndex < 0 || rowIndex > s.maxRow || colIndex > s.maxCol {
		// invalid
		return
	}

	// ensure we always have a complete matrix
	for len(s.rows) <= rowIndex {
		emptyRow := make([]interface{}, s.maxCol+1)
		s.rows = append(s.rows, &row{cols: emptyRow})
	}
	s.empty = false
	s.rows[rowIndex].cols[colIndex] = val
}
// Next moves the row cursor forward by one and reports whether the cursor
// now points at an existing row.
// It MUST be called prior to any Scan().
func (s *Sheet) Next() bool {
	s.iterRow += 1
	hasRow := len(s.rows) > s.iterRow
	return hasRow
}
// Strings returns the cells of the current row rendered as strings.
//
// Each non-empty cell is formatted with fmt.Sprint; nil cells and cells
// holding the empty string are returned as "". The result has one entry per
// column of the current row.
//
// If the row cursor is not positioned on a row (for example after Next has
// returned false), Strings returns nil instead of panicking.
func (s *Sheet) Strings() []string {
	if s.iterRow < 0 || s.iterRow >= len(s.rows) {
		return nil
	}

	currow := s.rows[s.iterRow]
	res := make([]string, len(currow.cols))
	for i, col := range currow.cols {
		if col == nil || col == "" {
			// leave the zero value ("") in place for blank cells
			continue
		}
		res[i] = fmt.Sprint(col)
	}
	return res
}
// Scan extracts values from the row into the provided arguments
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
//
// Argument i receives the value of column i of the current row. Scan returns
// grate.ErrInvalidScanType when an argument is not one of the supported
// pointer types, and a descriptive error (rather than panicking) when:
//   - the row cursor is not positioned on a row,
//   - more arguments are supplied than the row has columns, or
//   - a cell's stored value does not have the destination's type
//     (this includes empty cells, which are stored as nil).
//
// When a cell's type does not match, the destination is set to its zero
// value and scanning stops at that argument.
func (s *Sheet) Scan(args ...interface{}) error {
	if s.iterRow < 0 || s.iterRow >= len(s.rows) {
		return fmt.Errorf("xlsx: scan called without a current row (row index %d of %d)", s.iterRow, len(s.rows))
	}

	currow := s.rows[s.iterRow]
	if len(args) > len(currow.cols) {
		return fmt.Errorf("xlsx: scan of %d values into a row with %d columns", len(args), len(currow.cols))
	}

	for i, a := range args {
		cell := currow.cols[i]

		// Use the comma-ok form so a mismatched cell yields an error
		// instead of a runtime panic.
		var ok bool
		switch v := a.(type) {
		case *bool:
			*v, ok = cell.(bool)
		case *int:
			*v, ok = cell.(int)
		case *float64:
			*v, ok = cell.(float64)
		case *string:
			*v, ok = cell.(string)
		case *time.Time:
			*v, ok = cell.(time.Time)
		default:
			return grate.ErrInvalidScanType
		}
		if !ok {
			return fmt.Errorf("xlsx: cannot scan column %d (%T) into %T", i, cell, a)
		}
	}
	return nil
}
// IsEmpty reports the sheet's empty flag. The flag is cleared by placeValue
// whenever a value is stored in the sheet.
func (s *Sheet) IsEmpty() bool {
return s.empty
}
// Err returns the last error that occurred, as recorded in the sheet's err
// field.
func (s *Sheet) Err() error {
return s.err
}

View File

@ -9,7 +9,6 @@ import (
"strconv"
"github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
)
func (d *Document) parseRels(dec *xml.Decoder, basedir string) error {
@ -119,7 +118,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error {
section = 2
ax := getAttrs(v.Attr, "count")
n, _ := strconv.ParseInt(ax[0], 10, 64)
d.xfs = make([]commonxl.FmtFunc, 0, n)
d.xfs = make([]uint16, 0, n)
case "xf":
ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId")
@ -148,11 +147,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error {
}
nfid, _ := strconv.ParseInt(numFmtID, 10, 16)
thisXF, ok := d.fmt.Get(uint16(nfid))
if !ok {
panic("numformat unknown")
}
d.xfs = append(d.xfs, thisXF)
d.xfs = append(d.xfs, uint16(nfid))
} else {
panic("wheres is this xf??")
}

View File

@ -27,7 +27,7 @@ type Document struct {
rels map[string]map[string]string
sheets []*Sheet
strings []string
xfs []commonxl.FmtFunc
xfs []uint16
fmt commonxl.Formatter
}
@ -163,7 +163,7 @@ func (d *Document) Get(sheetName string) (grate.Collection, error) {
if s.err == errNotLoaded {
s.err = s.parseSheet()
}
return s, s.err
return s.wrapped, s.err
}
}
return nil, errors.New("xlsx: sheet not found")