1
0
mirror of https://github.com/pbnjay/grate.git synced 2024-12-12 13:35:18 +02:00

refactor sheets and formatting so we can use for type detection

This commit is contained in:
Jeremy Jay 2021-02-21 23:25:18 -05:00
parent 167574603f
commit 35c8ec73bc
12 changed files with 686 additions and 359 deletions

281
commonxl/cell.go Normal file
View File

@ -0,0 +1,281 @@
package commonxl
import (
"fmt"
"math"
"net/url"
"strconv"
"time"
"unicode/utf16"
)
// CellType annotates the type of data extracted in the cell.
// It is the second element stored in a populated Cell.
type CellType uint16
// CellType annotations for various cell value types.
// The numeric values are assigned by iota, so the declaration order matters.
const (
// BlankCell marks a cell without content; NewCell produces it for empty strings.
BlankCell CellType = iota
// IntegerCell values are stored as int64 by NewCell.
IntegerCell
// FloatCell values are stored as float64 by NewCell.
FloatCell
// StringCell values are stored as string by NewCell.
StringCell
// BooleanCell values are stored as bool by NewCell.
BooleanCell
// DateCell values are stored as time.Time by NewCell.
DateCell
HyperlinkStringCell // internal type to separate URLs; assigned by Cell.SetURL
StaticCell // placeholder, internal use only
)
// Cell represents a single cell value.
//
// Internally it is a slice holding between 2 and 4 elements:
//   [Value, CellType]
//   [Value, CellType, FormatNumber]
//   [Value, CellType, FormatNumber, URL]
// FormatNumber is a uint16 and is only stored when it is not 0
// (SetFormatNumber(0) truncates the slice back to two elements).
// The URL is a string appended by SetURL, which stores a uint16(0)
// FormatNumber first when none is present.
type Cell []interface{}
// Value returns the cell contents as a generic interface{}.
// A cell with no elements at all reports the empty string.
func (c Cell) Value() interface{} {
	if len(c) > 0 {
		return c[0]
	}
	return ""
}
// SetURL adds a URL hyperlink to the cell.
//
// The cell type becomes HyperlinkStringCell and the link is stored as the
// fourth element, after the format number. A cell that has no format number
// yet gets uint16(0) in that slot. Calling SetURL again replaces the stored
// link instead of appending another element, and a cell that was never
// populated is padded (with an empty string value) rather than panicking.
func (c *Cell) SetURL(link string) {
	cell := *c

	// Pad short cells out to [Value, CellType, FormatNumber].
	if len(cell) == 0 {
		cell = append(cell, "")
	}
	if len(cell) == 1 {
		cell = append(cell, HyperlinkStringCell)
	}
	if len(cell) == 2 {
		cell = append(cell, uint16(0))
	}

	cell[1] = HyperlinkStringCell
	if len(cell) == 3 {
		cell = append(cell, link)
	} else {
		// a link was stored earlier; overwrite it so URL() keeps reading index 3
		cell[3] = link
	}
	*c = cell
}
// URL returns the parsed URL when a cell contains a hyperlink.
// The boolean is false when the cell is not a hyperlink cell, carries no
// link element, or the stored link cannot be parsed.
func (c Cell) URL() (*url.URL, bool) {
	if c.Type() != HyperlinkStringCell || len(c) < 4 {
		return nil, false
	}
	parsed, err := url.Parse(c[3].(string))
	return parsed, err == nil
}
// Type returns the CellType of the value.
// Cells with fewer than two elements carry no type slot and report BlankCell.
func (c Cell) Type() CellType {
	if len(c) >= 2 {
		return c[1].(CellType)
	}
	return BlankCell
}
// FormatNo returns the NumberFormat used for display, or 0 when the cell
// stores none.
//
// The format number lives in the third element. A cell that also carries a
// hyperlink has four elements, so the check is "at least three" rather than
// "exactly three"; otherwise linked cells would lose their number format.
func (c Cell) FormatNo() uint16 {
	if len(c) < 3 {
		return 0
	}
	if f, ok := c[2].(uint16); ok {
		return f
	}
	// malformed slot: treat as "no format" instead of panicking
	return 0
}
///////
// boolStrings lists the textual spellings recognised when NewCellWithType
// coerces a string cell into a BooleanCell. Lookups are case-sensitive: only
// the all-lowercase and all-uppercase spellings below are present, and any
// string missing from the map yields false (the map's zero value).
var boolStrings = map[string]bool{
"yes": true, "true": true, "t": true, "y": true, "1": true, "on": true,
"no": false, "false": false, "f": false, "n": false, "0": false, "off": false,
"YES": true, "TRUE": true, "T": true, "Y": true, "1.0": true, "ON": true,
"NO": false, "FALSE": false, "F": false, "N": false, "0.0": false, "OFF": false,
}
// NewCellWithType creates a new cell value with the given type, coercing as necessary.
//
// The value is first wrapped by NewCell and then converted from its natural
// type to t:
//   - bool    -> integer (0/1), float (0.0/1.0) or string ("TRUE"/"FALSE")
//   - float   -> integer (truncated) or bool (non-zero is true)
//   - integer -> float or bool (non-zero is true)
//   - string  -> integer, float (unparsable text becomes 0) or bool (via boolStrings)
//   - any remaining value -> string through fmt.Sprint when t is StringCell
//
// Blank cells are returned unchanged whatever t is, so an empty value never
// turns into the literal text "<nil>". Combinations not listed above
// (including every conversion to DateCell) keep the natural type.
func NewCellWithType(value interface{}, t CellType) Cell {
	c := NewCell(value)
	if c[1] == t || c[1] == BlankCell {
		// already typed correctly, or nothing to coerce
		return c
	}

	switch c[1].(CellType) {
	case BooleanCell:
		b := c[0].(bool)
		switch t {
		case IntegerCell:
			c[0], c[1] = int64(0), IntegerCell
			if b {
				c[0] = int64(1)
			}
		case FloatCell:
			c[0], c[1] = float64(0.0), FloatCell
			if b {
				c[0] = float64(1.0)
			}
		case StringCell:
			c[0], c[1] = "FALSE", StringCell
			if b {
				c[0] = "TRUE"
			}
		}

	case FloatCell:
		f := c[0].(float64)
		switch t {
		case IntegerCell:
			c[0], c[1] = int64(f), IntegerCell
		case BooleanCell:
			c[0], c[1] = f != 0.0, BooleanCell
		}

	case IntegerCell:
		n := c[0].(int64)
		switch t {
		case FloatCell:
			c[0], c[1] = float64(n), FloatCell
		case BooleanCell:
			c[0], c[1] = n != 0, BooleanCell
		}

	case StringCell:
		s := c[0].(string)
		switch t {
		case IntegerCell:
			// best effort: text that is not an integer is stored as 0
			x, _ := strconv.ParseInt(s, 10, 64)
			c[0], c[1] = x, IntegerCell
		case FloatCell:
			// best effort: text that is not a number is stored as 0
			x, _ := strconv.ParseFloat(s, 64)
			c[0], c[1] = x, FloatCell
		case BooleanCell:
			c[0], c[1] = boolStrings[s], BooleanCell
		}
	}

	if t == StringCell {
		// catch-all for everything not converted above (numbers, dates)
		c[0] = fmt.Sprint(c[0])
		c[1] = StringCell
	}
	// TODO: conversion to DateCell is not implemented; such values keep
	// their natural type.
	return c
}
// NewCell creates a new cell value from any builtin type.
//
// Values are normalised to a small set of storage types:
//   - bool stays bool (BooleanCell)
//   - every signed and unsigned integer type becomes int64 (IntegerCell);
//     unsigned values above math.MaxInt64 become float64 (FloatCell)
//   - float32 and float64 become float64 (FloatCell)
//   - string, []byte, []rune, []uint16 (decoded as UTF-16) and fmt.Stringer
//     become string (StringCell); empty text yields a BlankCell with a nil value
//   - time.Time stays time.Time (DateCell)
//   - nil yields a BlankCell
//
// Any other type panics, as it indicates a programming error in the caller.
func NewCell(value interface{}) Cell {
	var content interface{} // stays nil for blank cells
	kind := BlankCell

	setInt := func(n int64) { content, kind = n, IntegerCell }
	setFloat := func(f float64) { content, kind = f, FloatCell }
	setUint := func(u uint64) {
		// Compare as uint64: comparing a plain uint with math.MaxInt64 does
		// not compile on platforms where uint is 32 bits wide.
		if u > math.MaxInt64 {
			setFloat(float64(u))
		} else {
			setInt(int64(u))
		}
	}
	setText := func(s string) {
		if len(s) != 0 {
			content, kind = s, StringCell
		}
	}

	switch v := value.(type) {
	case nil:
		// no value at all: leave the cell blank
	case bool:
		content, kind = v, BooleanCell
	case int:
		setInt(int64(v))
	case int8:
		setInt(int64(v))
	case int16:
		setInt(int64(v))
	case int32:
		setInt(int64(v))
	case int64:
		setInt(v)
	case uint8:
		setInt(int64(v))
	case uint16:
		setInt(int64(v))
	case uint32:
		setInt(int64(v))
	case uint:
		setUint(uint64(v))
	case uint64:
		setUint(v)
	case float32:
		setFloat(float64(v))
	case float64:
		setFloat(v)
	case string:
		setText(v)
	case []byte:
		setText(string(v))
	case []uint16:
		setText(string(utf16.Decode(v)))
	case []rune:
		setText(string(v))
	case time.Time:
		// must precede fmt.Stringer, which time.Time also satisfies
		content, kind = v, DateCell
	case fmt.Stringer:
		setText(v.String())
	default:
		panic("grate: data type not handled")
	}
	return Cell{content, kind}
}
// SetFormatNumber changes the number format stored with the cell.
//
// A format of 0 means "no format": on a plain cell the format slot is
// removed. When the cell also carries a hyperlink (a fourth element) the
// slot must stay in place, so it is overwritten instead of being truncated
// together with the link. Cells that were never populated are padded to a
// blank [nil, BlankCell] pair before a non-zero format is stored.
func (c *Cell) SetFormatNumber(f uint16) {
	cell := *c

	if len(cell) < 2 {
		if f == 0 {
			// nothing stored and nothing to store
			return
		}
		if len(cell) == 0 {
			cell = append(cell, nil)
		}
		cell = append(cell, BlankCell)
	}

	switch {
	case len(cell) > 3:
		// a URL follows the format slot; keep the layout intact
		cell[2] = f
	case f == 0:
		cell = cell[:2]
	case len(cell) == 2:
		cell = append(cell, f)
	default:
		cell[2] = f
	}
	*c = cell
}

View File

@ -59,6 +59,8 @@ func timeFmtFunc(f string) FmtFunc {
} }
} }
// same as above but replaces "AM" and "PM" with chinese translations.
// TODO: implement others
func cnTimeFmtFunc(f string) FmtFunc { func cnTimeFmtFunc(f string) FmtFunc {
return func(x *Formatter, v interface{}) string { return func(x *Formatter, v interface{}) string {
t, ok := v.(time.Time) t, ok := v.(time.Time)
@ -74,12 +76,3 @@ func cnTimeFmtFunc(f string) FmtFunc {
return strings.Replace(s, `PM`, `下午`, 1) return strings.Replace(s, `PM`, `下午`, 1)
} }
} }
// 0x0001 = date 0b0010 = time 0b0011 = date+time
var builtInDateFormats = map[uint16]byte{
14: 1, 15: 1, 16: 1, 17: 1, 18: 2, 19: 2, 20: 2, 21: 2, 22: 3,
45: 2, 46: 2, 47: 2, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 2,
33: 2, 34: 2, 35: 2, 36: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1,
55: 2, 56: 2, 57: 1, 58: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 2,
76: 2, 77: 3, 78: 2, 79: 2, 80: 2, 81: 1,
}

View File

@ -124,6 +124,7 @@ func convertToFloat64(v interface{}) (float64, bool) {
} }
} }
// replaces a zero with a dash
func zeroDashFunc(ff FmtFunc) FmtFunc { func zeroDashFunc(ff FmtFunc) FmtFunc {
return func(x *Formatter, v interface{}) string { return func(x *Formatter, v interface{}) string {
fval, ok := convertToFloat64(v) fval, ok := convertToFloat64(v)
@ -152,7 +153,8 @@ func fracFmtFunc(n int) FmtFunc {
} }
} }
// handle (up to) all four format cases // handle (up to) all four format cases:
// positive;negative;zero;other
func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc { func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc {
stringFF := identFunc stringFF := identFunc
zeroFF := pos zeroFF := pos

View File

@ -97,7 +97,10 @@ func TestDateFormats(t *testing.T) {
fx := &Formatter{} fx := &Formatter{}
for _, t := range testDates { for _, t := range testDates {
for fid := range builtInDateFormats { for fid, ctype := range builtInFormatTypes {
if ctype != DateCell {
continue
}
ff, _ := goFormatters[fid] ff, _ := goFormatters[fid]
// mainly testing these don't crash... // mainly testing these don't crash...
log.Println(ff(fx, t)) log.Println(ff(fx, t))
@ -105,7 +108,7 @@ func TestDateFormats(t *testing.T) {
} }
} }
func TestBoolFormats(t *testing.T) { func TestBoolFormats(t *testing.T) {
ff := makeFormatter(`"yes";"yes";"no"`) ff, _ := makeFormatter(`"yes";"yes";"no"`)
if "no" != ff(nil, false) { if "no" != ff(nil, false) {
t.Fatal(`false should be "no"`) t.Fatal(`false should be "no"`)

View File

@ -9,8 +9,9 @@ import (
// Formatter contains formatting methods common to Excel spreadsheets. // Formatter contains formatting methods common to Excel spreadsheets.
type Formatter struct { type Formatter struct {
flags uint64 flags uint64
customCodes map[uint16]FmtFunc customCodes map[uint16]FmtFunc
customCodeTypes map[uint16]CellType
} }
const ( const (
@ -31,6 +32,7 @@ func (x *Formatter) Mode1904(enabled bool) {
func (x *Formatter) Add(fmtID uint16, formatCode string) error { func (x *Formatter) Add(fmtID uint16, formatCode string) error {
if x.customCodes == nil { if x.customCodes == nil {
x.customCodes = make(map[uint16]FmtFunc) x.customCodes = make(map[uint16]FmtFunc)
x.customCodeTypes = make(map[uint16]CellType)
} }
if strings.ToLower(formatCode) == "general" { if strings.ToLower(formatCode) == "general" {
x.customCodes[fmtID] = goFormatters[0] x.customCodes[fmtID] = goFormatters[0]
@ -46,10 +48,21 @@ func (x *Formatter) Add(fmtID uint16, formatCode string) error {
return errors.New("grate/commonxl: cannot replace existing number formats") return errors.New("grate/commonxl: cannot replace existing number formats")
} }
x.customCodes[fmtID] = makeFormatter(formatCode) x.customCodes[fmtID], x.customCodeTypes[fmtID] = makeFormatter(formatCode)
return nil return nil
} }
// getCellType reports the CellType associated with a number format ID.
// Built-in formats take precedence; otherwise the types recorded for custom
// format codes are consulted. The boolean is false when the ID is unknown.
func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) {
	ct, found := builtInFormatTypes[fmtID]
	if !found {
		// reading from a nil map is safe and simply reports "not found"
		ct, found = x.customCodeTypes[fmtID]
	}
	return ct, found
}
var ( var (
minsMatch = regexp.MustCompile("h.*m.*s") minsMatch = regexp.MustCompile("h.*m.*s")
nonEsc = regexp.MustCompile(`([^"]|^)"`) nonEsc = regexp.MustCompile(`([^"]|^)"`)
@ -60,18 +73,18 @@ var (
formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`) formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`)
) )
func makeFormatter(s string) FmtFunc { func makeFormatter(s string) (FmtFunc, CellType) {
//log.Printf("makeFormatter('%s')", s) //log.Printf("makeFormatter('%s')", s)
// remove any coloring marks // remove any coloring marks
s = formatMatchBrackets.ReplaceAllString(s, "") s = formatMatchBrackets.ReplaceAllString(s, "")
if strings.Contains(s, ";") { if strings.Contains(s, ";") {
parts := strings.Split(s, ";") parts := strings.Split(s, ";")
posFF := makeFormatter(parts[0]) posFF, ctypePos := makeFormatter(parts[0])
rem := make([]FmtFunc, len(parts)-1) rem := make([]FmtFunc, len(parts)-1)
for i, ps := range parts[1:] { for i, ps := range parts[1:] {
rem[i] = makeFormatter(ps) rem[i], _ = makeFormatter(ps)
} }
return switchFmtFunc(posFF, rem...) return switchFmtFunc(posFF, rem...), ctypePos
} }
// escaped characters, and quoted text // escaped characters, and quoted text
@ -111,10 +124,11 @@ func makeFormatter(s string) FmtFunc {
s = fixEsc.ReplaceAllString(s, `$1`) s = fixEsc.ReplaceAllString(s, `$1`)
//log.Printf(" made time formatter '%s'", s) //log.Printf(" made time formatter '%s'", s)
return timeFmtFunc(s) return timeFmtFunc(s), DateCell
} }
var ff FmtFunc var ff FmtFunc
var ctype CellType
if strings.ContainsAny(s, ".Ee") { if strings.ContainsAny(s, ".Ee") {
verb := "f" verb := "f"
if strings.ContainsAny(s, "Ee") { if strings.ContainsAny(s, "Ee") {
@ -132,6 +146,7 @@ func makeFormatter(s string) FmtFunc {
sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb) sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb)
//log.Printf(" made float formatter '%s'", sf) //log.Printf(" made float formatter '%s'", sf)
ff = sprintfFunc(sf, mul) ff = sprintfFunc(sf, mul)
ctype = FloatCell
} else { } else {
s2 := strings.ReplaceAll(s, ",", "") s2 := strings.ReplaceAll(s, ",", "")
i1 := strings.IndexAny(s2, "0") i1 := strings.IndexAny(s2, "0")
@ -146,6 +161,7 @@ func makeFormatter(s string) FmtFunc {
} }
//log.Printf(" made int formatter '%s'", sf) //log.Printf(" made int formatter '%s'", sf)
ff = sprintfFunc(sf, mul) ff = sprintfFunc(sf, mul)
ctype = IntegerCell
} }
if strings.Contains(s, ",") { if strings.Contains(s, ",") {
@ -162,10 +178,10 @@ func makeFormatter(s string) FmtFunc {
} }
if len(prepost) == 1 { if len(prepost) == 1 {
if prepost[0] == "@" { if prepost[0] == "@" {
return identFunc return identFunc, StringCell
} }
//log.Printf(" added static ('%s')", prepost[0]) //log.Printf(" added static ('%s')", prepost[0])
return staticFmtFunc(prepost[0]) return staticFmtFunc(prepost[0]), StringCell
} }
if len(prepost[0]) > 0 || len(prepost[1]) > 0 { if len(prepost[0]) > 0 || len(prepost[1]) > 0 {
prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`) prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`)
@ -176,7 +192,7 @@ func makeFormatter(s string) FmtFunc {
//log.Printf(" added surround ('%s' ... '%s')", prepost[0], prepost[1]) //log.Printf(" added surround ('%s' ... '%s')", prepost[0], prepost[1])
} }
return ff return ff, ctype
} }
// Get the number format func to use for formatting values, // Get the number format func to use for formatting values,
@ -288,3 +304,78 @@ var builtInFormats = map[uint16]string{
80: `mm:ss.0`, // `นน:ทท.0`, 80: `mm:ss.0`, // `นน:ทท.0`,
81: `d/m/bb`, // `d/m/bb`, 81: `d/m/bb`, // `d/m/bb`,
} }
// builtInFormatTypes are the underlying datatypes for built-in number formats in XLS/XLSX.
// Entries are listed in ascending format-ID order; IDs that are absent
// (including 0) have no defined type.
var builtInFormatTypes = map[uint16]CellType{
	1: IntegerCell,
	2: FloatCell,
	3: IntegerCell,
	4: FloatCell,

	9:  FloatCell,
	10: FloatCell,
	11: FloatCell,
	12: FloatCell,
	13: FloatCell,

	14: DateCell,
	15: DateCell,
	16: DateCell,
	17: DateCell,
	18: DateCell,
	19: DateCell,
	20: DateCell,
	21: DateCell,
	22: DateCell,

	27: DateCell,
	28: DateCell,
	29: DateCell,
	30: DateCell,
	31: DateCell,
	32: DateCell,
	33: DateCell,
	34: DateCell,
	35: DateCell,
	36: DateCell,

	37: IntegerCell,
	38: IntegerCell,
	39: FloatCell,
	40: FloatCell,
	41: IntegerCell,
	42: IntegerCell,
	43: FloatCell,
	44: FloatCell,

	45: DateCell, // Durations?
	46: DateCell,
	47: DateCell,

	48: FloatCell,
	49: StringCell,

	50: DateCell,
	51: DateCell,
	52: DateCell,
	53: DateCell,
	54: DateCell,
	55: DateCell,
	56: DateCell,
	57: DateCell,
	58: DateCell,

	59: IntegerCell,
	60: FloatCell,
	61: IntegerCell,
	62: FloatCell,

	67: FloatCell,
	68: FloatCell,
	69: FloatCell,
	70: FloatCell,

	71: DateCell,
	72: DateCell,
	73: DateCell,
	74: DateCell,
	75: DateCell,
	76: DateCell,
	77: DateCell,
	78: DateCell,
	79: DateCell,
	80: DateCell,
	81: DateCell,
}

171
commonxl/sheet.go Normal file
View File

@ -0,0 +1,171 @@
package commonxl
import (
"fmt"
"log"
"time"
)
// Sheet holds raw and rendered values for a spreadsheet.
type Sheet struct {
// Formatter resolves number-format IDs: Put asks it for the cell type and
// Strings uses it to render values.
Formatter *Formatter
// NumRows and NumCols are the grid dimensions recorded by Resize
// (the requested sizes plus one, and never less than 1).
NumRows int
NumCols int
// Rows is the cell grid, indexed as Rows[row][col].
Rows [][]Cell
// CurRow is the index into Rows read by Strings and Scan; Resize resets it
// to 0 and Next advances it.
CurRow int
}
// Resize the sheet for the number of rows and cols given.
// Newly added cells default to blank.
//
// Both dimensions are incremented by one before use (some sheets report
// sizes that are off by one) and clamped to at least 1. The iteration
// position is reset. Existing rows are only ever widened, never truncated,
// and rows are appended until the grid holds more than NumRows rows.
func (s *Sheet) Resize(rows, cols int) {
	// some sheets are off by one
	rows++
	cols++

	if rows <= 0 {
		rows = 1
	}
	if cols <= 0 {
		cols = 1
	}
	s.CurRow = 0
	s.NumRows = rows
	s.NumCols = cols

	// Widen every existing row that is too narrow. Ranging over the slice
	// keeps the index inside the grid even when all rows need widening.
	for i, r := range s.Rows {
		if missing := cols - len(r); missing > 0 {
			s.Rows[i] = append(r, make([]Cell, missing)...)
		}
	}

	for rows >= len(s.Rows) {
		s.Rows = append(s.Rows, make([]Cell, cols))
	}
}
// Put the value at the cell location given.
// Locations at or beyond NumRows/NumCols are logged and ignored. When the
// format number is non-zero and maps to a known cell type, the value is
// coerced to that type; otherwise its natural type is kept. The format
// number is then recorded on the cell.
func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) {
	if !(row < s.NumRows && col < s.NumCols) {
		log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d",
			row, s.NumRows, col, s.NumCols)
		return
	}

	var cell Cell
	if ct, known := s.Formatter.getCellType(fmtNum); known && fmtNum != 0 {
		cell = NewCellWithType(value, ct)
	} else {
		cell = NewCell(value)
	}
	cell.SetFormatNumber(fmtNum)
	s.Rows[row][col] = cell
}
// Set changes the value in an existing cell location.
// NB Currently only used for populating string results for formulas.
//
// The cell is always re-typed as StringCell; any format number or link
// already stored on it is kept. Locations outside the grid (the same bounds
// that Put enforces) are logged and ignored, and a location that was never
// populated receives a fresh string cell instead of causing a panic.
func (s *Sheet) Set(row, col int, value interface{}) {
	if row < 0 || col < 0 || row >= s.NumRows || col >= s.NumCols {
		log.Println("grate: cell out of bounds")
		return
	}
	cell := s.Rows[row][col]
	if len(cell) < 2 {
		// nothing was Put here before, so there are no slots to overwrite
		s.Rows[row][col] = Cell{value, StringCell}
		return
	}
	cell[0] = value
	cell[1] = StringCell
}
// SetURL adds a hyperlink to an existing cell location.
//
// Locations outside the grid (the same bounds that Put enforces) are logged
// and ignored. A location that was never populated is first given an empty
// string cell so that the link has a cell to attach to.
func (s *Sheet) SetURL(row, col int, link string) {
	if row < 0 || col < 0 || row >= s.NumRows || col >= s.NumCols {
		log.Println("grate: cell out of bounds")
		return
	}
	if len(s.Rows[row][col]) < 2 {
		s.Rows[row][col] = Cell{"", StringCell}
	}
	s.Rows[row][col].SetURL(link)
}
// Next advances to the next record of content.
// It MUST be called prior to any Scan().
// It returns false, leaving the position untouched, once the following index
// would fall outside Rows.
//
// NOTE(review): Resize resets CurRow to 0 and Next increments before the
// first read, so Rows[0] looks like it is never visited - confirm whether
// that is intended.
func (s *Sheet) Next() bool {
	following := s.CurRow + 1
	if following < len(s.Rows) {
		s.CurRow = following
		return true
	}
	return false
}
// Strings extracts values from the current record into a list of strings.
// Blank cells yield the empty string. Other cells are rendered through the
// sheet's Formatter using the cell's format number, falling back to
// fmt.Sprint when the formatter does not handle the value.
func (s *Sheet) Strings() []string {
	out := make([]string, s.NumCols)
	for idx, cell := range s.Rows[s.CurRow] {
		text := ""
		if cell.Type() != BlankCell {
			raw := cell.Value()
			formatted, handled := s.Formatter.Apply(cell.FormatNo(), raw)
			if !handled {
				formatted = fmt.Sprint(raw)
			}
			text = formatted
		}
		out[idx] = text
	}
	return out
}
// Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of 5 supported types:
// bool, int64, float64, string, or time.Time
//
// Argument i receives the value of column i. No conversion is attempted: the
// stored value must already have the destination's type. An error is
// returned when a destination is not a pointer, has an unsupported type,
// does not match the stored value, or when more destinations are given than
// the record has columns. Destinations preceding the failing one have
// already been assigned when a per-argument error is returned.
func (s *Sheet) Scan(args ...interface{}) error {
	row := s.Rows[s.CurRow]
	if len(args) > len(row) {
		// report the mismatch instead of indexing past the end of the record
		return fmt.Errorf("scan has %d destinations but the record only has %d columns", len(args), len(row))
	}

	for i, a := range args {
		val := row[i].Value()

		switch v := a.(type) {
		case bool, int64, float64, string, time.Time:
			return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i)

		case *bool:
			x, ok := val.(bool)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val)
			}
			*v = x
		case *int64:
			x, ok := val.(int64)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val)
			}
			*v = x
		case *float64:
			x, ok := val.(float64)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val)
			}
			*v = x
		case *string:
			x, ok := val.(string)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *string", i, val)
			}
			*v = x
		case *time.Time:
			x, ok := val.(time.Time)
			if !ok {
				return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val)
			}
			*v = x

		default:
			return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a)
		}
	}
	return nil
}
// IsEmpty returns true if there are no data values, i.e. neither dimension
// of the sheet exceeds 1.
func (s *Sheet) IsEmpty() bool {
	hasData := s.NumCols > 1 || s.NumRows > 1
	return !hasData
}
// Err returns the last error that occurred.
// This implementation records no errors and always returns nil.
func (s *Sheet) Err() error {
return nil
}

View File

@ -31,7 +31,7 @@ type Collection interface {
// Scan extracts values from the current record into the provided arguments // Scan extracts values from the current record into the provided arguments
// Arguments must be pointers to one of 5 supported types: // Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time // bool, int64, float64, string, or time.Time
// If invalid, returns ErrInvalidScanType // If invalid, returns ErrInvalidScanType
Scan(args ...interface{}) error Scan(args ...interface{}) error
@ -82,3 +82,15 @@ func Register(name string, priority int, opener OpenFunc) error {
}) })
return nil return nil
} }
// Marker strings stored as cell values in place of the non-origin cells of
// a merged range.
const (
// ContinueColumnMerged marks a continuation column within a merged cell.
ContinueColumnMerged = "→"
// EndColumnMerged marks the last column of a merged cell.
EndColumnMerged = "⇥"
// ContinueRowMerged marks a continuation row within a merged cell.
ContinueRowMerged = "↓"
// EndRowMerged marks the last row of a merged cell.
EndRowMerged = "⤓"
)

View File

@ -3,13 +3,12 @@ package xls
import ( import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt"
"log" "log"
"math" "math"
"time"
"unicode/utf16" "unicode/utf16"
"github.com/pbnjay/grate" "github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
) )
// List (visible) sheet names from the workbook. // List (visible) sheet names from the workbook.
@ -39,94 +38,24 @@ func (b *WorkBook) Get(sheetName string) (grate.Collection, error) {
for _, s := range b.sheets { for _, s := range b.sheets {
if s.Name == sheetName { if s.Name == sheetName {
ss := b.pos2substream[int64(s.Position)] ss := b.pos2substream[int64(s.Position)]
ws := &WorkSheet{ return b.parseSheet(s, ss)
b: b, s: s, ss: ss,
iterRow: -1,
}
return ws, ws.parse()
} }
} }
return nil, errors.New("xls: sheet not found") return nil, errors.New("xls: sheet not found")
} }
// WorkSheet holds various metadata about a sheet in a Workbook. func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) {
type WorkSheet struct { res := &commonxl.Sheet{
b *WorkBook Formatter: &b.nfmt,
s *boundSheet
ss int
err error
minRow int
maxRow int // maximum valid row index (0xFFFF)
minCol int
maxCol int // maximum valid column index (0xFF)
rows []*row
empty bool
iterRow int
iterMC int
}
type staticCellType rune
const (
staticBlank staticCellType = 0
// marks a continuation column within a merged cell.
continueColumnMerged staticCellType = '→'
// marks the last column of a merged cell.
endColumnMerged staticCellType = '⇥'
// marks a continuation row within a merged cell.
continueRowMerged staticCellType = '↓'
// marks the last row of a merged cell.
endRowMerged staticCellType = '⤓'
)
func (s staticCellType) String() string {
if s == 0 {
return ""
} }
return string([]rune{rune(s)}) var minRow, maxRow uint32
} var minCol, maxCol uint16
type row struct {
// each value must be one of: int, float64, string, or time.Time
cols []interface{}
}
func (s *WorkSheet) makeCells() {
// ensure we always have a complete matrix
for len(s.rows) <= s.maxRow {
emptyRow := make([]interface{}, s.maxCol+1)
s.rows = append(s.rows, &row{emptyRow})
}
}
func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) {
if colIndex > s.maxCol || rowIndex > s.maxRow {
// invalid
return
}
// ensure we always have a complete matrix
for len(s.rows) <= rowIndex {
emptyRow := make([]interface{}, s.maxCol+1)
s.rows = append(s.rows, &row{emptyRow})
}
s.rows[rowIndex].cols[colIndex] = val
}
func (s *WorkSheet) IsEmpty() bool {
return s.empty
}
func (s *WorkSheet) parse() error {
// temporary string buffer // temporary string buffer
us := make([]uint16, 8224) us := make([]uint16, 8224)
inSubstream := 0 inSubstream := 0
for idx, r := range s.b.substreams[s.ss] { for idx, r := range b.substreams[ss] {
if inSubstream > 0 { if inSubstream > 0 {
if r.RecType == RecTypeEOF { if r.RecType == RecTypeEOF {
inSubstream-- inSubstream--
@ -145,15 +74,15 @@ func (s *WorkSheet) parse() error {
case RecTypeWsBool: case RecTypeWsBool:
if (r.Data[1] & 0x10) != 0 { if (r.Data[1] & 0x10) != 0 {
// it's a dialog // it's a dialog
return nil return nil, nil
} }
case RecTypeDimensions: case RecTypeDimensions:
// max = 0-based index of the row AFTER the last valid index // max = 0-based index of the row AFTER the last valid index
minRow := binary.LittleEndian.Uint32(r.Data[:4]) minRow = binary.LittleEndian.Uint32(r.Data[:4])
maxRow := binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000 maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000
minCol := binary.LittleEndian.Uint16(r.Data[8:10]) minCol = binary.LittleEndian.Uint16(r.Data[8:10])
maxCol := binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100 maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100
if grate.Debug { if grate.Debug {
log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)", log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)",
minCol, minRow, maxCol, maxRow) minCol, minRow, maxCol, maxRow)
@ -164,21 +93,15 @@ func (s *WorkSheet) parse() error {
if minCol > 0x00FF || maxCol > 0x0100 { if minCol > 0x00FF || maxCol > 0x0100 {
log.Println("invalid dimensions") log.Println("invalid dimensions")
} }
s.minRow = int(uint64(minRow) & 0x0FFFF)
s.maxRow = int(uint64(maxRow)&0x1FFFF) - 1 // translate to last valid index
s.minCol = int(uint64(minCol) & 0x000FF)
s.maxCol = int(uint64(maxCol)&0x001FF) - 1 // translate to last valid index
if (maxRow-minRow) == 0 || (maxCol-minCol) == 0 {
s.empty = true
}
// pre-allocate cells // pre-allocate cells
s.makeCells() res.Resize(int(maxRow), int(maxCol))
} }
} }
inSubstream = 0 inSubstream = 0
var formulaRow, formulaCol uint16 var formulaRow, formulaCol uint16
for ridx, r := range s.b.substreams[s.ss] { for ridx, r := range b.substreams[ss] {
if inSubstream > 0 { if inSubstream > 0 {
if r.RecType == RecTypeEOF { if r.RecType == RecTypeEOF {
inSubstream-- inSubstream--
@ -216,15 +139,11 @@ func (s *WorkSheet) parse() error {
if r.Data[6] == 1 { if r.Data[6] == 1 {
bv = true bv = true
} }
var rval interface{} = bv
var fno uint16 var fno uint16
if ixfe < len(s.b.xfs) { if ixfe < len(b.xfs) {
fno = s.b.xfs[ixfe] fno = b.xfs[ixfe]
} }
if fval, ok := s.b.nfmt.Apply(fno, bv); ok { res.Put(rowIndex, colIndex, bv, fno)
rval = fval
}
s.placeValue(rowIndex, colIndex, rval)
//log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv) //log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv)
} else { } else {
// it's an error, load the label // it's an error, load the label
@ -232,7 +151,7 @@ func (s *WorkSheet) parse() error {
if !ok { if !ok {
be = "<unknown error>" be = "<unknown error>"
} }
s.placeValue(rowIndex, colIndex, be) res.Put(rowIndex, colIndex, be, 0)
//log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be) //log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be)
} }
@ -253,11 +172,10 @@ func (s *WorkSheet) parse() error {
rval = value.Float64() rval = value.Float64()
} }
var fno uint16 var fno uint16
if ixfe < len(s.b.xfs) { if ixfe < len(b.xfs) {
fno = s.b.xfs[ixfe] fno = b.xfs[ixfe]
} }
rval, _ = s.b.nfmt.Apply(fno, rval) res.Put(rowIndex, colIndex+i, rval, fno)
s.placeValue(rowIndex, colIndex+i, rval)
} }
//log.Printf("mulrow spec: %+v", *mr) //log.Printf("mulrow spec: %+v", *mr)
@ -269,12 +187,10 @@ func (s *WorkSheet) parse() error {
value := math.Float64frombits(xnum) value := math.Float64frombits(xnum)
var fno uint16 var fno uint16
if ixfe < len(s.b.xfs) { if ixfe < len(b.xfs) {
fno = s.b.xfs[ixfe] fno = b.xfs[ixfe]
} }
rval, _ := s.b.nfmt.Apply(fno, value) res.Put(rowIndex, colIndex, value, fno)
s.placeValue(rowIndex, colIndex, rval)
//log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value) //log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value)
case RecTypeRK: case RecTypeRK:
@ -290,11 +206,10 @@ func (s *WorkSheet) parse() error {
rval = value.Float64() rval = value.Float64()
} }
var fno uint16 var fno uint16
if ixfe < len(s.b.xfs) { if ixfe < len(b.xfs) {
fno = s.b.xfs[ixfe] fno = b.xfs[ixfe]
} }
rval, _ = s.b.nfmt.Apply(fno, rval) res.Put(rowIndex, colIndex, rval, fno)
s.placeValue(rowIndex, colIndex, rval)
//log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String()) //log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String())
case RecTypeFormula: case RecTypeFormula:
@ -302,32 +217,30 @@ func (s *WorkSheet) parse() error {
formulaCol = binary.LittleEndian.Uint16(r.Data[2:4]) formulaCol = binary.LittleEndian.Uint16(r.Data[2:4])
ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
fdata := r.Data[6:] fdata := r.Data[6:]
var fno uint16
if ixfe < len(b.xfs) {
fno = b.xfs[ixfe]
}
if fdata[6] == 0xFF && r.Data[7] == 0xFF { if fdata[6] == 0xFF && r.Data[7] == 0xFF {
switch fdata[0] { switch fdata[0] {
case 0: case 0:
// string in next record // string in next record
// put placeholder now to record the numFmt
res.Put(int(formulaRow), int(formulaCol), "", fno)
case 1: case 1:
// boolean // boolean
bv := false bv := false
if fdata[2] != 0 { if fdata[2] != 0 {
bv = true bv = true
} }
var rval interface{} = bv res.Put(int(formulaRow), int(formulaCol), bv, fno)
var fno uint16
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
}
if fval, ok := s.b.nfmt.Apply(fno, bv); ok {
rval = fval
}
s.placeValue(int(formulaRow), int(formulaCol), rval)
case 2: case 2:
// error value // error value
be, ok := berrLookup[fdata[2]] be, ok := berrLookup[fdata[2]]
if !ok { if !ok {
be = "<unknown error>" be = "<unknown error>"
} }
s.placeValue(int(formulaRow), int(formulaCol), be) res.Put(int(formulaRow), int(formulaCol), be, 0)
case 3: case 3:
// blank string // blank string
default: default:
@ -336,12 +249,7 @@ func (s *WorkSheet) parse() error {
} else { } else {
xnum := binary.LittleEndian.Uint64(r.Data[6:]) xnum := binary.LittleEndian.Uint64(r.Data[6:])
value := math.Float64frombits(xnum) value := math.Float64frombits(xnum)
var fno uint16 res.Put(int(formulaRow), int(formulaCol), value, fno)
if ixfe < len(s.b.xfs) {
fno = s.b.xfs[ixfe]
}
rval, _ := s.b.nfmt.Apply(fno, value)
s.placeValue(int(formulaRow), int(formulaCol), rval)
} }
//log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data) //log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data)
@ -370,11 +278,11 @@ func (s *WorkSheet) parse() error {
fstr = string(utf16.Decode(us)) fstr = string(utf16.Decode(us))
} }
if (ridx + 1) < len(s.b.substreams[s.ss]) { if (ridx + 1) < len(b.substreams[ss]) {
ridx2 := ridx + 1 ridx2 := ridx + 1
nrecs := len(s.b.substreams[s.ss]) nrecs := len(b.substreams[ss])
for ridx2 < nrecs { for ridx2 < nrecs {
r2 := s.b.substreams[s.ss][ridx2] r2 := b.substreams[ss][ridx2]
if r2.RecType != RecTypeContinue { if r2.RecType != RecTypeContinue {
break break
} }
@ -393,20 +301,22 @@ func (s *WorkSheet) parse() error {
ridx2++ ridx2++
} }
} }
// TODO: does formula record formatted dates as pre-computed strings? res.Set(int(formulaRow), int(formulaCol), fstr)
s.placeValue(int(formulaRow), int(formulaCol), fstr)
case RecTypeLabelSst: case RecTypeLabelSst:
rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2]))
colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4]))
//ixfe := binary.LittleEndian.Uint16(r.Data[4:6]) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6]))
sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:])) sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:]))
if sstIndex > len(s.b.strings) { if sstIndex > len(b.strings) {
return errors.New("xls: invalid sst index") return nil, errors.New("xls: invalid sst index")
} }
// FIXME: double check that ixfe doesn't modify output var fno uint16
if s.b.strings[sstIndex] != "" { if ixfe < len(b.xfs) {
s.placeValue(rowIndex, colIndex, s.b.strings[sstIndex]) fno = b.xfs[ixfe]
}
if b.strings[sstIndex] != "" {
res.Put(rowIndex, colIndex, b.strings[sstIndex], fno)
} }
//log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex]) //log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex])
@ -415,19 +325,19 @@ func (s *WorkSheet) parse() error {
lastRow := binary.LittleEndian.Uint16(r.Data[2:4]) lastRow := binary.LittleEndian.Uint16(r.Data[2:4])
firstCol := binary.LittleEndian.Uint16(r.Data[4:6]) firstCol := binary.LittleEndian.Uint16(r.Data[4:6])
lastCol := binary.LittleEndian.Uint16(r.Data[6:]) lastCol := binary.LittleEndian.Uint16(r.Data[6:])
if int(firstCol) > s.maxCol { if int(firstCol) > int(maxCol) {
//log.Println("invalid hyperlink column") //log.Println("invalid hyperlink column")
continue continue
} }
if int(firstRow) > s.maxRow { if int(firstRow) > int(maxRow) {
//log.Println("invalid hyperlink row") //log.Println("invalid hyperlink row")
continue continue
} }
if lastRow == 0xFFFF { // placeholder value indicate "last" if lastRow == 0xFFFF { // placeholder value indicate "last"
lastRow = uint16(s.maxRow) lastRow = uint16(maxRow) - 1
} }
if lastCol == 0xFF { // placeholder value indicate "last" if lastCol == 0xFF { // placeholder value indicate "last"
lastCol = uint16(s.maxCol) lastCol = uint16(maxCol) - 1
} }
// decode the hyperlink datastructure and try to find the // decode the hyperlink datastructure and try to find the
@ -443,19 +353,19 @@ func (s *WorkSheet) parse() error {
for cn := int(firstCol); cn <= int(lastCol); cn++ { for cn := int(firstCol); cn <= int(lastCol); cn++ {
if rn == int(firstRow) && cn == int(firstCol) { if rn == int(firstRow) && cn == int(firstCol) {
// TODO: provide custom hooks for how to handle links in output // TODO: provide custom hooks for how to handle links in output
s.placeValue(rn, cn, displayText+" <"+linkText+">") res.Put(rn, cn, displayText+" <"+linkText+">", 0)
} else if cn == int(firstCol) { } else if cn == int(firstCol) {
// first and last column MAY be the same // first and last column MAY be the same
if rn == int(lastRow) { if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged) res.Put(rn, cn, grate.EndRowMerged, 0)
} else { } else {
s.placeValue(rn, cn, continueRowMerged) res.Put(rn, cn, grate.ContinueRowMerged, 0)
} }
} else if cn == int(lastCol) { } else if cn == int(lastCol) {
// first and last column are NOT the same // first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged) res.Put(rn, cn, grate.EndColumnMerged, 0)
} else { } else {
s.placeValue(rn, cn, continueColumnMerged) res.Put(rn, cn, grate.ContinueColumnMerged, 0)
} }
} }
} }
@ -482,10 +392,10 @@ func (s *WorkSheet) parse() error {
raw = raw[8:] raw = raw[8:]
if lastRow == 0xFFFF { // placeholder value indicate "last" if lastRow == 0xFFFF { // placeholder value indicate "last"
lastRow = uint16(s.maxRow) lastRow = uint16(maxRow) - 1
} }
if lastCol == 0xFF { // placeholder value indicate "last" if lastCol == 0xFF { // placeholder value indicate "last"
lastCol = uint16(s.maxCol) lastCol = uint16(maxCol) - 1
} }
for rn := int(firstRow); rn <= int(lastRow); rn++ { for rn := int(firstRow); rn <= int(lastRow); rn++ {
for cn := int(firstCol); cn <= int(lastCol); cn++ { for cn := int(firstCol); cn <= int(lastCol); cn++ {
@ -494,15 +404,15 @@ func (s *WorkSheet) parse() error {
} else if cn == int(firstCol) { } else if cn == int(firstCol) {
// first and last column MAY be the same // first and last column MAY be the same
if rn == int(lastRow) { if rn == int(lastRow) {
s.placeValue(rn, cn, endRowMerged) res.Put(rn, cn, grate.EndRowMerged, 0)
} else { } else {
s.placeValue(rn, cn, continueRowMerged) res.Put(rn, cn, grate.ContinueRowMerged, 0)
} }
} else if cn == int(lastCol) { } else if cn == int(lastCol) {
// first and last column are NOT the same // first and last column are NOT the same
s.placeValue(rn, cn, endColumnMerged) res.Put(rn, cn, grate.EndColumnMerged, 0)
} else { } else {
s.placeValue(rn, cn, continueColumnMerged) res.Put(rn, cn, grate.ContinueColumnMerged, 0)
} }
} }
} }
@ -524,64 +434,7 @@ func (s *WorkSheet) parse() error {
*/ */
} }
} }
return nil return res, nil
}
// Err returns the last error that occurred, as stored in s.err.
func (s *WorkSheet) Err() error {
return s.err
}
// Next moves the row cursor forward by one and reports whether the cursor
// still refers to an existing row of s.rows.
// It MUST be called prior to any Scan().
func (s *WorkSheet) Next() bool {
	s.iterRow++
	hasRow := len(s.rows) > s.iterRow
	return hasRow
}
// Strings renders every column of the current row as a string.
//
// The result has one entry per column. Columns holding nil or the empty
// string are left as "". Strings are copied verbatim, values implementing
// fmt.Stringer use their String method, and anything else is formatted
// with fmt.Sprint.
func (s *WorkSheet) Strings() []string {
	cells := s.rows[s.iterRow].cols
	out := make([]string, len(cells))
	for idx, cell := range cells {
		if cell == nil || cell == "" {
			// blank cells keep the zero-value string
			continue
		}
		if text, isString := cell.(string); isString {
			out[idx] = text
		} else if str, isStringer := cell.(fmt.Stringer); isStringer {
			out[idx] = str.String()
		} else {
			out[idx] = fmt.Sprint(cell)
		}
	}
	return out
}
// Scan extracts values from the current row (s.rows[s.iterRow]) into the
// provided arguments, pairing argument i with column i.
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
// Any other argument type makes Scan return grate.ErrInvalidScanType.
//
// NOTE(review): the type assertions below use the single-value form, so a
// cell whose dynamic type differs from the destination type (including a nil
// cell) panics instead of producing an error. Passing more arguments than
// the row has columns panics as well, because cols[i] is indexed unchecked.
func (s *WorkSheet) Scan(args ...interface{}) error {
currow := s.rows[s.iterRow]
for i, a := range args {
// dispatch on the destination pointer type
switch v := a.(type) {
case *bool:
*v = currow.cols[i].(bool)
case *int:
*v = currow.cols[i].(int)
case *float64:
*v = currow.cols[i].(float64)
case *string:
*v = currow.cols[i].(string)
case *time.Time:
*v = currow.cols[i].(time.Time)
default:
// unsupported destination type
return grate.ErrInvalidScanType
}
}
return nil
} }
var berrLookup = map[byte]string{ var berrLookup = map[byte]string{

View File

@ -282,11 +282,13 @@ func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error {
if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD { if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD {
return errors.New("xls: unsupported biff version") return errors.New("xls: unsupported biff version")
} }
if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 { /*
// we only support the workbook or worksheet substreams if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 {
log.Println("xls: unsupported document type") // we only support the workbook or worksheet substreams
//break log.Println("xls: unsupported document type")
} //break
}
*/
case RecTypeCodePage: case RecTypeCodePage:
// BIFF8 is entirely UTF-16LE so this is actually ignored // BIFF8 is entirely UTF-16LE so this is actually ignored

View File

@ -3,13 +3,11 @@ package xlsx
import ( import (
"encoding/xml" "encoding/xml"
"errors" "errors"
"fmt"
"io" "io"
"log" "log"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/pbnjay/grate" "github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl" "github.com/pbnjay/grate/commonxl"
@ -23,24 +21,15 @@ type Sheet struct {
err error err error
minRow int wrapped *commonxl.Sheet
maxRow int
minCol int
maxCol int
rows []*row
empty bool
iterRow int
} }
var errNotLoaded = errors.New("xlsx: sheet not loaded") var errNotLoaded = errors.New("xlsx: sheet not loaded")
// row holds the cell values of a single sheet row, indexed by column.
type row struct {
// each value must be one of: bool, int, float64, string, or time.Time
// (unset cells are left nil)
cols []interface{}
}
func (s *Sheet) parseSheet() error { func (s *Sheet) parseSheet() error {
s.wrapped = &commonxl.Sheet{
Formatter: &s.d.fmt,
}
linkmap := make(map[string]string) linkmap := make(map[string]string)
base := filepath.Base(s.docname) base := filepath.Base(s.docname)
sub := strings.TrimSuffix(s.docname, base) sub := strings.TrimSuffix(s.docname, base)
@ -68,7 +57,9 @@ func (s *Sheet) parseSheet() error {
currentCellType := BlankCellType currentCellType := BlankCellType
currentCell := "" currentCell := ""
var numFormat commonxl.FmtFunc var fno uint16
var maxCol, maxRow int
tok, err := dec.RawToken() tok, err := dec.RawToken()
for ; err == nil; tok, err = dec.RawToken() { for ; err == nil; tok, err = dec.RawToken() {
switch v := tok.(type) { switch v := tok.(type) {
@ -79,6 +70,7 @@ func (s *Sheet) parseSheet() error {
c, r := refToIndexes(currentCell) c, r := refToIndexes(currentCell)
if c >= 0 && r >= 0 { if c >= 0 && r >= 0 {
var val interface{} = string(v) var val interface{} = string(v)
switch currentCellType { switch currentCellType {
case BooleanCellType: case BooleanCellType:
if v[0] == '1' { if v[0] == '1' {
@ -87,13 +79,12 @@ func (s *Sheet) parseSheet() error {
val = false val = false
} }
case DateCellType: case DateCellType:
log.Println("CELL DATE", val, numFormat) log.Println("CELL DATE", val, fno)
case NumberCellType: case NumberCellType:
fval, err := strconv.ParseFloat(string(v), 64) fval, err := strconv.ParseFloat(string(v), 64)
if err == nil { if err == nil {
val = fval val = fval
} }
val = numFormat(&s.d.fmt, fval)
//log.Println("CELL NUMBER", val, numFormat) //log.Println("CELL NUMBER", val, numFormat)
case SharedStringCellType: case SharedStringCellType:
//log.Println("CELL SHSTR", val, currentCellType, numFormat) //log.Println("CELL SHSTR", val, currentCellType, numFormat)
@ -106,9 +97,9 @@ func (s *Sheet) parseSheet() error {
case ErrorCellType, FormulaStringCellType, InlineStringCellType: case ErrorCellType, FormulaStringCellType, InlineStringCellType:
//log.Println("CELL ERR/FORM/INLINE", val, currentCellType) //log.Println("CELL ERR/FORM/INLINE", val, currentCellType)
default: default:
log.Println("CELL UNKNOWN", val, currentCellType, numFormat) log.Println("CELL UNKNOWN", val, currentCellType, fno)
} }
s.placeValue(r, c, val) s.wrapped.Put(r, c, val, fno)
} else { } else {
//log.Println("FAIL row/col: ", currentCell) //log.Println("FAIL row/col: ", currentCell)
} }
@ -117,20 +108,19 @@ func (s *Sheet) parseSheet() error {
case "dimension": case "dimension":
ax := getAttrs(v.Attr, "ref") ax := getAttrs(v.Attr, "ref")
if ax[0] == "A1" { if ax[0] == "A1" {
maxCol, maxRow = 1, 1
// short-circuit empty sheet // short-circuit empty sheet
s.minCol, s.minRow = 0, 0 s.wrapped.Resize(1, 1)
s.maxCol, s.maxRow = 1, 1
s.empty = true
continue continue
} }
dims := strings.Split(ax[0], ":") dims := strings.Split(ax[0], ":")
if len(dims) == 1 { if len(dims) == 1 {
s.minCol, s.minRow = 0, 0 maxCol, maxRow = refToIndexes(dims[0])
s.maxCol, s.maxRow = refToIndexes(dims[0])
} else { } else {
s.minCol, s.minRow = refToIndexes(dims[0]) //minCol, minRow := refToIndexes(dims[0])
s.maxCol, s.maxRow = refToIndexes(dims[1]) maxCol, maxRow = refToIndexes(dims[1])
} }
s.wrapped.Resize(maxRow, maxCol)
//log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol) //log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol)
case "row": case "row":
//currentRow = ax["r"] // unsigned int row index //currentRow = ax["r"] // unsigned int row index
@ -145,9 +135,9 @@ func (s *Sheet) parseSheet() error {
style := ax[2] style := ax[2]
sid, _ := strconv.ParseInt(style, 10, 64) sid, _ := strconv.ParseInt(style, 10, 64)
if len(s.d.xfs) > int(sid) { if len(s.d.xfs) > int(sid) {
numFormat = s.d.xfs[sid] // unsigned integer lookup fno = s.d.xfs[sid]
} else { } else {
numFormat = s.d.xfs[0] fno = 0
} }
//log.Println("CELL", currentCell, sid, numFormat, currentCellType) //log.Println("CELL", currentCell, sid, numFormat, currentCellType)
case "v": case "v":
@ -161,6 +151,12 @@ func (s *Sheet) parseSheet() error {
if len(dims) > 1 { if len(dims) > 1 {
endCol, endRow = refToIndexes(dims[1]) endCol, endRow = refToIndexes(dims[1])
} }
if endRow > maxRow {
endRow = maxRow
}
if endCol > maxCol {
endCol = maxCol
}
for r := startRow; r <= endRow; r++ { for r := startRow; r <= endRow; r++ {
for c := startCol; c <= endCol; c++ { for c := startCol; c <= endCol; c++ {
if r == startRow && c == startCol { if r == startRow && c == startCol {
@ -168,15 +164,15 @@ func (s *Sheet) parseSheet() error {
} else if c == startCol { } else if c == startCol {
// first and last column MAY be the same // first and last column MAY be the same
if r == endRow { if r == endRow {
s.placeValue(r, c, endRowMerged) s.wrapped.Put(r, c, grate.EndRowMerged, 0)
} else { } else {
s.placeValue(r, c, continueRowMerged) s.wrapped.Put(r, c, grate.ContinueRowMerged, 0)
} }
} else if c == endCol { } else if c == endCol {
// first and last column are NOT the same // first and last column are NOT the same
s.placeValue(r, c, endColumnMerged) s.wrapped.Put(r, c, grate.EndColumnMerged, 0)
} else { } else {
s.placeValue(r, c, continueColumnMerged) s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0)
} }
} }
} }
@ -185,12 +181,8 @@ func (s *Sheet) parseSheet() error {
ax := getAttrs(v.Attr, "ref", "id") ax := getAttrs(v.Attr, "ref", "id")
col, row := refToIndexes(ax[0]) col, row := refToIndexes(ax[0])
link := linkmap[ax[1]] link := linkmap[ax[1]]
if len(s.rows) > row && len(s.rows[row].cols) > col { s.wrapped.Put(row, col, link, 0)
if sstr, ok := s.rows[row].cols[col].(string); ok { s.wrapped.SetURL(row, col, link)
link = sstr + " <" + link + ">"
}
}
s.placeValue(row, col, link)
case "worksheet", "mergeCells", "hyperlinks": case "worksheet", "mergeCells", "hyperlinks":
// containers // containers
@ -220,71 +212,3 @@ func (s *Sheet) parseSheet() error {
} }
return err return err
} }
// placeValue stores val in the cell at (rowIndex, colIndex).
//
// Coordinates outside the sheet are silently ignored: anything beyond
// s.maxRow / s.maxCol, and any negative index. Missing rows up to rowIndex
// are appended first, each sized s.maxCol+1, so the stored rows always form
// a complete matrix. Storing a value clears the sheet's empty flag.
func (s *Sheet) placeValue(rowIndex, colIndex int, val interface{}) {
	// Negative indexes would pass the upper-bound test and then panic when
	// used to index the slices below. Not every caller validates the result
	// of refToIndexes before calling, so reject them here.
	if rowIndex < 0 || colIndex < 0 {
		return
	}
	if colIndex > s.maxCol || rowIndex > s.maxRow {
		// invalid
		return
	}

	// ensure we always have a complete matrix
	for len(s.rows) <= rowIndex {
		s.rows = append(s.rows, &row{cols: make([]interface{}, s.maxCol+1)})
	}

	s.empty = false
	s.rows[rowIndex].cols[colIndex] = val
}
// Next moves the row cursor forward by one and reports whether the cursor
// still refers to an existing row of s.rows.
// It MUST be called prior to any Scan().
func (s *Sheet) Next() bool {
	s.iterRow++
	hasRow := len(s.rows) > s.iterRow
	return hasRow
}
// Strings renders every column of the current row as a string.
//
// The result has one entry per column. Columns holding nil or the empty
// string are left as ""; every other value is formatted with fmt.Sprint.
func (s *Sheet) Strings() []string {
	cells := s.rows[s.iterRow].cols
	out := make([]string, len(cells))
	for idx, cell := range cells {
		if cell != nil && cell != "" {
			out[idx] = fmt.Sprint(cell)
		}
	}
	return out
}
// Scan extracts values from the current row into the provided arguments,
// pairing argument i with column i.
// Arguments must be pointers to one of 5 supported types:
// bool, int, float64, string, or time.Time
//
// It returns grate.ErrInvalidScanType when an argument is not one of those
// pointer types. It returns a descriptive error, rather than panicking, when
// there is no current row, when more arguments are supplied than the row has
// columns, or when a cell's value does not have the destination's type
// (an unset, nil cell included). A destination is only written once its cell
// has been checked, so the failing destination is left untouched.
func (s *Sheet) Scan(args ...interface{}) error {
	if s.iterRow < 0 || s.iterRow >= len(s.rows) {
		return fmt.Errorf("xlsx: scan: no current row (index %d of %d rows)", s.iterRow, len(s.rows))
	}
	cols := s.rows[s.iterRow].cols
	if len(args) > len(cols) {
		return fmt.Errorf("xlsx: scan: %d destinations for a row of %d columns", len(args), len(cols))
	}

	for i, a := range args {
		// ok records whether the cell held the type the destination expects;
		// the comma-ok assertions below never panic.
		var ok bool
		switch v := a.(type) {
		case *bool:
			var x bool
			if x, ok = cols[i].(bool); ok {
				*v = x
			}
		case *int:
			var x int
			if x, ok = cols[i].(int); ok {
				*v = x
			}
		case *float64:
			var x float64
			if x, ok = cols[i].(float64); ok {
				*v = x
			}
		case *string:
			var x string
			if x, ok = cols[i].(string); ok {
				*v = x
			}
		case *time.Time:
			var x time.Time
			if x, ok = cols[i].(time.Time); ok {
				*v = x
			}
		default:
			return grate.ErrInvalidScanType
		}
		if !ok {
			return fmt.Errorf("xlsx: scan: column %d holds %T, which cannot be stored in %T", i, cols[i], a)
		}
	}
	return nil
}
// IsEmpty reports whether the sheet is currently flagged as empty (s.empty).
func (s *Sheet) IsEmpty() bool {
return s.empty
}
// Err returns the last error that occurred, as stored in s.err.
func (s *Sheet) Err() error {
return s.err
}

View File

@ -9,7 +9,6 @@ import (
"strconv" "strconv"
"github.com/pbnjay/grate" "github.com/pbnjay/grate"
"github.com/pbnjay/grate/commonxl"
) )
func (d *Document) parseRels(dec *xml.Decoder, basedir string) error { func (d *Document) parseRels(dec *xml.Decoder, basedir string) error {
@ -119,7 +118,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error {
section = 2 section = 2
ax := getAttrs(v.Attr, "count") ax := getAttrs(v.Attr, "count")
n, _ := strconv.ParseInt(ax[0], 10, 64) n, _ := strconv.ParseInt(ax[0], 10, 64)
d.xfs = make([]commonxl.FmtFunc, 0, n) d.xfs = make([]uint16, 0, n)
case "xf": case "xf":
ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId") ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId")
@ -148,11 +147,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error {
} }
nfid, _ := strconv.ParseInt(numFmtID, 10, 16) nfid, _ := strconv.ParseInt(numFmtID, 10, 16)
thisXF, ok := d.fmt.Get(uint16(nfid)) d.xfs = append(d.xfs, uint16(nfid))
if !ok {
panic("numformat unknown")
}
d.xfs = append(d.xfs, thisXF)
} else { } else {
panic("wheres is this xf??") panic("wheres is this xf??")
} }

View File

@ -27,7 +27,7 @@ type Document struct {
rels map[string]map[string]string rels map[string]map[string]string
sheets []*Sheet sheets []*Sheet
strings []string strings []string
xfs []commonxl.FmtFunc xfs []uint16
fmt commonxl.Formatter fmt commonxl.Formatter
} }
@ -163,7 +163,7 @@ func (d *Document) Get(sheetName string) (grate.Collection, error) {
if s.err == errNotLoaded { if s.err == errNotLoaded {
s.err = s.parseSheet() s.err = s.parseSheet()
} }
return s, s.err return s.wrapped, s.err
} }
} }
return nil, errors.New("xlsx: sheet not found") return nil, errors.New("xlsx: sheet not found")