diff --git a/commonxl/cell.go b/commonxl/cell.go new file mode 100644 index 0000000..3dc54f0 --- /dev/null +++ b/commonxl/cell.go @@ -0,0 +1,281 @@ +package commonxl + +import ( + "fmt" + "math" + "net/url" + "strconv" + "time" + "unicode/utf16" +) + +// CellType annotates the type of data extracted in the cell. +type CellType uint16 + +// CellType annotations for various cell value types. +const ( + BlankCell CellType = iota + IntegerCell + FloatCell + StringCell + BooleanCell + DateCell + + HyperlinkStringCell // internal type to separate URLs + StaticCell // placeholder, internal use only +) + +// Cell represents a single cell value. +type Cell []interface{} + +// internally, it is a slice sized 2 or 3 +// [Value, CellType] or [Value, CellType, FormatNumber] +// where FormatNumber is a uint16 if not 0 + +// Value returns the contents as a generic interface{}. +func (c Cell) Value() interface{} { + if len(c) == 0 { + return "" + } + return c[0] +} + +// SetURL adds a URL hyperlink to the cell. +func (c *Cell) SetURL(link string) { + (*c)[1] = HyperlinkStringCell + if len(*c) == 2 { + *c = append(*c, uint16(0), link) + } else { // len = 3 already + *c = append(*c, link) + } +} + +// URL returns the parsed URL when a cell contains a hyperlink. +func (c Cell) URL() (*url.URL, bool) { + if c.Type() == HyperlinkStringCell && len(c) >= 4 { + u, err := url.Parse(c[3].(string)) + return u, err == nil + } + return nil, false +} + +// Type returns the CellType of the value. +func (c Cell) Type() CellType { + if len(c) < 2 { + return BlankCell + } + return c[1].(CellType) +} + +// FormatNo returns the NumberFormat used for display. +func (c Cell) FormatNo() uint16 { + if len(c) == 3 { + return c[2].(uint16) + } + return 0 +} + +/////// + +var boolStrings = map[string]bool{ + "yes": true, "true": true, "t": true, "y": true, "1": true, "on": true, + "no": false, "false": false, "f": false, "n": false, "0": false, "off": false, + "YES": true, "TRUE": true, "T": true, "Y": true, "1.0": true, "ON": true, + "NO": false, "FALSE": false, "F": false, "N": false, "0.0": false, "OFF": false, +} + +// NewCellWithType creates a new cell value with the given type, coercing as necessary. +func NewCellWithType(value interface{}, t CellType) Cell { + c := NewCell(value) + if c[1] == t { + // fast path if it was already typed correctly + return c + } + + if c[1] == BooleanCell { + if t == IntegerCell { + if c[0].(bool) { + c[0] = int64(1) + } else { + c[0] = int64(0) + } + c[1] = IntegerCell + } else if t == FloatCell { + if c[0].(bool) { + c[0] = float64(1.0) + } else { + c[0] = float64(0.0) + } + c[1] = FloatCell + } else if t == StringCell { + if c[0].(bool) { + c[0] = "TRUE" + } else { + c[0] = "FALSE" + } + c[1] = FloatCell + } + } + + if c[1] == FloatCell { + if t == IntegerCell { + c[0] = int64(c[0].(float64)) + c[1] = IntegerCell + } else if t == BooleanCell { + c[0] = c[0].(float64) != 0.0 + c[1] = BooleanCell + } + } + if c[1] == IntegerCell { + if t == FloatCell { + c[0] = float64(c[0].(int64)) + c[1] = FloatCell + } else if t == BooleanCell { + c[0] = c[0].(int64) != 0 + c[1] = BooleanCell + } + } + if c[1] == StringCell { + if t == IntegerCell { + x, _ := strconv.ParseInt(c[0].(string), 10, 64) + c[0] = x + c[1] = IntegerCell + } else if t == FloatCell { + x, _ := strconv.ParseFloat(c[0].(string), 64) + c[0] = x + c[1] = FloatCell + } else if t == BooleanCell { + c[0] = boolStrings[c[0].(string)] + c[1] = BooleanCell + } + } + if t == StringCell { + c[0] = fmt.Sprint(c[0]) + c[1] = StringCell + } + if t == DateCell { + /// DO THE MAGIC CONVERSION HERE + } + return c +} + +// NewCell creates a new cell value from any builtin type. +func NewCell(value interface{}) Cell { + c := make([]interface{}, 2) + switch v := value.(type) { + case bool: + c[0] = v + c[1] = BooleanCell + case int: + c[0] = int64(v) + c[1] = IntegerCell + case int8: + c[0] = int64(v) + c[1] = IntegerCell + case int16: + c[0] = int64(v) + c[1] = IntegerCell + case int32: + c[0] = int64(v) + c[1] = IntegerCell + case int64: + c[0] = int64(v) + c[1] = IntegerCell + case uint8: + c[0] = int64(v) + c[1] = IntegerCell + case uint16: + c[0] = int64(v) + c[1] = IntegerCell + case uint32: + c[0] = int64(v) + c[1] = IntegerCell + + case uint: + if v > math.MaxInt64 { + c[0] = float64(v) + c[1] = FloatCell + } else { + c[0] = int64(v) + c[1] = IntegerCell + } + case uint64: + if v > math.MaxInt64 { + c[0] = float64(v) + c[1] = FloatCell + } else { + c[0] = int64(v) + c[1] = IntegerCell + } + + case float32: + c[0] = float64(v) + c[1] = FloatCell + case float64: + c[0] = float64(v) + c[1] = FloatCell + + case string: + if len(v) == 0 { + c[0] = nil + c[1] = BlankCell + } else { + c[0] = v + c[1] = StringCell + } + case []byte: + if len(v) == 0 { + c[0] = nil + c[1] = BlankCell + } else { + c[0] = string(v) + c[1] = StringCell + } + case []uint16: + if len(v) == 0 { + c[0] = nil + c[1] = BlankCell + } else { + c[0] = string(utf16.Decode(v)) + c[1] = StringCell + } + case []rune: + if len(v) == 0 { + c[0] = nil + c[1] = BlankCell + } else { + c[0] = string(v) + c[1] = StringCell + } + case time.Time: + c[0] = v + c[1] = DateCell + + case fmt.Stringer: + s := v.String() + if len(s) == 0 { + c[0] = nil + c[1] = BlankCell + } else { + c[0] = s + c[1] = StringCell + } + default: + panic("grate: data type not handled") + } + return Cell(c) +} + +// SetFormatNumber changes the number format stored with the cell. +func (c *Cell) SetFormatNumber(f uint16) { + if f == 0 { + *c = (*c)[:2] + return + } + + if len(*c) == 2 { + *c = append(*c, f) + } else { + (*c)[2] = f + } +} diff --git a/commonxl/dates.go b/commonxl/dates.go index 73eeeed..dad6ae9 100644 --- a/commonxl/dates.go +++ b/commonxl/dates.go @@ -59,6 +59,8 @@ func timeFmtFunc(f string) FmtFunc { } } +// same as above but replaces "AM" and "PM" with chinese translations. +// TODO: implement others func cnTimeFmtFunc(f string) FmtFunc { return func(x *Formatter, v interface{}) string { t, ok := v.(time.Time) @@ -74,12 +76,3 @@ func cnTimeFmtFunc(f string) FmtFunc { return strings.Replace(s, `PM`, `下午`, 1) } } - -// 0x0001 = date 0b0010 = time 0b0011 = date+time -var builtInDateFormats = map[uint16]byte{ - 14: 1, 15: 1, 16: 1, 17: 1, 18: 2, 19: 2, 20: 2, 21: 2, 22: 3, - 45: 2, 46: 2, 47: 2, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 2, - 33: 2, 34: 2, 35: 2, 36: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, - 55: 2, 56: 2, 57: 1, 58: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 2, - 76: 2, 77: 3, 78: 2, 79: 2, 80: 2, 81: 1, -} diff --git a/commonxl/fmt.go b/commonxl/fmt.go index 9416a72..dd3a419 100644 --- a/commonxl/fmt.go +++ b/commonxl/fmt.go @@ -124,6 +124,7 @@ func convertToFloat64(v interface{}) (float64, bool) { } } +// replaces a zero with a dash func zeroDashFunc(ff FmtFunc) FmtFunc { return func(x *Formatter, v interface{}) string { fval, ok := convertToFloat64(v) @@ -152,7 +153,8 @@ func fracFmtFunc(n int) FmtFunc { } } -// handle (up to) all four format cases +// handle (up to) all four format cases: +// positive;negative;zero;other func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc { stringFF := identFunc zeroFF := pos diff --git a/commonxl/fmt_test.go b/commonxl/fmt_test.go index d844ff1..074b8a2 100644 --- a/commonxl/fmt_test.go +++ b/commonxl/fmt_test.go @@ -97,7 +97,10 @@ func TestDateFormats(t *testing.T) { fx := &Formatter{} for _, t := range testDates { - for fid := range builtInDateFormats { + for fid, ctype := range builtInFormatTypes { + if ctype != DateCell { + continue + } ff, _ := goFormatters[fid] // mainly testing these don't crash... log.Println(ff(fx, t)) @@ -105,7 +108,7 @@ func TestDateFormats(t *testing.T) { } } func TestBoolFormats(t *testing.T) { - ff := makeFormatter(`"yes";"yes";"no"`) + ff, _ := makeFormatter(`"yes";"yes";"no"`) if "no" != ff(nil, false) { t.Fatal(`false should be "no"`) diff --git a/commonxl/formats.go b/commonxl/formats.go index 831eab5..4049085 100644 --- a/commonxl/formats.go +++ b/commonxl/formats.go @@ -9,8 +9,9 @@ import ( // Formatter contains formatting methods common to Excel spreadsheets. type Formatter struct { - flags uint64 - customCodes map[uint16]FmtFunc + flags uint64 + customCodes map[uint16]FmtFunc + customCodeTypes map[uint16]CellType } const ( @@ -31,6 +32,7 @@ func (x *Formatter) Mode1904(enabled bool) { func (x *Formatter) Add(fmtID uint16, formatCode string) error { if x.customCodes == nil { x.customCodes = make(map[uint16]FmtFunc) + x.customCodeTypes = make(map[uint16]CellType) } if strings.ToLower(formatCode) == "general" { x.customCodes[fmtID] = goFormatters[0] @@ -46,10 +48,21 @@ func (x *Formatter) Add(fmtID uint16, formatCode string) error { return errors.New("grate/commonxl: cannot replace existing number formats") } - x.customCodes[fmtID] = makeFormatter(formatCode) + x.customCodes[fmtID], x.customCodeTypes[fmtID] = makeFormatter(formatCode) return nil } +func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) { + if ct, ok := builtInFormatTypes[fmtID]; ok { + return ct, true + } + if x.customCodeTypes != nil { + ct, ok := x.customCodeTypes[fmtID] + return ct, ok + } + return 0, false +} + var ( minsMatch = regexp.MustCompile("h.*m.*s") nonEsc = regexp.MustCompile(`([^"]|^)"`) @@ -60,18 +73,18 @@ var ( formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`) ) -func makeFormatter(s string) FmtFunc { +func makeFormatter(s string) (FmtFunc, CellType) { //log.Printf("makeFormatter('%s')", s) // remove any coloring marks s = formatMatchBrackets.ReplaceAllString(s, "") if strings.Contains(s, ";") { parts := strings.Split(s, ";") - posFF := makeFormatter(parts[0]) + posFF, ctypePos := makeFormatter(parts[0]) rem := make([]FmtFunc, len(parts)-1) for i, ps := range parts[1:] { - rem[i] = makeFormatter(ps) + rem[i], _ = makeFormatter(ps) } - return switchFmtFunc(posFF, rem...) + return switchFmtFunc(posFF, rem...), ctypePos } // escaped characters, and quoted text @@ -111,10 +124,11 @@ func makeFormatter(s string) FmtFunc { s = fixEsc.ReplaceAllString(s, `$1`) //log.Printf(" made time formatter '%s'", s) - return timeFmtFunc(s) + return timeFmtFunc(s), DateCell } var ff FmtFunc + var ctype CellType if strings.ContainsAny(s, ".Ee") { verb := "f" if strings.ContainsAny(s, "Ee") { @@ -132,6 +146,7 @@ func makeFormatter(s string) FmtFunc { sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb) //log.Printf(" made float formatter '%s'", sf) ff = sprintfFunc(sf, mul) + ctype = FloatCell } else { s2 := strings.ReplaceAll(s, ",", "") i1 := strings.IndexAny(s2, "0") @@ -146,6 +161,7 @@ func makeFormatter(s string) FmtFunc { } //log.Printf(" made int formatter '%s'", sf) ff = sprintfFunc(sf, mul) + ctype = IntegerCell } if strings.Contains(s, ",") { @@ -162,10 +178,10 @@ func makeFormatter(s string) FmtFunc { } if len(prepost) == 1 { if prepost[0] == "@" { - return identFunc + return identFunc, StringCell } //log.Printf(" added static ('%s')", prepost[0]) - return staticFmtFunc(prepost[0]) + return staticFmtFunc(prepost[0]), StringCell } if len(prepost[0]) > 0 || len(prepost[1]) > 0 { prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`) @@ -176,7 +192,7 @@ func makeFormatter(s string) FmtFunc { //log.Printf(" added surround ('%s' ... '%s')", prepost[0], prepost[1]) } - return ff + return ff, ctype } // Get the number format func to use for formatting values, @@ -288,3 +304,78 @@ var builtInFormats = map[uint16]string{ 80: `mm:ss.0`, // `นน:ทท.0`, 81: `d/m/bb`, // `d/m/bb`, } + +// builtInFormatTypes are the underlying datatypes for built-in number formats in XLS/XLSX. +var builtInFormatTypes = map[uint16]CellType{ + // 0 has no defined type + 1: IntegerCell, + 2: FloatCell, + 3: IntegerCell, + 4: FloatCell, + 9: FloatCell, + 10: FloatCell, + + 11: FloatCell, + 12: FloatCell, + 13: FloatCell, + 14: DateCell, + 15: DateCell, + 16: DateCell, + 17: DateCell, + 18: DateCell, + 19: DateCell, + 20: DateCell, + 21: DateCell, + 22: DateCell, + 37: IntegerCell, + 38: IntegerCell, + 39: FloatCell, + 40: FloatCell, + 41: IntegerCell, + 42: IntegerCell, + 43: FloatCell, + 44: FloatCell, + 45: DateCell, // Durations? + 46: DateCell, + 47: DateCell, + 48: FloatCell, + 49: StringCell, + 27: DateCell, + 28: DateCell, + 29: DateCell, + 30: DateCell, + 31: DateCell, + 32: DateCell, + 33: DateCell, + 34: DateCell, + 35: DateCell, + 36: DateCell, + 50: DateCell, + 51: DateCell, + 52: DateCell, + 53: DateCell, + 54: DateCell, + 55: DateCell, + 56: DateCell, + 57: DateCell, + 58: DateCell, + 59: IntegerCell, + 60: FloatCell, + 61: IntegerCell, + 62: FloatCell, + 67: FloatCell, + 68: FloatCell, + 69: FloatCell, + 70: FloatCell, + 71: DateCell, + 72: DateCell, + 73: DateCell, + 74: DateCell, + 75: DateCell, + 76: DateCell, + 77: DateCell, + 78: DateCell, + 79: DateCell, + 80: DateCell, + 81: DateCell, +} diff --git a/commonxl/sheet.go b/commonxl/sheet.go new file mode 100644 index 0000000..f8e728e --- /dev/null +++ b/commonxl/sheet.go @@ -0,0 +1,171 @@ +package commonxl + +import ( + "fmt" + "log" + "time" +) + +// Sheet holds raw and rendered values for a spreadsheet. +type Sheet struct { + Formatter *Formatter + NumRows int + NumCols int + Rows [][]Cell + + CurRow int +} + +// Resize the sheet for the number of rows and cols given. +// Newly added cells default to blank. +func (s *Sheet) Resize(rows, cols int) { + // some sheets are off by one + rows++ + cols++ + + if rows <= 0 { + rows = 1 + } + if cols <= 0 { + cols = 1 + } + s.CurRow = 0 + s.NumRows = rows + s.NumCols = cols + + for rows >= len(s.Rows) { + s.Rows = append(s.Rows, make([]Cell, cols)) + } + + for i := 0; len(s.Rows[i]) < cols; i++ { + r2 := make([]Cell, cols-len(s.Rows[i])) + s.Rows[i] = append(s.Rows[i], r2...) + } +} + +// Put the value at the cell location given. +func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) { + if row >= s.NumRows || col >= s.NumCols { + log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d", + row, s.NumRows, col, s.NumCols) + return + } + + ct, ok := s.Formatter.getCellType(fmtNum) + if !ok || fmtNum == 0 { + s.Rows[row][col] = NewCell(value) + } else { + s.Rows[row][col] = NewCellWithType(value, ct) + } + s.Rows[row][col].SetFormatNumber(fmtNum) +} + +// Set changes the value in an existing cell location. +// NB Currently only used for populating string results for formulas. +func (s *Sheet) Set(row, col int, value interface{}) { + if row > s.NumRows || col > s.NumCols { + log.Println("grate: cell out of bounds") + return + } + + s.Rows[row][col][0] = value + s.Rows[row][col][1] = StringCell +} + +// SetURL adds a hyperlink to an existing cell location. +func (s *Sheet) SetURL(row, col int, link string) { + if row > s.NumRows || col > s.NumCols { + log.Println("grate: cell out of bounds") + return + } + + s.Rows[row][col].SetURL(link) +} + +// Next advances to the next record of content. +// It MUST be called prior to any Scan(). +func (s *Sheet) Next() bool { + if (s.CurRow + 1) >= len(s.Rows) { + return false + } + s.CurRow++ + return true +} + +// Strings extracts values from the current record into a list of strings. +func (s *Sheet) Strings() []string { + res := make([]string, s.NumCols) + for i, cell := range s.Rows[s.CurRow] { + if cell.Type() == BlankCell { + res[i] = "" + continue + } + val := cell.Value() + fs, ok := s.Formatter.Apply(cell.FormatNo(), val) + if !ok { + fs = fmt.Sprint(val) + } + res[i] = fs + } + return res +} + +// Scan extracts values from the current record into the provided arguments +// Arguments must be pointers to one of 5 supported types: +// bool, int64, float64, string, or time.Time +// If invalid, returns ErrInvalidScanType +func (s *Sheet) Scan(args ...interface{}) error { + row := s.Rows[s.CurRow] + + for i, a := range args { + val := row[i].Value() + + switch v := a.(type) { + case bool, int64, float64, string, time.Time: + return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i) + case *bool: + if x, ok := val.(bool); ok { + *v = x + } else { + return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val) + } + case *int64: + if x, ok := val.(int64); ok { + *v = x + } else { + return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val) + } + case *float64: + if x, ok := val.(float64); ok { + *v = x + } else { + return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val) + } + case *string: + if x, ok := val.(string); ok { + *v = x + } else { + return fmt.Errorf("scan destination %d expected *%T, not *string", i, val) + } + case *time.Time: + if x, ok := val.(time.Time); ok { + *v = x + } else { + return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val) + } + default: + return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a) + } + } + return nil +} + +// IsEmpty returns true if there are no data values. +func (s *Sheet) IsEmpty() bool { + return (s.NumCols <= 1 && s.NumRows <= 1) +} + +// Err returns the last error that occured. +func (s *Sheet) Err() error { + return nil +} diff --git a/grate.go b/grate.go index 4176617..6ad421a 100644 --- a/grate.go +++ b/grate.go @@ -31,7 +31,7 @@ type Collection interface { // Scan extracts values from the current record into the provided arguments // Arguments must be pointers to one of 5 supported types: - // bool, int, float64, string, or time.Time + // bool, int64, float64, string, or time.Time // If invalid, returns ErrInvalidScanType Scan(args ...interface{}) error @@ -82,3 +82,15 @@ func Register(name string, priority int, opener OpenFunc) error { }) return nil } + +const ( + // ContinueColumnMerged marks a continuation column within a merged cell. + ContinueColumnMerged = "→" + // EndColumnMerged marks the last column of a merged cell. + EndColumnMerged = "⇥" + + // ContinueRowMerged marks a continuation row within a merged cell. + ContinueRowMerged = "↓" + // EndRowMerged marks the last row of a merged cell. + EndRowMerged = "⤓" +) diff --git a/xls/sheets.go b/xls/sheets.go index a69dea8..3be1be4 100644 --- a/xls/sheets.go +++ b/xls/sheets.go @@ -3,13 +3,12 @@ package xls import ( "encoding/binary" "errors" - "fmt" "log" "math" - "time" "unicode/utf16" "github.com/pbnjay/grate" + "github.com/pbnjay/grate/commonxl" ) // List (visible) sheet names from the workbook. @@ -39,94 +38,24 @@ func (b *WorkBook) Get(sheetName string) (grate.Collection, error) { for _, s := range b.sheets { if s.Name == sheetName { ss := b.pos2substream[int64(s.Position)] - ws := &WorkSheet{ - b: b, s: s, ss: ss, - iterRow: -1, - } - return ws, ws.parse() + return b.parseSheet(s, ss) } } return nil, errors.New("xls: sheet not found") } -// WorkSheet holds various metadata about a sheet in a Workbook. -type WorkSheet struct { - b *WorkBook - s *boundSheet - ss int - err error - - minRow int - maxRow int // maximum valid row index (0xFFFF) - minCol int - maxCol int // maximum valid column index (0xFF) - rows []*row - empty bool - - iterRow int - iterMC int -} - -type staticCellType rune - -const ( - staticBlank staticCellType = 0 - - // marks a continuation column within a merged cell. - continueColumnMerged staticCellType = '→' - // marks the last column of a merged cell. - endColumnMerged staticCellType = '⇥' - - // marks a continuation row within a merged cell. - continueRowMerged staticCellType = '↓' - // marks the last row of a merged cell. - endRowMerged staticCellType = '⤓' -) - -func (s staticCellType) String() string { - if s == 0 { - return "" +func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { + res := &commonxl.Sheet{ + Formatter: &b.nfmt, } - return string([]rune{rune(s)}) -} + var minRow, maxRow uint32 + var minCol, maxCol uint16 -type row struct { - // each value must be one of: int, float64, string, or time.Time - cols []interface{} -} - -func (s *WorkSheet) makeCells() { - // ensure we always have a complete matrix - for len(s.rows) <= s.maxRow { - emptyRow := make([]interface{}, s.maxCol+1) - s.rows = append(s.rows, &row{emptyRow}) - } -} - -func (s *WorkSheet) placeValue(rowIndex, colIndex int, val interface{}) { - if colIndex > s.maxCol || rowIndex > s.maxRow { - // invalid - return - } - // ensure we always have a complete matrix - for len(s.rows) <= rowIndex { - emptyRow := make([]interface{}, s.maxCol+1) - s.rows = append(s.rows, &row{emptyRow}) - } - - s.rows[rowIndex].cols[colIndex] = val -} - -func (s *WorkSheet) IsEmpty() bool { - return s.empty -} - -func (s *WorkSheet) parse() error { // temporary string buffer us := make([]uint16, 8224) inSubstream := 0 - for idx, r := range s.b.substreams[s.ss] { + for idx, r := range b.substreams[ss] { if inSubstream > 0 { if r.RecType == RecTypeEOF { inSubstream-- @@ -145,15 +74,15 @@ func (s *WorkSheet) parse() error { case RecTypeWsBool: if (r.Data[1] & 0x10) != 0 { // it's a dialog - return nil + return nil, nil } case RecTypeDimensions: // max = 0-based index of the row AFTER the last valid index - minRow := binary.LittleEndian.Uint32(r.Data[:4]) - maxRow := binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000 - minCol := binary.LittleEndian.Uint16(r.Data[8:10]) - maxCol := binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100 + minRow = binary.LittleEndian.Uint32(r.Data[:4]) + maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000 + minCol = binary.LittleEndian.Uint16(r.Data[8:10]) + maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100 if grate.Debug { log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)", minCol, minRow, maxCol, maxRow) @@ -164,21 +93,15 @@ func (s *WorkSheet) parse() error { if minCol > 0x00FF || maxCol > 0x0100 { log.Println("invalid dimensions") } - s.minRow = int(uint64(minRow) & 0x0FFFF) - s.maxRow = int(uint64(maxRow)&0x1FFFF) - 1 // translate to last valid index - s.minCol = int(uint64(minCol) & 0x000FF) - s.maxCol = int(uint64(maxCol)&0x001FF) - 1 // translate to last valid index - if (maxRow-minRow) == 0 || (maxCol-minCol) == 0 { - s.empty = true - } + // pre-allocate cells - s.makeCells() + res.Resize(int(maxRow), int(maxCol)) } } inSubstream = 0 var formulaRow, formulaCol uint16 - for ridx, r := range s.b.substreams[s.ss] { + for ridx, r := range b.substreams[ss] { if inSubstream > 0 { if r.RecType == RecTypeEOF { inSubstream-- @@ -216,15 +139,11 @@ func (s *WorkSheet) parse() error { if r.Data[6] == 1 { bv = true } - var rval interface{} = bv var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] } - if fval, ok := s.b.nfmt.Apply(fno, bv); ok { - rval = fval - } - s.placeValue(rowIndex, colIndex, rval) + res.Put(rowIndex, colIndex, bv, fno) //log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv) } else { // it's an error, load the label @@ -232,7 +151,7 @@ func (s *WorkSheet) parse() error { if !ok { be = "" } - s.placeValue(rowIndex, colIndex, be) + res.Put(rowIndex, colIndex, be, 0) //log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be) } @@ -253,11 +172,10 @@ func (s *WorkSheet) parse() error { rval = value.Float64() } var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] } - rval, _ = s.b.nfmt.Apply(fno, rval) - s.placeValue(rowIndex, colIndex+i, rval) + res.Put(rowIndex, colIndex+i, rval, fno) } //log.Printf("mulrow spec: %+v", *mr) @@ -269,12 +187,10 @@ func (s *WorkSheet) parse() error { value := math.Float64frombits(xnum) var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] } - rval, _ := s.b.nfmt.Apply(fno, value) - - s.placeValue(rowIndex, colIndex, rval) + res.Put(rowIndex, colIndex, value, fno) //log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value) case RecTypeRK: @@ -290,11 +206,10 @@ func (s *WorkSheet) parse() error { rval = value.Float64() } var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] } - rval, _ = s.b.nfmt.Apply(fno, rval) - s.placeValue(rowIndex, colIndex, rval) + res.Put(rowIndex, colIndex, rval, fno) //log.Printf("RK spec: %d %d = %s", rowIndex, colIndex, rr.Value.String()) case RecTypeFormula: @@ -302,32 +217,30 @@ func (s *WorkSheet) parse() error { formulaCol = binary.LittleEndian.Uint16(r.Data[2:4]) ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) fdata := r.Data[6:] + var fno uint16 + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] + } if fdata[6] == 0xFF && r.Data[7] == 0xFF { switch fdata[0] { case 0: // string in next record + // put placeholder now to record the numFmt + res.Put(int(formulaRow), int(formulaCol), "", fno) case 1: // boolean bv := false if fdata[2] != 0 { bv = true } - var rval interface{} = bv - var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] - } - if fval, ok := s.b.nfmt.Apply(fno, bv); ok { - rval = fval - } - s.placeValue(int(formulaRow), int(formulaCol), rval) + res.Put(int(formulaRow), int(formulaCol), bv, fno) case 2: // error value be, ok := berrLookup[fdata[2]] if !ok { be = "" } - s.placeValue(int(formulaRow), int(formulaCol), be) + res.Put(int(formulaRow), int(formulaCol), be, 0) case 3: // blank string default: @@ -336,12 +249,7 @@ func (s *WorkSheet) parse() error { } else { xnum := binary.LittleEndian.Uint64(r.Data[6:]) value := math.Float64frombits(xnum) - var fno uint16 - if ixfe < len(s.b.xfs) { - fno = s.b.xfs[ixfe] - } - rval, _ := s.b.nfmt.Apply(fno, value) - s.placeValue(int(formulaRow), int(formulaCol), rval) + res.Put(int(formulaRow), int(formulaCol), value, fno) } //log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data) @@ -370,11 +278,11 @@ func (s *WorkSheet) parse() error { fstr = string(utf16.Decode(us)) } - if (ridx + 1) < len(s.b.substreams[s.ss]) { + if (ridx + 1) < len(b.substreams[ss]) { ridx2 := ridx + 1 - nrecs := len(s.b.substreams[s.ss]) + nrecs := len(b.substreams[ss]) for ridx2 < nrecs { - r2 := s.b.substreams[s.ss][ridx2] + r2 := b.substreams[ss][ridx2] if r2.RecType != RecTypeContinue { break } @@ -393,20 +301,22 @@ func (s *WorkSheet) parse() error { ridx2++ } } - // TODO: does formula record formatted dates as pre-computed strings? - s.placeValue(int(formulaRow), int(formulaCol), fstr) + res.Set(int(formulaRow), int(formulaCol), fstr) case RecTypeLabelSst: rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) - //ixfe := binary.LittleEndian.Uint16(r.Data[4:6]) + ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:])) - if sstIndex > len(s.b.strings) { - return errors.New("xls: invalid sst index") + if sstIndex > len(b.strings) { + return nil, errors.New("xls: invalid sst index") } - // FIXME: double check that ixfe doesn't modify output - if s.b.strings[sstIndex] != "" { - s.placeValue(rowIndex, colIndex, s.b.strings[sstIndex]) + var fno uint16 + if ixfe < len(b.xfs) { + fno = b.xfs[ixfe] + } + if b.strings[sstIndex] != "" { + res.Put(rowIndex, colIndex, b.strings[sstIndex], fno) } //log.Printf("SST spec: %d %d = [%d] %s", rowIndex, colIndex, sstIndex, s.b.strings[sstIndex]) @@ -415,19 +325,19 @@ func (s *WorkSheet) parse() error { lastRow := binary.LittleEndian.Uint16(r.Data[2:4]) firstCol := binary.LittleEndian.Uint16(r.Data[4:6]) lastCol := binary.LittleEndian.Uint16(r.Data[6:]) - if int(firstCol) > s.maxCol { + if int(firstCol) > int(maxCol) { //log.Println("invalid hyperlink column") continue } - if int(firstRow) > s.maxRow { + if int(firstRow) > int(maxRow) { //log.Println("invalid hyperlink row") continue } if lastRow == 0xFFFF { // placeholder value indicate "last" - lastRow = uint16(s.maxRow) + lastRow = uint16(maxRow) - 1 } if lastCol == 0xFF { // placeholder value indicate "last" - lastCol = uint16(s.maxCol) + lastCol = uint16(maxCol) - 1 } // decode the hyperlink datastructure and try to find the @@ -443,19 +353,19 @@ func (s *WorkSheet) parse() error { for cn := int(firstCol); cn <= int(lastCol); cn++ { if rn == int(firstRow) && cn == int(firstCol) { // TODO: provide custom hooks for how to handle links in output - s.placeValue(rn, cn, displayText+" <"+linkText+">") + res.Put(rn, cn, displayText+" <"+linkText+">", 0) } else if cn == int(firstCol) { // first and last column MAY be the same if rn == int(lastRow) { - s.placeValue(rn, cn, endRowMerged) + res.Put(rn, cn, grate.EndRowMerged, 0) } else { - s.placeValue(rn, cn, continueRowMerged) + res.Put(rn, cn, grate.ContinueRowMerged, 0) } } else if cn == int(lastCol) { // first and last column are NOT the same - s.placeValue(rn, cn, endColumnMerged) + res.Put(rn, cn, grate.EndColumnMerged, 0) } else { - s.placeValue(rn, cn, continueColumnMerged) + res.Put(rn, cn, grate.ContinueColumnMerged, 0) } } } @@ -482,10 +392,10 @@ func (s *WorkSheet) parse() error { raw = raw[8:] if lastRow == 0xFFFF { // placeholder value indicate "last" - lastRow = uint16(s.maxRow) + lastRow = uint16(maxRow) - 1 } if lastCol == 0xFF { // placeholder value indicate "last" - lastCol = uint16(s.maxCol) + lastCol = uint16(maxCol) - 1 } for rn := int(firstRow); rn <= int(lastRow); rn++ { for cn := int(firstCol); cn <= int(lastCol); cn++ { @@ -494,15 +404,15 @@ func (s *WorkSheet) parse() error { } else if cn == int(firstCol) { // first and last column MAY be the same if rn == int(lastRow) { - s.placeValue(rn, cn, endRowMerged) + res.Put(rn, cn, grate.EndRowMerged, 0) } else { - s.placeValue(rn, cn, continueRowMerged) + res.Put(rn, cn, grate.ContinueRowMerged, 0) } } else if cn == int(lastCol) { // first and last column are NOT the same - s.placeValue(rn, cn, endColumnMerged) + res.Put(rn, cn, grate.EndColumnMerged, 0) } else { - s.placeValue(rn, cn, continueColumnMerged) + res.Put(rn, cn, grate.ContinueColumnMerged, 0) } } } @@ -524,64 +434,7 @@ func (s *WorkSheet) parse() error { */ } } - return nil -} - -// Err returns the last error that occured. -func (s *WorkSheet) Err() error { - return s.err -} - -// Next advances to the next row of content. -// It MUST be called prior to any Scan(). -func (s *WorkSheet) Next() bool { - s.iterRow++ - return s.iterRow < len(s.rows) -} - -// Strings returns the contents of the row as string types. -func (s *WorkSheet) Strings() []string { - currow := s.rows[s.iterRow] - res := make([]string, len(currow.cols)) - for i, col := range currow.cols { - if col == nil || col == "" { - continue - } - switch v := col.(type) { - case string: - res[i] = v - case fmt.Stringer: - res[i] = v.String() - default: - res[i] = fmt.Sprint(col) - } - } - return res -} - -// Scan extracts values from the row into the provided arguments -// Arguments must be pointers to one of 5 supported types: -// bool, int, float64, string, or time.Time -func (s *WorkSheet) Scan(args ...interface{}) error { - currow := s.rows[s.iterRow] - - for i, a := range args { - switch v := a.(type) { - case *bool: - *v = currow.cols[i].(bool) - case *int: - *v = currow.cols[i].(int) - case *float64: - *v = currow.cols[i].(float64) - case *string: - *v = currow.cols[i].(string) - case *time.Time: - *v = currow.cols[i].(time.Time) - default: - return grate.ErrInvalidScanType - } - } - return nil + return res, nil } var berrLookup = map[byte]string{ diff --git a/xls/xls.go b/xls/xls.go index 7deecd3..15ff55c 100644 --- a/xls/xls.go +++ b/xls/xls.go @@ -282,11 +282,13 @@ func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error { if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD { return errors.New("xls: unsupported biff version") } - if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 { - // we only support the workbook or worksheet substreams - log.Println("xls: unsupported document type") - //break - } + /* + if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 { + // we only support the workbook or worksheet substreams + log.Println("xls: unsupported document type") + //break + } + */ case RecTypeCodePage: // BIFF8 is entirely UTF-16LE so this is actually ignored diff --git a/xlsx/sheets.go b/xlsx/sheets.go index 7ca98b8..fc9c76b 100644 --- a/xlsx/sheets.go +++ b/xlsx/sheets.go @@ -3,13 +3,11 @@ package xlsx import ( "encoding/xml" "errors" - "fmt" "io" "log" "path/filepath" "strconv" "strings" - "time" "github.com/pbnjay/grate" "github.com/pbnjay/grate/commonxl" @@ -23,24 +21,15 @@ type Sheet struct { err error - minRow int - maxRow int - minCol int - maxCol int - rows []*row - empty bool - - iterRow int + wrapped *commonxl.Sheet } var errNotLoaded = errors.New("xlsx: sheet not loaded") -type row struct { - // each value must be one of: int, float64, string, or time.Time - cols []interface{} -} - func (s *Sheet) parseSheet() error { + s.wrapped = &commonxl.Sheet{ + Formatter: &s.d.fmt, + } linkmap := make(map[string]string) base := filepath.Base(s.docname) sub := strings.TrimSuffix(s.docname, base) @@ -68,7 +57,9 @@ func (s *Sheet) parseSheet() error { currentCellType := BlankCellType currentCell := "" - var numFormat commonxl.FmtFunc + var fno uint16 + var maxCol, maxRow int + tok, err := dec.RawToken() for ; err == nil; tok, err = dec.RawToken() { switch v := tok.(type) { @@ -79,6 +70,7 @@ func (s *Sheet) parseSheet() error { c, r := refToIndexes(currentCell) if c >= 0 && r >= 0 { var val interface{} = string(v) + switch currentCellType { case BooleanCellType: if v[0] == '1' { @@ -87,13 +79,12 @@ func (s *Sheet) parseSheet() error { val = false } case DateCellType: - log.Println("CELL DATE", val, numFormat) + log.Println("CELL DATE", val, fno) case NumberCellType: fval, err := strconv.ParseFloat(string(v), 64) if err == nil { val = fval } - val = numFormat(&s.d.fmt, fval) //log.Println("CELL NUMBER", val, numFormat) case SharedStringCellType: //log.Println("CELL SHSTR", val, currentCellType, numFormat) @@ -106,9 +97,9 @@ func (s *Sheet) parseSheet() error { case ErrorCellType, FormulaStringCellType, InlineStringCellType: //log.Println("CELL ERR/FORM/INLINE", val, currentCellType) default: - log.Println("CELL UNKNOWN", val, currentCellType, numFormat) + log.Println("CELL UNKNOWN", val, currentCellType, fno) } - s.placeValue(r, c, val) + s.wrapped.Put(r, c, val, fno) } else { //log.Println("FAIL row/col: ", currentCell) } @@ -117,20 +108,19 @@ func (s *Sheet) parseSheet() error { case "dimension": ax := getAttrs(v.Attr, "ref") if ax[0] == "A1" { + maxCol, maxRow = 1, 1 // short-circuit empty sheet - s.minCol, s.minRow = 0, 0 - s.maxCol, s.maxRow = 1, 1 - s.empty = true + s.wrapped.Resize(1, 1) continue } dims := strings.Split(ax[0], ":") if len(dims) == 1 { - s.minCol, s.minRow = 0, 0 - s.maxCol, s.maxRow = refToIndexes(dims[0]) + maxCol, maxRow = refToIndexes(dims[0]) } else { - s.minCol, s.minRow = refToIndexes(dims[0]) - s.maxCol, s.maxRow = refToIndexes(dims[1]) + //minCol, minRow := refToIndexes(dims[0]) + maxCol, maxRow = refToIndexes(dims[1]) } + s.wrapped.Resize(maxRow, maxCol) //log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol) case "row": //currentRow = ax["r"] // unsigned int row index @@ -145,9 +135,9 @@ func (s *Sheet) parseSheet() error { style := ax[2] sid, _ := strconv.ParseInt(style, 10, 64) if len(s.d.xfs) > int(sid) { - numFormat = s.d.xfs[sid] // unsigned integer lookup + fno = s.d.xfs[sid] } else { - numFormat = s.d.xfs[0] + fno = 0 } //log.Println("CELL", currentCell, sid, numFormat, currentCellType) case "v": @@ -161,6 +151,12 @@ func (s *Sheet) parseSheet() error { if len(dims) > 1 { endCol, endRow = refToIndexes(dims[1]) } + if endRow > maxRow { + endRow = maxRow + } + if endCol > maxCol { + endCol = maxCol + } for r := startRow; r <= endRow; r++ { for c := startCol; c <= endCol; c++ { if r == startRow && c == startCol { @@ -168,15 +164,15 @@ func (s *Sheet) parseSheet() error { } else if c == startCol { // first and last column MAY be the same if r == endRow { - s.placeValue(r, c, endRowMerged) + s.wrapped.Put(r, c, grate.EndRowMerged, 0) } else { - s.placeValue(r, c, continueRowMerged) + s.wrapped.Put(r, c, grate.ContinueRowMerged, 0) } } else if c == endCol { // first and last column are NOT the same - s.placeValue(r, c, endColumnMerged) + s.wrapped.Put(r, c, grate.EndColumnMerged, 0) } else { - s.placeValue(r, c, continueColumnMerged) + s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0) } } } @@ -185,12 +181,8 @@ func (s *Sheet) parseSheet() error { ax := getAttrs(v.Attr, "ref", "id") col, row := refToIndexes(ax[0]) link := linkmap[ax[1]] - if len(s.rows) > row && len(s.rows[row].cols) > col { - if sstr, ok := s.rows[row].cols[col].(string); ok { - link = sstr + " <" + link + ">" - } - } - s.placeValue(row, col, link) + s.wrapped.Put(row, col, link, 0) + s.wrapped.SetURL(row, col, link) case "worksheet", "mergeCells", "hyperlinks": // containers @@ -220,71 +212,3 @@ func (s *Sheet) parseSheet() error { } return err } - -func (s *Sheet) placeValue(rowIndex, colIndex int, val interface{}) { - if colIndex > s.maxCol || rowIndex > s.maxRow { - // invalid - return - } - - // ensure we always have a complete matrix - for len(s.rows) <= rowIndex { - emptyRow := make([]interface{}, s.maxCol+1) - s.rows = append(s.rows, &row{emptyRow}) - } - s.empty = false - s.rows[rowIndex].cols[colIndex] = val -} - -// Next advances to the next row of content. -// It MUST be called prior to any Scan(). -func (s *Sheet) Next() bool { - s.iterRow++ - return s.iterRow < len(s.rows) -} - -func (s *Sheet) Strings() []string { - currow := s.rows[s.iterRow] - res := make([]string, len(currow.cols)) - for i, col := range currow.cols { - if col == nil || col == "" { - continue - } - res[i] = fmt.Sprint(col) - } - return res -} - -// Scan extracts values from the row into the provided arguments -// Arguments must be pointers to one of 5 supported types: -// bool, int, float64, string, or time.Time -func (s *Sheet) Scan(args ...interface{}) error { - currow := s.rows[s.iterRow] - - for i, a := range args { - switch v := a.(type) { - case *bool: - *v = currow.cols[i].(bool) - case *int: - *v = currow.cols[i].(int) - case *float64: - *v = currow.cols[i].(float64) - case *string: - *v = currow.cols[i].(string) - case *time.Time: - *v = currow.cols[i].(time.Time) - default: - return grate.ErrInvalidScanType - } - } - return nil -} - -func (s *Sheet) IsEmpty() bool { - return s.empty -} - -// Err returns the last error that occured. -func (s *Sheet) Err() error { - return s.err -} diff --git a/xlsx/workbook.go b/xlsx/workbook.go index 2205094..4971554 100644 --- a/xlsx/workbook.go +++ b/xlsx/workbook.go @@ -9,7 +9,6 @@ import ( "strconv" "github.com/pbnjay/grate" - "github.com/pbnjay/grate/commonxl" ) func (d *Document) parseRels(dec *xml.Decoder, basedir string) error { @@ -119,7 +118,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error { section = 2 ax := getAttrs(v.Attr, "count") n, _ := strconv.ParseInt(ax[0], 10, 64) - d.xfs = make([]commonxl.FmtFunc, 0, n) + d.xfs = make([]uint16, 0, n) case "xf": ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId") @@ -148,11 +147,7 @@ func (d *Document) parseStyles(dec *xml.Decoder) error { } nfid, _ := strconv.ParseInt(numFmtID, 10, 16) - thisXF, ok := d.fmt.Get(uint16(nfid)) - if !ok { - panic("numformat unknown") - } - d.xfs = append(d.xfs, thisXF) + d.xfs = append(d.xfs, uint16(nfid)) } else { panic("wheres is this xf??") } diff --git a/xlsx/xlsx.go b/xlsx/xlsx.go index e08dd5c..3d5eaad 100644 --- a/xlsx/xlsx.go +++ b/xlsx/xlsx.go @@ -27,7 +27,7 @@ type Document struct { rels map[string]map[string]string sheets []*Sheet strings []string - xfs []commonxl.FmtFunc + xfs []uint16 fmt commonxl.Formatter } @@ -163,7 +163,7 @@ func (d *Document) Get(sheetName string) (grate.Collection, error) { if s.err == errNotLoaded { s.err = s.parseSheet() } - return s, s.err + return s.wrapped, s.err } } return nil, errors.New("xlsx: sheet not found")