diff --git a/simple/csv.go b/simple/csv.go new file mode 100644 index 0000000..c13d144 --- /dev/null +++ b/simple/csv.go @@ -0,0 +1,55 @@ +package simple + +import ( + "encoding/csv" + "os" + + "github.com/pbnjay/grate" +) + +var _ = grate.Register("csv", 15, OpenCSV) + +// OpenCSV defines a Source's instantiation function. +// It should return ErrNotInFormat immediately if filename is not of the correct file type. +func OpenCSV(filename string) (grate.Source, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + t := &simpleFile{ + filename: filename, + iterRow: -1, + } + + s := csv.NewReader(f) + s.FieldsPerRecord = -1 + + total := 0 + ncols := make(map[int]int) + rec, err := s.Read() + for ; err == nil; rec, err = s.Read() { + ncols[len(rec)]++ + total++ + t.rows = append(t.rows, rec) + } + + // kinda arbitrary metrics for detecting CSV + looksGood := 0 + for c, n := range ncols { + if c <= 1 { + continue + } + if n > 10 && float64(n)/float64(total) > 0.8 { + // more than 80% of rows have the same number of columns, we're good + looksGood = 2 + } else if n > 25 && looksGood == 0 { + looksGood = 1 + } + } + if looksGood == 1 { + return t, grate.ErrNotInFormat + } + + return t, nil +} diff --git a/simple/simple.go b/simple/simple.go new file mode 100644 index 0000000..3108ded --- /dev/null +++ b/simple/simple.go @@ -0,0 +1,89 @@ +package simple + +import ( + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/pbnjay/grate" +) + +// represents a set of data collections. +type simpleFile struct { + filename string + rows [][]string + iterRow int +} + +// List the individual data tables within this source. +func (t *simpleFile) List() ([]string, error) { + return []string{t.filename}, nil +} + +// Get a Collection from the source by name. +func (t *simpleFile) Get(name string) (grate.Collection, error) { + return t, nil +} + +// Next advances to the next record of content. +// It MUST be called prior to any Scan(). +func (t *simpleFile) Next() bool { + t.iterRow++ + return t.iterRow < len(t.rows) +} + +// Strings extracts values from the current record into a list of strings. +func (t *simpleFile) Strings() []string { + return t.rows[t.iterRow] +} + +// Scan extracts values from the current record into the provided arguments +// Arguments must be pointers to one of 5 supported types: +// bool, int, float64, string, or time.Time +func (t *simpleFile) Scan(args ...interface{}) error { + var err error + row := t.rows[t.iterRow] + if len(row) != len(args) { + return fmt.Errorf("grate/simple: expected %d Scan destinations, got %d", len(row), len(args)) + } + + for i, a := range args { + switch v := a.(type) { + case *bool: + switch strings.ToLower(row[i]) { + case "1", "t", "true", "y", "yes": + *v = true + default: + *v = false + } + case *int: + var n int64 + n, err = strconv.ParseInt(row[i], 10, 64) + *v = int(n) + case *float64: + *v, err = strconv.ParseFloat(row[i], 64) + case *string: + *v = row[i] + case *time.Time: + return errors.New("grate/simple: time.Time not supported, you must parse string manually") + default: + return errors.New("grate/simple: scan destination must be one of: *bool, *int, *float64, *string, or *time.Time") + } + if err != nil { + return err + } + } + return nil +} + +// IsEmpty returns true if there are no data values. +func (t *simpleFile) IsEmpty() bool { + return len(t.rows) == 0 +} + +// Err returns the last error that occured. +func (t *simpleFile) Err() error { + return nil +} diff --git a/simple/tsv.go b/simple/tsv.go new file mode 100644 index 0000000..b78c004 --- /dev/null +++ b/simple/tsv.go @@ -0,0 +1,54 @@ +package simple + +import ( + "bufio" + "os" + "strings" + + "github.com/pbnjay/grate" +) + +var _ = grate.Register("tsv", 10, OpenTSV) + +// OpenTSV defines a Source's instantiation function. +// It should return ErrNotInFormat immediately if filename is not of the correct file type. +func OpenTSV(filename string) (grate.Source, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + t := &simpleFile{ + filename: filename, + iterRow: -1, + } + + s := bufio.NewScanner(f) + total := 0 + ncols := make(map[int]int) + for s.Scan() { + r := strings.Split(s.Text(), "\t") + ncols[len(r)]++ + total++ + t.rows = append(t.rows, r) + } + + // kinda arbitrary metrics for detecting TSV + looksGood := 0 + for c, n := range ncols { + if c <= 1 { + continue + } + if n > 10 && float64(n)/float64(total) > 0.8 { + // more than 80% of rows have the same number of columns, we're good + looksGood = 2 + } else if n > 25 && looksGood == 0 { + looksGood = 1 + } + } + if looksGood == 1 { + return t, grate.ErrNotInFormat + } + + return t, nil +}