1
0
mirror of https://github.com/pbnjay/grate.git synced 2024-12-12 21:49:14 +02:00

tool to create tsv from xls

This commit is contained in:
Jeremy Jay 2021-02-07 22:53:28 -05:00
parent bde3f21cfc
commit c01ac5f01b

70
cmd/xls2tsv/main.go Normal file
View File

@ -0,0 +1,70 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/pbnjay/grate/xls"
)
// main converts every XLS workbook named on the command line into one TSV
// file per sheet. Each output file is created relative to the working
// directory and named "<workbook base name>.<sanitized sheet name>.tsv",
// where every run of non-alphanumeric characters in the sheet name is
// replaced by a single underscore.
//
// Flags:
//
//	-r  collapse runs of spaces, tabs and newlines in a cell to one space
//	-w  trim leading/trailing whitespace from each cell
//	-b  drop rows whose cells are all empty (after cleaning)
//
// A workbook that cannot be opened, or an output file that cannot be
// created, written or closed, aborts the program. A sheet that cannot be
// loaded is logged and skipped.
func main() {
	//infoOnly := flag.Bool("i", false, "show info/stats ONLY")
	removeNewlines := flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents")
	trimSpaces := flag.Bool("w", true, "trim whitespace from cell contents")
	skipBlanks := flag.Bool("b", true, "discard blank rows from the output")
	flag.Parse()

	sanitize := regexp.MustCompile("[^a-zA-Z0-9]+")
	newlines := regexp.MustCompile("[ \n\r\t]+")

	for _, fn := range flag.Args() {
		wb, err := xls.Open(context.Background(), fn)
		if err != nil {
			log.Fatal(err)
		}
		log.Println(fn)

		// Output files are named after the workbook without its directory
		// or extension.
		ext := filepath.Ext(fn)
		fn2 := filepath.Base(strings.TrimSuffix(fn, ext))

		for _, s := range wb.Sheets() {
			sheet, err := wb.Get(s)
			if err != nil {
				log.Println(err)
				continue
			}

			s2 := sanitize.ReplaceAllString(s, "_")
			f, err := os.Create(fn2 + "." + s2 + ".tsv")
			if err != nil {
				log.Fatal(err)
			}

			for sheet.Next() {
				row := sheet.Strings()
				nonblank := cleanRow(row, newlines, *removeNewlines, *trimSpaces)
				if !nonblank && *skipBlanks {
					continue
				}
				// No per-row Sync: forcing a disk flush for every line
				// makes large sheets extremely slow.
				if _, err := fmt.Fprintln(f, strings.Join(row, "\t")); err != nil {
					log.Fatal(err)
				}
			}

			// A failed Close can mean earlier writes never reached the file,
			// so it is treated like any other write error.
			if err := f.Close(); err != nil {
				log.Fatal(err)
			}
		}
	}
}

// cleanRow normalizes the cells of row in place and reports whether at least
// one cell is non-empty afterwards. When removeNewlines is set, every match
// of the newlines pattern is replaced by a single space; when trimSpaces is
// set, surrounding whitespace is stripped. The cleaned value is always stored
// back, so either option takes effect independently of the other.
func cleanRow(row []string, newlines *regexp.Regexp, removeNewlines, trimSpaces bool) bool {
	nonblank := false
	for i, x := range row {
		if removeNewlines {
			x = newlines.ReplaceAllString(x, " ")
		}
		if trimSpaces {
			x = strings.TrimSpace(x)
		}
		row[i] = x
		if x != "" {
			nonblank = true
		}
	}
	return nonblank
}