From 218b9aec8ac397e26e6cd4ff407f3d10ef88ea9b Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Sun, 24 Oct 2021 13:10:47 -0700 Subject: [PATCH] doc update for streaming CSV parser --- README.md | 1 + docs/parsers/csv_s.md | 75 +++++++++++++++++++++++++++++++++++++++++++ jc/parsers/csv_s.py | 52 +++--------------------------- man/jc.1 | 7 +++- 4 files changed, 87 insertions(+), 48 deletions(-) create mode 100644 docs/parsers/csv_s.md diff --git a/README.md b/README.md index 6b1baa49..2547657a 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,7 @@ The JSON output can be compact (default) or pretty formatted with the `-p` optio - `--crontab` enables the `crontab` command and file parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab)) - `--crontab-u` enables the `crontab` file parser with user support ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab_u)) - `--csv` enables the CSV file parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/csv)) +- `--csv-s` enables the CSV file streaming parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/csv_s)) - `--date` enables the `date` command parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/date)) - `--df` enables the `df` command parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/df)) - `--dig` enables the `dig` command parser ([documentation](https://kellyjonbrazil.github.io/jc/docs/parsers/dig)) diff --git a/docs/parsers/csv_s.md b/docs/parsers/csv_s.md new file mode 100644 index 00000000..1cc80720 --- /dev/null +++ b/docs/parsers/csv_s.md @@ -0,0 +1,75 @@ +[Home](https://kellyjonbrazil.github.io/jc/) + +# jc.parsers.csv_s +jc - JSON CLI output utility `csv` file streaming parser + +The `csv` streaming parser will attempt to automatically detect the delimiter character. If the delimiter cannot be detected it will default to comma. The first row of the file must be a header row. 
+ +Note: The first 100 rows are read into memory to enable delimiter detection, then the rest of the rows are loaded lazily. + +Usage (cli): + + $ cat file.csv | jc --csv-s + +Usage (module): + + import jc.parsers.csv_s + result = jc.parsers.csv_s.parse(csv_output.splitlines()) + +Schema: + + csv file converted to a Dictionary: https://docs.python.org/3/library/csv.html + + { + "column_name1": string, + "column_name2": string + } + +Examples: + + $ cat homes.csv + "Sell", "List", "Living", "Rooms", "Beds", "Baths", "Age", "Acres", "Taxes" + 142, 160, 28, 10, 5, 3, 60, 0.28, 3167 + 175, 180, 18, 8, 4, 1, 12, 0.43, 4033 + 129, 132, 13, 6, 3, 1, 41, 0.33, 1471 + ... + + $ cat homes.csv | jc --csv-s + {"Sell":"142","List":"160","Living":"28","Rooms":"10","Beds":"5","Baths":"3","Age":"60","Acres":"0.28","Taxes":"3167"} + {"Sell":"175","List":"180","Living":"18","Rooms":"8","Beds":"4","Baths":"1","Age":"12","Acres":"0.43","Taxes":"4033"} + {"Sell":"129","List":"132","Living":"13","Rooms":"6","Beds":"3","Baths":"1","Age":"41","Acres":"0.33","Taxes":"1471"} + ... + + +## info +```python +info() +``` +Provides parser metadata (version, author, etc.) + +## parse +```python +parse(data, raw=False, quiet=False, ignore_exceptions=False) +``` + +Main text parsing generator function. Returns an iterator object. + +Parameters: + + data: (iterable) line-based text data to parse (e.g. sys.stdin or str.splitlines()) + raw: (boolean) output preprocessed JSON if True + quiet: (boolean) suppress warning messages if True + ignore_exceptions: (boolean) ignore parsing exceptions if True + +Yields: + + Dictionary. Raw or processed structured data. 
+ +Returns: + + Iterator object + +## Parser Information +Compatibility: linux, darwin, cygwin, win32, aix, freebsd + +Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com) diff --git a/jc/parsers/csv_s.py b/jc/parsers/csv_s.py index a447be2c..608b340e 100644 --- a/jc/parsers/csv_s.py +++ b/jc/parsers/csv_s.py @@ -1,12 +1,12 @@ """jc - JSON CLI output utility `csv` file streaming parser -The `csv` parser will attempt to automatically detect the delimiter character. If the delimiter cannot be detected it will default to comma. The first row of the file must be a header row. +The `csv` streaming parser will attempt to automatically detect the delimiter character. If the delimiter cannot be detected it will default to comma. The first row of the file must be a header row. -Note: The first 100 rows are read into memory to enable delimiter detection. Then the rest of the rows are loaded lazily. +Note: The first 100 rows are read into memory to enable delimiter detection, then the rest of the rows are loaded lazily. Usage (cli): - $ cat file.csv | jc --csv + $ cat file.csv | jc --csv-s Usage (module): @@ -31,7 +31,7 @@ Examples: 129, 132, 13, 6, 3, 1, 41, 0.33, 1471 ... - $ cat homes.csv | jc --csv_s + $ cat homes.csv | jc --csv-s {"Sell":"142","List":"160","Living":"28","Rooms":"10","Beds":"5","Baths":"3","Age":"60","Acres":"0.28","Taxes":"3167"} {"Sell":"175","List":"180","Living":"18","Rooms":"8","Beds":"4","Baths":"1","Age":"12","Acres":"0.43","Taxes":"4033"} {"Sell":"129","List":"132","Living":"13","Rooms":"6","Beds":"3","Baths":"1","Age":"41","Acres":"0.33","Taxes":"1471"} @@ -75,48 +75,6 @@ def _process(proc_data): return proc_data -def old_parse(data, raw=False, quiet=False): - """ - Main text parsing function - - Parameters: - - data: (string) text data to parse - raw: (boolean) output preprocessed JSON if True - quiet: (boolean) suppress warning messages if True - - Returns: - - List of Dictionaries. Raw or processed structured data. 
- """ - if not quiet: - jc.utils.compatibility(__name__, info.compatible) - - raw_output = [] - cleandata = data.splitlines() - - # Clear any blank lines - cleandata = list(filter(None, cleandata)) - - if jc.utils.has_data(data): - - dialect = None - try: - dialect = csv.Sniffer().sniff(data[:1024]) - except Exception: - pass - - reader = csv.DictReader(cleandata, dialect=dialect) - - for row in reader: - raw_output.append(row) - - if raw: - return raw_output - else: - return _process(raw_output) - - def parse(data, raw=False, quiet=False, ignore_exceptions=False): """ Main text parsing generator function. Returns an iterator object. @@ -153,7 +111,7 @@ def parse(data, raw=False, quiet=False, ignore_exceptions=False): except Exception: pass - # chain `temp_list` and `data` together to lazy load all of the CSV data + # chain `temp_list` and `data` together to lazy load the rest of the CSV data new_data = itertools.chain(temp_list, data) reader = csv.DictReader(new_data, dialect=dialect) diff --git a/man/jc.1 b/man/jc.1 index fafe7b5c..8908c723 100644 --- a/man/jc.1 +++ b/man/jc.1 @@ -1,4 +1,4 @@ -.TH jc 1 2021-10-23 1.17.1 "JSON CLI output utility" +.TH jc 1 2021-10-24 1.17.1 "JSON CLI output utility" .SH NAME jc \- JSONifies the output of many CLI tools and file-types .SH SYNOPSIS @@ -62,6 +62,11 @@ Parsers: \fB--csv\fP CSV file parser +.TP +.B +\fB--csv-s\fP +CSV file streaming parser + .TP .B \fB--date\fP