From d9c4e2ed4c0f908f4b63c2376b53ddfd32a0fae1 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Mon, 2 Mar 2020 14:03:58 -0800 Subject: [PATCH] add csv file parser --- docgen.sh | 1 + docs/parsers/csv.md | 104 ++++++++++++++++++++++++++++++++++ jc/cli.py | 1 + jc/parsers/csv.py | 135 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 docs/parsers/csv.md create mode 100644 jc/parsers/csv.py diff --git a/docgen.sh b/docgen.sh index 7e6a34da..5c1788a0 100755 --- a/docgen.sh +++ b/docgen.sh @@ -8,6 +8,7 @@ pydocmd simple jc.parsers.arp+ > ../docs/parsers/arp.md pydocmd simple jc.parsers.blkid+ > ../docs/parsers/blkid.md pydocmd simple jc.parsers.crontab+ > ../docs/parsers/crontab.md pydocmd simple jc.parsers.crontab_u+ > ../docs/parsers/crontab_u.md +pydocmd simple jc.parsers.csv+ > ../docs/parsers/csv.md pydocmd simple jc.parsers.df+ > ../docs/parsers/df.md pydocmd simple jc.parsers.dig+ > ../docs/parsers/dig.md pydocmd simple jc.parsers.du+ > ../docs/parsers/du.md diff --git a/docs/parsers/csv.md b/docs/parsers/csv.md new file mode 100644 index 00000000..10241f8e --- /dev/null +++ b/docs/parsers/csv.md @@ -0,0 +1,104 @@ +# jc.parsers.csv +jc - JSON CLI output utility csv Parser + +Usage: + + specify --csv as the first argument if the piped input is coming from a csv file. + the csv parser will attempt to automatically detect the delimiter character. + the first row of the file must be a header row. + +Compatibility: + + 'linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd' + +Examples: + + $ cat homes.csv + "Sell", "List", "Living", "Rooms", "Beds", "Baths", "Age", "Acres", "Taxes" + 142, 160, 28, 10, 5, 3, 60, 0.28, 3167 + 175, 180, 18, 8, 4, 1, 12, 0.43, 4033 + 129, 132, 13, 6, 3, 1, 41, 0.33, 1471 + ... 
+ + $ cat homes.csv | jc --csv -p + [ + { + "Sell": "142", + "List": "160", + "Living": "28", + "Rooms": "10", + "Beds": "5", + "Baths": "3", + "Age": "60", + "Acres": "0.28", + "Taxes": "3167" + }, + { + "Sell": "175", + "List": "180", + "Living": "18", + "Rooms": "8", + "Beds": "4", + "Baths": "1", + "Age": "12", + "Acres": "0.43", + "Taxes": "4033" + }, + { + "Sell": "129", + "List": "132", + "Living": "13", + "Rooms": "6", + "Beds": "3", + "Baths": "1", + "Age": "41", + "Acres": "0.33", + "Taxes": "1471" + }, + ... + ] + +## info +```python +info(self, /, *args, **kwargs) +``` + +## process +```python +process(proc_data) +``` + +Final processing to conform to the schema. + +Parameters: + + proc_data: (list of dictionaries) raw structured data to process + +Returns: + + List of dictionaries. Each dictionary represents a row in the csv file: + + [ + { + csv file converted to a Dictionary + https://docs.python.org/3/library/csv.html + } + ] + +## parse +```python +parse(data, raw=False, quiet=False) +``` + +Main text parsing function + +Parameters: + + data: (string) text data to parse + raw: (boolean) output preprocessed JSON if True + quiet: (boolean) suppress warning messages if True + +Returns: + + List of dictionaries. Raw or processed structured data. + diff --git a/jc/cli.py b/jc/cli.py index f82f214c..ea8baa5a 100644 --- a/jc/cli.py +++ b/jc/cli.py @@ -26,6 +26,7 @@ parsers = [ 'blkid', 'crontab', 'crontab-u', + 'csv', 'df', 'dig', 'du', diff --git a/jc/parsers/csv.py b/jc/parsers/csv.py new file mode 100644 index 00000000..793ac611 --- /dev/null +++ b/jc/parsers/csv.py @@ -0,0 +1,135 @@ +"""jc - JSON CLI output utility csv Parser + +Usage: + + specify --csv as the first argument if the piped input is coming from a csv file. + the csv parser will attempt to automatically detect the delimiter character. + the first row of the file must be a header row. 
+
+Compatibility:
+
+    'linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd'
+
+Examples:
+
+    $ cat homes.csv
+    "Sell", "List", "Living", "Rooms", "Beds", "Baths", "Age", "Acres", "Taxes"
+    142, 160, 28, 10, 5, 3, 60, 0.28, 3167
+    175, 180, 18, 8, 4, 1, 12, 0.43, 4033
+    129, 132, 13, 6, 3, 1, 41, 0.33, 1471
+    ...
+
+    $ cat homes.csv | jc --csv -p
+    [
+      {
+        "Sell": "142",
+        "List": "160",
+        "Living": "28",
+        "Rooms": "10",
+        "Beds": "5",
+        "Baths": "3",
+        "Age": "60",
+        "Acres": "0.28",
+        "Taxes": "3167"
+      },
+      {
+        "Sell": "175",
+        "List": "180",
+        "Living": "18",
+        "Rooms": "8",
+        "Beds": "4",
+        "Baths": "1",
+        "Age": "12",
+        "Acres": "0.43",
+        "Taxes": "4033"
+      },
+      {
+        "Sell": "129",
+        "List": "132",
+        "Living": "13",
+        "Rooms": "6",
+        "Beds": "3",
+        "Baths": "1",
+        "Age": "41",
+        "Acres": "0.33",
+        "Taxes": "1471"
+      },
+      ...
+    ]
+"""
+import jc.utils
+import csv
+
+
+class info():
+    version = '1.0'
+    description = 'csv file parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    details = 'Using the python standard csv library'
+
+    # compatible options: linux, darwin, cygwin, win32, aix, freebsd
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+
+
+__version__ = info.version
+
+
+def process(proc_data):
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (list of dictionaries) raw structured data to process
+
+    Returns:
+
+        List of dictionaries. Each dictionary represents a row in the csv file:
+
+        [
+          {
+            csv file converted to a Dictionary
+            https://docs.python.org/3/library/csv.html
+          }
+        ]
+    """
+
+    # No further processing
+    return proc_data
+
+
+def parse(data, raw=False, quiet=False):
+    """
+    Main text parsing function
+
+    Parameters:
+
+        data:        (string)  text data to parse
+        raw:         (boolean) output preprocessed JSON if True
+        quiet:       (boolean) suppress warning messages if True
+
+    Returns:
+
+        List of dictionaries. Raw or processed structured data.
+    """
+    if not quiet:
+        jc.utils.compatibility(__name__, info.compatible)
+
+    raw_output = []
+
+    # Clear any blank lines
+    cleandata = [line for line in data.splitlines() if line]
+
+    if cleandata:
+        # Sniffer raises csv.Error when it cannot work out the delimiter (for
+        # example single-column input); fall back to the default 'excel'
+        # dialect instead of crashing.
+        try:
+            dialect = csv.Sniffer().sniff(data[:1024])
+        except csv.Error:
+            dialect = 'excel'
+
+        raw_output = list(csv.DictReader(cleandata, dialect=dialect))
+
+    return raw_output if raw else process(raw_output)