1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-23 00:29:59 +02:00
Files
jc/jc/parsers/csv.py
Kelly Brazil bc5821e69f force ci tests
2022-10-26 15:36:22 -07:00

162 lines
3.5 KiB
Python

"""jc - JSON Convert `csv` file parser
The `csv` parser will attempt to automatically detect the delimiter
character. If the delimiter cannot be detected it will default to comma.
The first row of the file must be a header row.
Usage (cli):
$ cat file.csv | jc --csv
Usage (module):
import jc
result = jc.parse('csv', csv_output)
Schema:
CSV file converted to a Dictionary:
https://docs.python.org/3/library/csv.html
[
{
"column_name1": string,
"column_name2": string
}
]
Examples:
$ cat homes.csv
"Sell", "List", "Living", "Rooms", "Beds", "Baths", "Age", "Acres"...
142, 160, 28, 10, 5, 3, 60, 0.28, 3167
175, 180, 18, 8, 4, 1, 12, 0.43, 4033
129, 132, 13, 6, 3, 1, 41, 0.33, 1471
...
$ cat homes.csv | jc --csv -p
[
{
"Sell": "142",
"List": "160",
"Living": "28",
"Rooms": "10",
"Beds": "5",
"Baths": "3",
"Age": "60",
"Acres": "0.28",
"Taxes": "3167"
},
{
"Sell": "175",
"List": "180",
"Living": "18",
"Rooms": "8",
"Beds": "4",
"Baths": "1",
"Age": "12",
"Acres": "0.43",
"Taxes": "4033"
},
{
"Sell": "129",
"List": "132",
"Living": "13",
"Rooms": "6",
"Beds": "3",
"Baths": "1",
"Age": "41",
"Acres": "0.33",
"Taxes": "1471"
},
...
]
"""
from typing import List, Union, Type
from jc.jc_types import JSONDictType
import jc.utils
import csv
class info():
"""Provides parser metadata (version, author, etc.)"""
version = '1.5'
description = 'CSV file parser'
author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com'
details = 'Using the python standard csv library'
compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
__version__ = info.version
def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
"""
Final processing to conform to the schema.
Parameters:
proc_data: (List of Dictionaries) raw structured data to process
Returns:
List of Dictionaries. Each Dictionary represents a row in the csv
file.
"""
# No further processing
return proc_data
def parse(
data: Union[str, bytes],
raw: bool = False,
quiet: bool = False
) -> List[JSONDictType]:
"""
Main text parsing function
Parameters:
data: (string) text data to parse
raw: (boolean) unprocessed output if True
quiet: (boolean) suppress warning messages if True
Returns:
List of Dictionaries. Raw or processed structured data.
"""
jc.utils.compatibility(__name__, info.compatible, quiet)
jc.utils.input_type_check(data)
# remove BOM bytes, if present
if isinstance(data, str):
data = data.encode('utf-8')
data = data.decode('utf-8-sig')
raw_output = []
cleandata = data.splitlines()
# Clear any blank lines
cleandata = list(filter(None, cleandata))
if jc.utils.has_data(data):
dialect: Union[str, Type[csv.Dialect]] = 'excel' # default in csv module
try:
dialect = csv.Sniffer().sniff(data[:1024])
if '""' in data:
dialect.doublequote = True
except Exception:
pass
reader = csv.DictReader(cleandata, dialect=dialect)
for row in reader:
raw_output.append(row)
return raw_output if raw else _process(raw_output)