1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-19 00:17:51 +02:00
Files
jc/jc/parsers/universal.py

117 lines
4.1 KiB
Python
Raw Normal View History

"""jc - JSON CLI output utility universal Parsers"""
import string
2022-01-26 17:08:03 -08:00
from typing import List, Dict, Optional
2022-01-26 17:08:03 -08:00
def simple_table_parse(data: List[str]) -> List[Dict]:
    """
    Parse simple tables. The last column may contain data with spaces.

    Parameters:

        data:   (list)   Text data to parse that has been split into lines
                         via .splitlines(). Item 0 must be the header row.
                         Any spaces in header names should be changed to
                         underscore '_'. You should also ensure headers are
                         lowercase by using .lower().

                         Also, ensure there are no blank lines (list items)
                         in the data.

    Returns:

        List of Dictionaries, one per data row, keyed by header name. A
        row with fewer fields than headers only contains the keys for the
        fields that are present. An empty `data` list returns an empty
        list.
    """
    # code adapted from Conor Heine at:
    # https://gist.github.com/cahna/43a1a3ff4d075bcd71f9d7120037a501

    # nothing to parse: there is not even a header row
    if not data:
        return []

    # str.split() with no arguments already trims the line, collapses runs
    # of whitespace and never yields empty strings
    headers = data[0].split()

    # split each row into at most len(headers) fields so that any extra
    # whitespace-separated words stay together in the last column
    max_split = len(headers) - 1

    return [
        dict(zip(headers, row.strip().split(None, max_split)))
        for row in data[1:]
    ]
2022-01-26 17:08:03 -08:00
def sparse_table_parse(data: List[str], delim: Optional[str] = '\u2063') -> List[Dict]:
    """
    Parse tables with missing column data or with spaces in column data.

    Parameters:

        data:   (list)   Text data to parse that has been split into lines
                         via .splitlines(). Item 0 must be the header row.
                         Any spaces in header names should be changed to
                         underscore '_'. You should also ensure headers are
                         lowercase by using .lower(). Do not change the
                         position of header names as the positions are used
                         to find the data.

                         Also, ensure there are no blank lines (list items)
                         in the data.

                         The list is not modified by this function.

        delim:  (string) Delimiter to use. By default `\\u2063`
                         (invisible separator) is used since it is unlikely
                         to ever be seen in terminal output. You can change
                         this for troubleshooting purposes or if there is a
                         delimiter conflict with your data. `None` selects
                         the default delimiter.

    Returns:

        List of Dictionaries, one per data row, keyed by header name.
        Columns with no data are set to None. An empty `data` list
        returns an empty list.
    """
    # nothing to parse: there is not even a header row
    if not data:
        return []

    if delim is None:
        delim = '\u2063'

    # read the header by index and the rows by slice (rather than pop())
    # so the caller's list is left untouched
    header_text = data[0] + ' '
    header_list = header_text.split()
    rows = data[1:]

    # A column ends at the space just before the next header name. The
    # position is located by searching for the next header name surrounded
    # by spaces, so header names must be separated by spaces. The last
    # column has no end position: it runs to the end of the line.
    column_ends = [
        header_text.find(' ' + next_header + ' ')
        for next_header in header_list[1:]
    ]

    # every end position is smaller than this width, so padding a row to
    # it guarantees that each end position can be indexed
    min_width = len(header_text)
    max_split = len(header_list) - 1

    output = []
    for entry in rows:
        # rows whose trailing columns are missing can be shorter than the
        # header: pad with spaces so the missing columns parse as empty
        entry = entry.ljust(min_width)

        # insert new separator since data can contain spaces. Work from
        # right to left so earlier insertions never shift the positions
        # that are still to be processed.
        for h_end in reversed(column_ends):
            # check if the location contains whitespace. if not
            # then move to the left until a space is found
            while h_end > 0 and entry[h_end] not in string.whitespace:
                h_end -= 1

            # replace the whitespace character with the custom delimiter
            entry = entry[:h_end] + delim + entry[h_end + 1:]

        # create the entry list from the new custom delimiter
        entry_list = entry.split(delim, maxsplit=max_split)

        # clean up leading and trailing spaces; empty columns become None
        clean_entry_list = [col.strip() or None for col in entry_list]

        output.append(dict(zip(header_list, clean_entry_list)))

    return output