1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-07-13 01:20:24 +02:00

new universal parsers to limit code duplication

This commit is contained in:
Kelly Brazil
2019-12-09 14:01:47 -08:00
parent bbba1fe477
commit 50a6b256b8

83
jc/parsers/universal.py Normal file
View File

@ -0,0 +1,83 @@
"""jc - JSON CLI output utility universal Parsers"""
import string
def sparse_table_parse(data, delim='\u2063'):
"""
Parse tables with missing column data or with spaces in column data.
Parameters:
data: (list) Text data to parse that has been split into lines via .splitlines().
Item 0 must be the header row. Any spaces in header names should be
changed to underscore '_'. You should also ensure headers are
lowercase by using .lower(). Do not change the position of header
names as the positions are used to find the data.
Also, ensure there are no blank lines (list items) in the data.
delim: (string) Delimiter to use. By default 'u\2063' (invisible separator) is used
since this is unlikely to ever be seen in terminal output. You can
change this for troubleshooting purposes or if there is a delimiter
conflict with your data.
Returns:
dictionary raw structured data
"""
output = []
header_text = data.pop(0)
header_text = header_text + ' '
header_list = header_text.split()
# find each column index and end position
header_search = [header_list[0]]
for h in header_list[1:]:
header_search.append(' ' + h + ' ')
header_spec_list = []
for i, column in enumerate(header_list[0:len(header_list) - 1]):
header_spec = {
'name': column,
'end': header_text.find(header_search[i + 1])
}
header_spec_list.append(header_spec)
# parse lines
if data:
for entry in data:
output_line = {}
# insert new separator since data can contain spaces
for col in reversed(header_list):
# find the right header_spec
for h_spec in header_spec_list:
if h_spec['name'] == col:
h_end = h_spec['end']
# check if the location contains whitespace. if not
# then move to the left until a space is found
while h_end > 0 and entry[h_end] not in string.whitespace:
h_end -= 1
# insert custom delimiter
entry = entry[:h_end] + delim + entry[h_end + 1:]
# create the entry list from the new custom delimiter
entry_list = entry.split(delim, maxsplit=len(header_list) - 1)
# clean up leading and trailing spaces in entry
clean_entry_list = []
for col in entry_list:
clean_entry = col.strip()
if clean_entry == '':
clean_entry = None
clean_entry_list.append(clean_entry)
output_line = dict(zip(header_list, clean_entry_list))
output.append(output_line)
return output