mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-07-13 01:20:24 +02:00
new universal parsers to limit code duplication
This commit is contained in:
83
jc/parsers/universal.py
Normal file
83
jc/parsers/universal.py
Normal file
@ -0,0 +1,83 @@
|
||||
"""jc - JSON CLI output utility universal Parsers"""
|
||||
|
||||
|
||||
import string
|
||||
|
||||
|
||||
def sparse_table_parse(data, delim='\u2063'):
|
||||
"""
|
||||
Parse tables with missing column data or with spaces in column data.
|
||||
|
||||
Parameters:
|
||||
|
||||
data: (list) Text data to parse that has been split into lines via .splitlines().
|
||||
Item 0 must be the header row. Any spaces in header names should be
|
||||
changed to underscore '_'. You should also ensure headers are
|
||||
lowercase by using .lower(). Do not change the position of header
|
||||
names as the positions are used to find the data.
|
||||
|
||||
Also, ensure there are no blank lines (list items) in the data.
|
||||
|
||||
delim: (string) Delimiter to use. By default 'u\2063' (invisible separator) is used
|
||||
since this is unlikely to ever be seen in terminal output. You can
|
||||
change this for troubleshooting purposes or if there is a delimiter
|
||||
conflict with your data.
|
||||
|
||||
Returns:
|
||||
|
||||
dictionary raw structured data
|
||||
"""
|
||||
output = []
|
||||
header_text = data.pop(0)
|
||||
header_text = header_text + ' '
|
||||
header_list = header_text.split()
|
||||
|
||||
# find each column index and end position
|
||||
header_search = [header_list[0]]
|
||||
for h in header_list[1:]:
|
||||
header_search.append(' ' + h + ' ')
|
||||
|
||||
header_spec_list = []
|
||||
for i, column in enumerate(header_list[0:len(header_list) - 1]):
|
||||
header_spec = {
|
||||
'name': column,
|
||||
'end': header_text.find(header_search[i + 1])
|
||||
}
|
||||
|
||||
header_spec_list.append(header_spec)
|
||||
|
||||
# parse lines
|
||||
if data:
|
||||
for entry in data:
|
||||
output_line = {}
|
||||
|
||||
# insert new separator since data can contain spaces
|
||||
for col in reversed(header_list):
|
||||
# find the right header_spec
|
||||
for h_spec in header_spec_list:
|
||||
if h_spec['name'] == col:
|
||||
h_end = h_spec['end']
|
||||
# check if the location contains whitespace. if not
|
||||
# then move to the left until a space is found
|
||||
while h_end > 0 and entry[h_end] not in string.whitespace:
|
||||
h_end -= 1
|
||||
|
||||
# insert custom delimiter
|
||||
entry = entry[:h_end] + delim + entry[h_end + 1:]
|
||||
|
||||
# create the entry list from the new custom delimiter
|
||||
entry_list = entry.split(delim, maxsplit=len(header_list) - 1)
|
||||
|
||||
# clean up leading and trailing spaces in entry
|
||||
clean_entry_list = []
|
||||
for col in entry_list:
|
||||
clean_entry = col.strip()
|
||||
if clean_entry == '':
|
||||
clean_entry = None
|
||||
|
||||
clean_entry_list.append(clean_entry)
|
||||
|
||||
output_line = dict(zip(header_list, clean_entry_list))
|
||||
output.append(output_line)
|
||||
|
||||
return output
|
Reference in New Issue
Block a user