1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-23 00:29:59 +02:00

pad lines in sparse_table_parse and use str.isspace()

This commit is contained in:
Kelly Brazil
2022-03-17 16:24:18 -07:00
parent f804c9627f
commit c27bd5ff39

View File

@ -1,8 +1,5 @@
"""jc - JSON Convert universal parsers"""
import string
from typing import List, Dict
from typing import Iterable, List, Dict
def simple_table_parse(data: List[str]) -> List[Dict]:
@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
return raw_output
def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
"""
Parse tables with missing column data or with spaces in column data.
Blank cells are converted to None in the resulting dictionary. Data
@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
Parameters:
data: (list) Text data to parse that has been split into lines
via .splitlines(). Item 0 must be the header row.
Any spaces in header names should be changed to
underscore '_'. You should also ensure headers are
lowercase by using .lower(). Do not change the
position of header names as the positions are used
to find the data.
data: (iter) An iterable of string lines (e.g. str.splitlines())
Item 0 must be the header row. Any spaces in header
names should be changed to underscore '_'. You
should also ensure headers are lowercase by using
.lower(). Do not change the position of header
names as the positions are used to find the data.
Also, ensure there are no blank lines (list items)
in the data.
Also, ensure there are no blank line items.
delim: (string) Delimiter to use. By default `\\u2063`
(invisible separator) is used since it is unlikely
@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
List of Dictionaries
"""
data = data.copy()
# cast iterable to a list. This also avoids mutating the caller's list
data = list(data)
# find the longest line and pad all lines with spaces to match
max_len = max([len(x) for x in data])
new_data = []
for line in data:
new_data.append(line + ' ' * (max_len - len(line)))
data = new_data
# find header
output: List = []
header_text: str = data.pop(0)
header_text = header_text + ' '
@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
h_end = h_spec['end']
# check if the location contains whitespace. if not
# then move to the left until a space is found
while h_end > 0 and entry[h_end] not in string.whitespace:
while h_end > 0 and not entry[h_end].isspace():
h_end -= 1
# insert custom delimiter