1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-23 00:29:59 +02:00

pad lines in sparse_table_parse and use str.isspace()

This commit is contained in:
Kelly Brazil
2022-03-17 16:24:18 -07:00
parent f804c9627f
commit c27bd5ff39

View File

@ -1,8 +1,5 @@
"""jc - JSON Convert universal parsers""" """jc - JSON Convert universal parsers"""
from typing import Iterable, List, Dict
import string
from typing import List, Dict
def simple_table_parse(data: List[str]) -> List[Dict]: def simple_table_parse(data: List[str]) -> List[Dict]:
@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
return raw_output return raw_output
def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]: def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
""" """
Parse tables with missing column data or with spaces in column data. Parse tables with missing column data or with spaces in column data.
Blank cells are converted to None in the resulting dictionary. Data Blank cells are converted to None in the resulting dictionary. Data
@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
Parameters: Parameters:
data: (list) Text data to parse that has been split into lines data: (iter) An iterable of string lines (e.g. str.splitlines())
via .splitlines(). Item 0 must be the header row. Item 0 must be the header row. Any spaces in header
Any spaces in header names should be changed to names should be changed to underscore '_'. You
underscore '_'. You should also ensure headers are should also ensure headers are lowercase by using
lowercase by using .lower(). Do not change the .lower(). Do not change the position of header
position of header names as the positions are used names as the positions are used to find the data.
to find the data.
Also, ensure there are no blank lines (list items) Also, ensure there are no blank line items.
in the data.
delim: (string) Delimiter to use. By default `u\\2063` delim: (string) Delimiter to use. By default `u\\2063`
(invisible separator) is used since it is unlikely (invisible separator) is used since it is unlikely
@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
List of Dictionaries List of Dictionaries
""" """
data = data.copy() # cast iterable to a list. Also keeps from mutating the caller's list
data = list(data)
# find the longest line and pad all lines with spaces to match
max_len = max([len(x) for x in data])
new_data = []
for line in data:
new_data.append(line + ' ' * (max_len - len(line)))
data = new_data
# find header
output: List = [] output: List = []
header_text: str = data.pop(0) header_text: str = data.pop(0)
header_text = header_text + ' ' header_text = header_text + ' '
@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
h_end = h_spec['end'] h_end = h_spec['end']
# check if the location contains whitespace. if not # check if the location contains whitespace. if not
# then move to the left until a space is found # then move to the left until a space is found
while h_end > 0 and entry[h_end] not in string.whitespace: while h_end > 0 and not entry[h_end].isspace():
h_end -= 1 h_end -= 1
# insert custom delimiter # insert custom delimiter