diff --git a/jc/parsers/universal.py b/jc/parsers/universal.py index 8bc95000..81aa14f0 100644 --- a/jc/parsers/universal.py +++ b/jc/parsers/universal.py @@ -1,8 +1,5 @@ """jc - JSON Convert universal parsers""" - - -import string -from typing import List, Dict +from typing import Iterable, List, Dict def simple_table_parse(data: List[str]) -> List[Dict]: @@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]: return raw_output -def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]: +def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]: """ Parse tables with missing column data or with spaces in column data. Blank cells are converted to None in the resulting dictionary. Data @@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]: Parameters: - data: (list) Text data to parse that has been split into lines - via .splitlines(). Item 0 must be the header row. - Any spaces in header names should be changed to - underscore '_'. You should also ensure headers are - lowercase by using .lower(). Do not change the - position of header names as the positions are used - to find the data. + data: (iter) An iterable of string lines (e.g. str.splitlines()) + Item 0 must be the header row. Any spaces in header + names should be changed to underscore '_'. You + should also ensure headers are lowercase by using + .lower(). Do not change the position of header + names as the positions are used to find the data. - Also, ensure there are no blank lines (list items) - in the data. + Also, ensure there are no blank line items. delim: (string) Delimiter to use. By default `u\\2063` (invisible separator) is used since it is unlikely @@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]: List of Dictionaries """ - data = data.copy() + # cast iterable to a list. Also keeps from mutating the caller's list + data = list(data) + + # find the longest line and pad all lines with spaces to match + max_len = max([len(x) for x in data]) + + new_data = [] + for line in data: + new_data.append(line + ' ' * (max_len - len(line))) + + data = new_data + + # find header output: List = [] header_text: str = data.pop(0) header_text = header_text + ' ' @@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]: h_end = h_spec['end'] # check if the location contains whitespace. if not # then move to the left until a space is found - while h_end > 0 and entry[h_end] not in string.whitespace: + while h_end > 0 and not entry[h_end].isspace(): h_end -= 1 # insert custom delimiter