pad lines in sparse_table_parse and use str.isspace()

2025-08-06 22:32:54 +02:00 · 2022-03-17 16:24:18 -07:00
parent f804c9627f
commit c27bd5ff39
1 changed files with 23 additions and 16 deletions
--- a/jc/parsers/universal.py
+++ b/jc/parsers/universal.py
@ -1,8 +1,5 @@
 """jc - JSON Convert universal parsers"""
-
+from typing import Iterable, List, Dict
 import string
 from typing import List, Dict
 def simple_table_parse(data: List[str]) -> List[Dict]:
@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
    return raw_output
-def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
+def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
    """
    Parse tables with missing column data or with spaces in column data.
    Blank cells are converted to None in the resulting dictionary. Data
@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
    Parameters:
-        data:   (list)   Text data to parse that has been split into lines
+        data:   (iter)   An iterable of string lines (e.g. str.splitlines()).
-                         via .splitlines(). Item 0 must be the header row.
+                         Item 0 must be the header row. Any spaces in header
-                         Any spaces in header names should be changed to
+                         names should be changed to underscore '_'. You
-                         underscore '_'. You should also ensure headers are
+                         should also ensure headers are lowercase by using
-                         lowercase by using .lower(). Do not change the
+                         .lower(). Do not change the position of header
-                         position of header names as the positions are used
+                         names as the positions are used to find the data.
                         to find the data.
-                         Also, ensure there are no blank lines (list items)
+                         Also, ensure there are no blank line items.
                         in the data.
        delim:  (string) Delimiter to use. By default `\\u2063`
                         (invisible separator) is used since it is unlikely
@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
        List of Dictionaries
    """
-    data = data.copy()
+    # cast iterable to a list. Also keeps from mutating the caller's list
    data = list(data)
    # find the longest line and pad all lines with spaces to match
    max_len = max([len(x) for x in data])
    new_data = []
    for line in data:
        new_data.append(line + ' ' * (max_len - len(line)))
    data = new_data
    # find header
    output: List = []
    header_text: str = data.pop(0)
    header_text = header_text + ' '
@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
                        h_end = h_spec['end']
                        # check if the location contains whitespace. if not
                        # then move to the left until a space is found
-                        while h_end > 0 and entry[h_end] not in string.whitespace:
+                        while h_end > 0 and not entry[h_end].isspace():
                            h_end -= 1
                        # insert custom delimiter