pad lines in sparse_table_parse and use str.isspace()

2025-08-06 22:32:54 +02:00 · 2022-03-17 16:24:18 -07:00
parent f804c9627f
commit c27bd5ff39
1 changed file with 23 additions and 16 deletions
--- a/jc/parsers/universal.py
+++ b/jc/parsers/universal.py
@@ -1,8 +1,5 @@
 """jc - JSON Convert universal parsers"""
-
-
-import string
-from typing import List, Dict
+from typing import Iterable, List, Dict


 def simple_table_parse(data: List[str]) -> List[Dict]:
@@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
    return raw_output


-def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
+def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
    """
    Parse tables with missing column data or with spaces in column data.
    Blank cells are converted to None in the resulting dictionary. Data
@@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:

    Parameters:

-        data:   (list)   Text data to parse that has been split into lines
-                         via .splitlines(). Item 0 must be the header row.
-                         Any spaces in header names should be changed to
-                         underscore '_'. You should also ensure headers are
-                         lowercase by using .lower(). Do not change the
-                         position of header names as the positions are used
-                         to find the data.
+        data:   (iter)   An iterable of string lines (e.g. str.splitlines())
+                         Item 0 must be the header row. Any spaces in header
+                         names should be changed to underscore '_'. You
+                         should also ensure headers are lowercase by using
+                         .lower(). Do not change the position of header
+                         names as the positions are used to find the data.

-                         Also, ensure there are no blank lines (list items)
-                         in the data.
+                         Also, ensure there are no blank line items.

        delim:  (string) Delimiter to use. By default `u\\2063`
                         (invisible separator) is used since it is unlikely
@@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:

        List of Dictionaries
    """
-    data = data.copy()
+    # cast iterable to a list. Also keeps from mutating the caller's list
+    data = list(data)
+
+    # find the longest line and pad all lines with spaces to match
+    max_len = max([len(x) for x in data])
+
+    new_data = []
+    for line in data:
+        new_data.append(line + ' ' * (max_len - len(line)))
+
+    data = new_data
+
+    # find header
    output: List = []
    header_text: str = data.pop(0)
    header_text = header_text + ' '
@@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
                        h_end = h_spec['end']
                        # check if the location contains whitespace. if not
                        # then move to the left until a space is found
-                        while h_end > 0 and entry[h_end] not in string.whitespace:
+                        while h_end > 0 and not entry[h_end].isspace():
                            h_end -= 1

                        # insert custom delimiter