mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-23 00:29:59 +02:00
pad lines in sparse_table_parse and use str.isspace()
This commit is contained in:
@ -1,8 +1,5 @@
|
||||
"""jc - JSON Convert universal parsers"""
|
||||
|
||||
|
||||
import string
|
||||
from typing import List, Dict
|
||||
from typing import Iterable, List, Dict
|
||||
|
||||
|
||||
def simple_table_parse(data: List[str]) -> List[Dict]:
|
||||
@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
|
||||
return raw_output
|
||||
|
||||
|
||||
def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
||||
def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
|
||||
"""
|
||||
Parse tables with missing column data or with spaces in column data.
|
||||
Blank cells are converted to None in the resulting dictionary. Data
|
||||
@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
||||
|
||||
Parameters:
|
||||
|
||||
data: (list) Text data to parse that has been split into lines
|
||||
via .splitlines(). Item 0 must be the header row.
|
||||
Any spaces in header names should be changed to
|
||||
underscore '_'. You should also ensure headers are
|
||||
lowercase by using .lower(). Do not change the
|
||||
position of header names as the positions are used
|
||||
to find the data.
|
||||
data: (iter) An iterable of string lines (e.g. str.splitlines())
|
||||
Item 0 must be the header row. Any spaces in header
|
||||
names should be changed to underscore '_'. You
|
||||
should also ensure headers are lowercase by using
|
||||
.lower(). Do not change the position of header
|
||||
names as the positions are used to find the data.
|
||||
|
||||
Also, ensure there are no blank lines (list items)
|
||||
in the data.
|
||||
Also, ensure there are no blank line items.
|
||||
|
||||
delim: (string) Delimiter to use. By default `\\u2063`
|
||||
(invisible separator) is used since it is unlikely
|
||||
@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
||||
|
||||
List of Dictionaries
|
||||
"""
|
||||
data = data.copy()
|
||||
# cast iterable to a list. This also avoids mutating the caller's list
|
||||
data = list(data)
|
||||
|
||||
# find the longest line and pad all lines with spaces to match
|
||||
max_len = max([len(x) for x in data])
|
||||
|
||||
new_data = []
|
||||
for line in data:
|
||||
new_data.append(line + ' ' * (max_len - len(line)))
|
||||
|
||||
data = new_data
|
||||
|
||||
# find header
|
||||
output: List = []
|
||||
header_text: str = data.pop(0)
|
||||
header_text = header_text + ' '
|
||||
@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
||||
h_end = h_spec['end']
|
||||
# check if the location contains whitespace. if not
|
||||
# then move to the left until a space is found
|
||||
while h_end > 0 and entry[h_end] not in string.whitespace:
|
||||
while h_end > 0 and not entry[h_end].isspace():
|
||||
h_end -= 1
|
||||
|
||||
# insert custom delimiter
|
||||
|
Reference in New Issue
Block a user