mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-23 00:29:59 +02:00
pad lines in sparse_table_parse and use str.isspace()
This commit is contained in:
@ -1,8 +1,5 @@
|
|||||||
"""jc - JSON Convert universal parsers"""
|
"""jc - JSON Convert universal parsers"""
|
||||||
|
from typing import Iterable, List, Dict
|
||||||
|
|
||||||
import string
|
|
||||||
from typing import List, Dict
|
|
||||||
|
|
||||||
|
|
||||||
def simple_table_parse(data: List[str]) -> List[Dict]:
|
def simple_table_parse(data: List[str]) -> List[Dict]:
|
||||||
@ -47,7 +44,7 @@ def simple_table_parse(data: List[str]) -> List[Dict]:
|
|||||||
return raw_output
|
return raw_output
|
||||||
|
|
||||||
|
|
||||||
def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
def sparse_table_parse(data: Iterable[str], delim: str = '\u2063') -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
Parse tables with missing column data or with spaces in column data.
|
Parse tables with missing column data or with spaces in column data.
|
||||||
Blank cells are converted to None in the resulting dictionary. Data
|
Blank cells are converted to None in the resulting dictionary. Data
|
||||||
@ -69,16 +66,14 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
|||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
|
||||||
data: (list) Text data to parse that has been split into lines
|
data: (iter) An iterable of string lines (e.g. str.splitlines())
|
||||||
via .splitlines(). Item 0 must be the header row.
|
Item 0 must be the header row. Any spaces in header
|
||||||
Any spaces in header names should be changed to
|
names should be changed to underscore '_'. You
|
||||||
underscore '_'. You should also ensure headers are
|
should also ensure headers are lowercase by using
|
||||||
lowercase by using .lower(). Do not change the
|
.lower(). Do not change the position of header
|
||||||
position of header names as the positions are used
|
names as the positions are used to find the data.
|
||||||
to find the data.
|
|
||||||
|
|
||||||
Also, ensure there are no blank lines (list items)
|
Also, ensure there are no blank line items.
|
||||||
in the data.
|
|
||||||
|
|
||||||
delim: (string) Delimiter to use. By default `\\u2063`
|
delim: (string) Delimiter to use. By default `\\u2063`
|
||||||
(invisible separator) is used since it is unlikely
|
(invisible separator) is used since it is unlikely
|
||||||
@ -90,7 +85,19 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
|||||||
|
|
||||||
List of Dictionaries
|
List of Dictionaries
|
||||||
"""
|
"""
|
||||||
data = data.copy()
|
# cast iterable to a list. Also keeps from mutating the caller's list
|
||||||
|
data = list(data)
|
||||||
|
|
||||||
|
# find the longest line and pad all lines with spaces to match
|
||||||
|
max_len = max([len(x) for x in data])
|
||||||
|
|
||||||
|
new_data = []
|
||||||
|
for line in data:
|
||||||
|
new_data.append(line + ' ' * (max_len - len(line)))
|
||||||
|
|
||||||
|
data = new_data
|
||||||
|
|
||||||
|
# find header
|
||||||
output: List = []
|
output: List = []
|
||||||
header_text: str = data.pop(0)
|
header_text: str = data.pop(0)
|
||||||
header_text = header_text + ' '
|
header_text = header_text + ' '
|
||||||
@ -123,7 +130,7 @@ def sparse_table_parse(data: List[str], delim: str = '\u2063') -> List[Dict]:
|
|||||||
h_end = h_spec['end']
|
h_end = h_spec['end']
|
||||||
# check if the location contains whitespace. if not
|
# check if the location contains whitespace. if not
|
||||||
# then move to the left until a space is found
|
# then move to the left until a space is found
|
||||||
while h_end > 0 and entry[h_end] not in string.whitespace:
|
while h_end > 0 and not entry[h_end].isspace():
|
||||||
h_end -= 1
|
h_end -= 1
|
||||||
|
|
||||||
# insert custom delimiter
|
# insert custom delimiter
|
||||||
|
Reference in New Issue
Block a user