1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00

fix asciitable parser for cases where centered rows cause misaligned fields

This commit is contained in:
Kelly Brazil
2022-05-26 08:57:35 -07:00
parent f67a916940
commit 9f977d06e0
5 changed files with 117 additions and 13 deletions

View File

@ -1,11 +1,12 @@
jc changelog jc changelog
20220525 v1.20.0 (in progress) 20220526 v1.20.0 (in progress)
- Add YAML output option with `-y` - Add YAML output option with `-y`
- Add `top -b` standard and streaming parsers tested on linux - Add `top -b` standard and streaming parsers tested on linux
- Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count` - Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count`
keys to `jc -a` output keys to `jc -a` output
- Fix pip-show parser for packages with a multi-line license field - Fix `pip-show` parser for packages with a multi-line license field
- Fix ASCII Table parser for cases where centered headers cause mis-aligned fields
20220513 v1.19.0 20220513 v1.19.0
- Add `chage --list` command parser tested on linux - Add `chage --list` command parser tested on linux

View File

@ -136,4 +136,4 @@ Returns:
### Parser Information ### Parser Information
Compatibility: linux, darwin, cygwin, win32, aix, freebsd Compatibility: linux, darwin, cygwin, win32, aix, freebsd
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com) Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com)

View File

@ -117,7 +117,7 @@ from jc.parsers.universal import sparse_table_parse
class info(): class info():
"""Provides parser metadata (version, author, etc.)""" """Provides parser metadata (version, author, etc.)"""
version = '1.0' version = '1.1'
description = 'ASCII and Unicode table parser' description = 'ASCII and Unicode table parser'
author = 'Kelly Brazil' author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com' author_email = 'kellyjonbrazil@gmail.com'
@ -222,18 +222,20 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str: def _snake_case(line: str) -> str:
""" """
replace spaces between words and special characters with an underscore Replace spaces between words and special characters with an underscore
and set to lowercase and set to lowercase. Ignore the replacement char (�) used for header
padding.
""" """
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line) line = re.sub(r'[^a-zA-Z0-9 ]', '_', line) # special characters
return re.sub(r'\b \b', '_', line).lower() line = re.sub(r'\b \b', '_', line).lower() # spaces between words
return line
def _normalize_rows(table: str) -> List[str]: def _normalize_rows(table: str) -> List[str]:
""" """
returns a List of row strings. Header is snake-cased returns a List of row strings. Header is snake-cased
""" """
result = [] result: List[str] = []
for line in table.splitlines(): for line in table.splitlines():
# skip blank lines # skip blank lines
if not line.strip(): if not line.strip():
@ -243,7 +245,36 @@ def _normalize_rows(table: str) -> List[str]:
if _is_separator(line): if _is_separator(line):
continue continue
# data row - remove column separators # header or data row found - remove column separators
if not result: # this is the header row
# normalize the separator
line = line.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')
# find the number of chars to pad in front of headers that are too
# far away from the separator. Replace spaces with unicode char: �
# we will remove this char from headers after sparse_table_parse
problem_header_pattern = re.compile(r'(?:\| )( +)([^|]+)')
problem_headers = problem_header_pattern.findall(line)
if problem_headers:
for p_header in problem_headers:
old_header = p_header[0] + p_header[1]
sub_chars = '' * len(p_header[0])
new_header = sub_chars + p_header[1]
line = line.replace(old_header, new_header)
line = line.replace('|', ' ')
result.append(_snake_case(line))
continue
# this is a data row
line = line.replace('|', ' ')\ line = line.replace('|', ' ')\
.replace('', ' ')\ .replace('', ' ')\
.replace('', ' ')\ .replace('', ' ')\
@ -256,7 +287,6 @@ def _normalize_rows(table: str) -> List[str]:
.replace('', ' ') .replace('', ' ')
result.append(line) result.append(line)
result[0] = _snake_case(result[0])
return result return result
@ -266,7 +296,8 @@ def _fixup_headers(table: List[Dict]) -> List[Dict]:
for row in table: for row in table:
new_row = row.copy() new_row = row.copy()
for k in row: for k in row:
k_new = k # remove replacement character
k_new = k.replace('', '')
# remove consecutive underscores # remove consecutive underscores
k_new = re.sub(r'__+', '_', k_new) k_new = re.sub(r'__+', '_', k_new)
# remove trailing underscores # remove trailing underscores

View File

@ -1,4 +1,4 @@
.TH jc 1 2022-05-25 1.20.0 "JSON Convert" .TH jc 1 2022-05-26 1.20.0 "JSON Convert"
.SH NAME .SH NAME
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types \fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types
.SH SYNOPSIS .SH SYNOPSIS

View File

@ -344,6 +344,78 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_centered_col_header(self):
"""
Test 'asciitable' with long centered column header which can break
column alignment
"""
input = '''
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
| fdc_id | data_type | description | food_category_id | publication_date |
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
| 167512 | sr_legacy_food | Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough | | 2019-04-01 |
| 167513 | sr_legacy_food | Pillsbury, Cinnamon Rolls with Icing, refrigerated dough | | 2019-04-01 |
| 167514 | sr_legacy_food | Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry | | 2019-04-01 |
| 167515 | sr_legacy_food | George Weston Bakeries, Thomas English Muffins | | 2019-04-01 |
| 167516 | sr_legacy_food | Waffles, buttermilk, frozen, ready-to-heat | | 2019-04-01 |
| 167517 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, toasted | | 2019-04-01 |
| 167518 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, microwaved | | 2019-04-01 |
'''
expected = [
{
"fdc_id": "167512",
"data_type": "sr_legacy_food",
"description": "Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167513",
"data_type": "sr_legacy_food",
"description": "Pillsbury, Cinnamon Rolls with Icing, refrigerated dough",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167514",
"data_type": "sr_legacy_food",
"description": "Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167515",
"data_type": "sr_legacy_food",
"description": "George Weston Bakeries, Thomas English Muffins",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167516",
"data_type": "sr_legacy_food",
"description": "Waffles, buttermilk, frozen, ready-to-heat",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167517",
"data_type": "sr_legacy_food",
"description": "Waffle, buttermilk, frozen, ready-to-heat, toasted",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167518",
"data_type": "sr_legacy_food",
"description": "Waffle, buttermilk, frozen, ready-to-heat, microwaved",
"food_category_id": None,
"publication_date": "2019-04-01"
}
]
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()