1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00

fix asciitable parser for cases where centered rows cause misaligned fields

This commit is contained in:
Kelly Brazil
2022-05-26 08:57:35 -07:00
parent f67a916940
commit 9f977d06e0
5 changed files with 117 additions and 13 deletions

View File

@ -1,11 +1,12 @@
jc changelog
20220525 v1.20.0 (in progress)
20220526 v1.20.0 (in progress)
- Add YAML output option with `-y`
- Add `top -b` standard and streaming parsers tested on linux
- Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count`
keys to `jc -a` output
- Fix pip-show parser for packages with a multi-line license field
- Fix `pip-show` parser for packages with a multi-line license field
- Fix ASCII Table parser for cases where centered headers cause mis-aligned fields
20220513 v1.19.0
- Add `chage --list` command parser tested on linux

View File

@ -136,4 +136,4 @@ Returns:
### Parser Information
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)
Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com)

View File

@ -117,7 +117,7 @@ from jc.parsers.universal import sparse_table_parse
class info():
"""Provides parser metadata (version, author, etc.)"""
version = '1.0'
version = '1.1'
description = 'ASCII and Unicode table parser'
author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com'
@ -222,18 +222,20 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str:
"""
replace spaces between words and special characters with an underscore
and set to lowercase
Replace spaces between words and special characters with an underscore
and set to lowercase. Ignore the replacement char (�) used for header
padding.
"""
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
return re.sub(r'\b \b', '_', line).lower()
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line) # special characters
line = re.sub(r'\b \b', '_', line).lower() # spaces betwee words
return line
def _normalize_rows(table: str) -> List[str]:
"""
returns a List of row strings. Header is snake-cased
"""
result = []
result: List[str] = []
for line in table.splitlines():
# skip blank lines
if not line.strip():
@ -243,7 +245,36 @@ def _normalize_rows(table: str) -> List[str]:
if _is_separator(line):
continue
# data row - remove column separators
# header or data row found - remove column separators
if not result: # this is the header row
# normalize the separator
line = line.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')\
.replace('', '|')
# find the number of chars to pad in front of headers that are too
# far away from the separator. Replace spaces with unicode char: �
# we will remove this char from headers after sparse_table_parse
problem_header_pattern = re.compile(r'(?:\| )( +)([^|]+)')
problem_headers = problem_header_pattern.findall(line)
if problem_headers:
for p_header in problem_headers:
old_header = p_header[0] + p_header[1]
sub_chars = '' * len(p_header[0])
new_header = sub_chars + p_header[1]
line = line.replace(old_header, new_header)
line = line.replace('|', ' ')
result.append(_snake_case(line))
continue
# this is a data row
line = line.replace('|', ' ')\
.replace('', ' ')\
.replace('', ' ')\
@ -256,7 +287,6 @@ def _normalize_rows(table: str) -> List[str]:
.replace('', ' ')
result.append(line)
result[0] = _snake_case(result[0])
return result
@ -266,7 +296,8 @@ def _fixup_headers(table: List[Dict]) -> List[Dict]:
for row in table:
new_row = row.copy()
for k in row:
k_new = k
# remove replacement character
k_new = k.replace('', '')
# remove consecutive underscores
k_new = re.sub(r'__+', '_', k_new)
# remove trailing underscores

View File

@ -1,4 +1,4 @@
.TH jc 1 2022-05-25 1.20.0 "JSON Convert"
.TH jc 1 2022-05-26 1.20.0 "JSON Convert"
.SH NAME
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types
.SH SYNOPSIS

View File

@ -344,6 +344,78 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_centered_col_header(self):
"""
Test 'asciitable' with long centered column header which can break
column alignment
"""
input = '''
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
| fdc_id | data_type | description | food_category_id | publication_date |
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
| 167512 | sr_legacy_food | Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough | | 2019-04-01 |
| 167513 | sr_legacy_food | Pillsbury, Cinnamon Rolls with Icing, refrigerated dough | | 2019-04-01 |
| 167514 | sr_legacy_food | Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry | | 2019-04-01 |
| 167515 | sr_legacy_food | George Weston Bakeries, Thomas English Muffins | | 2019-04-01 |
| 167516 | sr_legacy_food | Waffles, buttermilk, frozen, ready-to-heat | | 2019-04-01 |
| 167517 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, toasted | | 2019-04-01 |
| 167518 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, microwaved | | 2019-04-01 |
'''
expected = [
{
"fdc_id": "167512",
"data_type": "sr_legacy_food",
"description": "Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167513",
"data_type": "sr_legacy_food",
"description": "Pillsbury, Cinnamon Rolls with Icing, refrigerated dough",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167514",
"data_type": "sr_legacy_food",
"description": "Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167515",
"data_type": "sr_legacy_food",
"description": "George Weston Bakeries, Thomas English Muffins",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167516",
"data_type": "sr_legacy_food",
"description": "Waffles, buttermilk, frozen, ready-to-heat",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167517",
"data_type": "sr_legacy_food",
"description": "Waffle, buttermilk, frozen, ready-to-heat, toasted",
"food_category_id": None,
"publication_date": "2019-04-01"
},
{
"fdc_id": "167518",
"data_type": "sr_legacy_food",
"description": "Waffle, buttermilk, frozen, ready-to-heat, microwaved",
"food_category_id": None,
"publication_date": "2019-04-01"
}
]
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
if __name__ == '__main__':
unittest.main()