mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
fix asciitable parser for cases where centered rows cause misaligned fields
This commit is contained in:
@ -1,11 +1,12 @@
|
||||
jc changelog
|
||||
|
||||
20220525 v1.20.0 (in progress)
|
||||
20220526 v1.20.0 (in progress)
|
||||
- Add YAML output option with `-y`
|
||||
- Add `top -b` standard and streaming parsers tested on linux
|
||||
- Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count`
|
||||
keys to `jc -a` output
|
||||
- Fix pip-show parser for packages with a multi-line license field
|
||||
- Fix `pip-show` parser for packages with a multi-line license field
|
||||
- Fix ASCII Table parser for cases where centered headers cause mis-aligned fields
|
||||
|
||||
20220513 v1.19.0
|
||||
- Add `chage --list` command parser tested on linux
|
||||
|
@ -136,4 +136,4 @@ Returns:
|
||||
### Parser Information
|
||||
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
|
||||
|
||||
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)
|
||||
Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com)
|
||||
|
@ -117,7 +117,7 @@ from jc.parsers.universal import sparse_table_parse
|
||||
|
||||
class info():
|
||||
"""Provides parser metadata (version, author, etc.)"""
|
||||
version = '1.0'
|
||||
version = '1.1'
|
||||
description = 'ASCII and Unicode table parser'
|
||||
author = 'Kelly Brazil'
|
||||
author_email = 'kellyjonbrazil@gmail.com'
|
||||
@ -222,18 +222,20 @@ def _is_separator(line: str) -> bool:
|
||||
|
||||
def _snake_case(line: str) -> str:
|
||||
"""
|
||||
replace spaces between words and special characters with an underscore
|
||||
and set to lowercase
|
||||
Replace spaces between words and special characters with an underscore
|
||||
and set to lowercase. Ignore the replacement char (�) used for header
|
||||
padding.
|
||||
"""
|
||||
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
|
||||
return re.sub(r'\b \b', '_', line).lower()
|
||||
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
|
||||
line = re.sub(r'\b \b', '_', line).lower() # spaces between words
|
||||
return line
|
||||
|
||||
|
||||
def _normalize_rows(table: str) -> List[str]:
|
||||
"""
|
||||
returns a List of row strings. Header is snake-cased
|
||||
"""
|
||||
result = []
|
||||
result: List[str] = []
|
||||
for line in table.splitlines():
|
||||
# skip blank lines
|
||||
if not line.strip():
|
||||
@ -243,7 +245,36 @@ def _normalize_rows(table: str) -> List[str]:
|
||||
if _is_separator(line):
|
||||
continue
|
||||
|
||||
# data row - remove column separators
|
||||
# header or data row found - remove column separators
|
||||
if not result: # this is the header row
|
||||
# normalize the separator
|
||||
line = line.replace('│', '|')\
|
||||
.replace('┃', '|')\
|
||||
.replace('┆', '|')\
|
||||
.replace('┇', '|')\
|
||||
.replace('┊', '|')\
|
||||
.replace('┋', '|')\
|
||||
.replace('╎', '|')\
|
||||
.replace('╏', '|')\
|
||||
.replace('║', '|')
|
||||
|
||||
# find the number of chars to pad in front of headers that are too
|
||||
# far away from the separator. Replace spaces with unicode char: �
|
||||
# we will remove this char from headers after sparse_table_parse
|
||||
problem_header_pattern = re.compile(r'(?:\| )( +)([^|]+)')
|
||||
problem_headers = problem_header_pattern.findall(line)
|
||||
if problem_headers:
|
||||
for p_header in problem_headers:
|
||||
old_header = p_header[0] + p_header[1]
|
||||
sub_chars = '�' * len(p_header[0])
|
||||
new_header = sub_chars + p_header[1]
|
||||
line = line.replace(old_header, new_header)
|
||||
|
||||
line = line.replace('|', ' ')
|
||||
result.append(_snake_case(line))
|
||||
continue
|
||||
|
||||
# this is a data row
|
||||
line = line.replace('|', ' ')\
|
||||
.replace('│', ' ')\
|
||||
.replace('┃', ' ')\
|
||||
@ -256,7 +287,6 @@ def _normalize_rows(table: str) -> List[str]:
|
||||
.replace('║', ' ')
|
||||
result.append(line)
|
||||
|
||||
result[0] = _snake_case(result[0])
|
||||
return result
|
||||
|
||||
|
||||
@ -266,7 +296,8 @@ def _fixup_headers(table: List[Dict]) -> List[Dict]:
|
||||
for row in table:
|
||||
new_row = row.copy()
|
||||
for k in row:
|
||||
k_new = k
|
||||
# remove replacement character
|
||||
k_new = k.replace('�', '')
|
||||
# remove consecutive underscores
|
||||
k_new = re.sub(r'__+', '_', k_new)
|
||||
# remove trailing underscores
|
||||
|
2
man/jc.1
2
man/jc.1
@ -1,4 +1,4 @@
|
||||
.TH jc 1 2022-05-25 1.20.0 "JSON Convert"
|
||||
.TH jc 1 2022-05-26 1.20.0 "JSON Convert"
|
||||
.SH NAME
|
||||
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types
|
||||
.SH SYNOPSIS
|
||||
|
@ -344,6 +344,78 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||
|
||||
def test_asciitable_centered_col_header(self):
|
||||
"""
|
||||
Test 'asciitable' with long centered column header which can break
|
||||
column alignment
|
||||
"""
|
||||
input = '''
|
||||
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
|
||||
| fdc_id | data_type | description | food_category_id | publication_date |
|
||||
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
|
||||
| 167512 | sr_legacy_food | Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough | | 2019-04-01 |
|
||||
| 167513 | sr_legacy_food | Pillsbury, Cinnamon Rolls with Icing, refrigerated dough | | 2019-04-01 |
|
||||
| 167514 | sr_legacy_food | Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry | | 2019-04-01 |
|
||||
| 167515 | sr_legacy_food | George Weston Bakeries, Thomas English Muffins | | 2019-04-01 |
|
||||
| 167516 | sr_legacy_food | Waffles, buttermilk, frozen, ready-to-heat | | 2019-04-01 |
|
||||
| 167517 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, toasted | | 2019-04-01 |
|
||||
| 167518 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, microwaved | | 2019-04-01 |
|
||||
'''
|
||||
|
||||
expected = [
|
||||
{
|
||||
"fdc_id": "167512",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167513",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Pillsbury, Cinnamon Rolls with Icing, refrigerated dough",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167514",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167515",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "George Weston Bakeries, Thomas English Muffins",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167516",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Waffles, buttermilk, frozen, ready-to-heat",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167517",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Waffle, buttermilk, frozen, ready-to-heat, toasted",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
},
|
||||
{
|
||||
"fdc_id": "167518",
|
||||
"data_type": "sr_legacy_food",
|
||||
"description": "Waffle, buttermilk, frozen, ready-to-heat, microwaved",
|
||||
"food_category_id": None,
|
||||
"publication_date": "2019-04-01"
|
||||
}
|
||||
]
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Reference in New Issue
Block a user