mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
fix asciitable parser for cases where centered rows cause misaligned fields
This commit is contained in:
@ -1,11 +1,12 @@
|
|||||||
jc changelog
|
jc changelog
|
||||||
|
|
||||||
20220525 v1.20.0 (in progress)
|
20220526 v1.20.0 (in progress)
|
||||||
- Add YAML output option with `-y`
|
- Add YAML output option with `-y`
|
||||||
- Add `top -b` standard and streaming parsers tested on linux
|
- Add `top -b` standard and streaming parsers tested on linux
|
||||||
- Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count`
|
- Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count`
|
||||||
keys to `jc -a` output
|
keys to `jc -a` output
|
||||||
- Fix pip-show parser for packages with a multi-line license field
|
- Fix `pip-show` parser for packages with a multi-line license field
|
||||||
|
- Fix ASCII Table parser for cases where centered headers cause misaligned fields
|
||||||
|
|
||||||
20220513 v1.19.0
|
20220513 v1.19.0
|
||||||
- Add `chage --list` command parser tested on linux
|
- Add `chage --list` command parser tested on linux
|
||||||
|
@ -136,4 +136,4 @@ Returns:
|
|||||||
### Parser Information
|
### Parser Information
|
||||||
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
|
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
|
||||||
|
|
||||||
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)
|
Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com)
|
||||||
|
@ -117,7 +117,7 @@ from jc.parsers.universal import sparse_table_parse
|
|||||||
|
|
||||||
class info():
|
class info():
|
||||||
"""Provides parser metadata (version, author, etc.)"""
|
"""Provides parser metadata (version, author, etc.)"""
|
||||||
version = '1.0'
|
version = '1.1'
|
||||||
description = 'ASCII and Unicode table parser'
|
description = 'ASCII and Unicode table parser'
|
||||||
author = 'Kelly Brazil'
|
author = 'Kelly Brazil'
|
||||||
author_email = 'kellyjonbrazil@gmail.com'
|
author_email = 'kellyjonbrazil@gmail.com'
|
||||||
@ -222,18 +222,20 @@ def _is_separator(line: str) -> bool:
|
|||||||
|
|
||||||
def _snake_case(line: str) -> str:
|
def _snake_case(line: str) -> str:
|
||||||
"""
|
"""
|
||||||
replace spaces between words and special characters with an underscore
|
Replace spaces between words and special characters with an underscore
|
||||||
and set to lowercase
|
and set to lowercase. Ignore the replacement char (�) used for header
|
||||||
|
padding.
|
||||||
"""
|
"""
|
||||||
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
|
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
|
||||||
return re.sub(r'\b \b', '_', line).lower()
|
line = re.sub(r'\b \b', '_', line).lower() # spaces between words
|
||||||
|
return line
|
||||||
|
|
||||||
|
|
||||||
def _normalize_rows(table: str) -> List[str]:
|
def _normalize_rows(table: str) -> List[str]:
|
||||||
"""
|
"""
|
||||||
returns a List of row strings. Header is snake-cased
|
returns a List of row strings. Header is snake-cased
|
||||||
"""
|
"""
|
||||||
result = []
|
result: List[str] = []
|
||||||
for line in table.splitlines():
|
for line in table.splitlines():
|
||||||
# skip blank lines
|
# skip blank lines
|
||||||
if not line.strip():
|
if not line.strip():
|
||||||
@ -243,7 +245,36 @@ def _normalize_rows(table: str) -> List[str]:
|
|||||||
if _is_separator(line):
|
if _is_separator(line):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# data row - remove column separators
|
# header or data row found - remove column separators
|
||||||
|
if not result: # this is the header row
|
||||||
|
# normalize the separator
|
||||||
|
line = line.replace('│', '|')\
|
||||||
|
.replace('┃', '|')\
|
||||||
|
.replace('┆', '|')\
|
||||||
|
.replace('┇', '|')\
|
||||||
|
.replace('┊', '|')\
|
||||||
|
.replace('┋', '|')\
|
||||||
|
.replace('╎', '|')\
|
||||||
|
.replace('╏', '|')\
|
||||||
|
.replace('║', '|')
|
||||||
|
|
||||||
|
# find the number of chars to pad in front of headers that are too
|
||||||
|
# far away from the separator. Replace spaces with unicode char: �
|
||||||
|
# we will remove this char from headers after sparse_table_parse
|
||||||
|
problem_header_pattern = re.compile(r'(?:\| )( +)([^|]+)')
|
||||||
|
problem_headers = problem_header_pattern.findall(line)
|
||||||
|
if problem_headers:
|
||||||
|
for p_header in problem_headers:
|
||||||
|
old_header = p_header[0] + p_header[1]
|
||||||
|
sub_chars = '�' * len(p_header[0])
|
||||||
|
new_header = sub_chars + p_header[1]
|
||||||
|
line = line.replace(old_header, new_header)
|
||||||
|
|
||||||
|
line = line.replace('|', ' ')
|
||||||
|
result.append(_snake_case(line))
|
||||||
|
continue
|
||||||
|
|
||||||
|
# this is a data row
|
||||||
line = line.replace('|', ' ')\
|
line = line.replace('|', ' ')\
|
||||||
.replace('│', ' ')\
|
.replace('│', ' ')\
|
||||||
.replace('┃', ' ')\
|
.replace('┃', ' ')\
|
||||||
@ -256,7 +287,6 @@ def _normalize_rows(table: str) -> List[str]:
|
|||||||
.replace('║', ' ')
|
.replace('║', ' ')
|
||||||
result.append(line)
|
result.append(line)
|
||||||
|
|
||||||
result[0] = _snake_case(result[0])
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@ -266,7 +296,8 @@ def _fixup_headers(table: List[Dict]) -> List[Dict]:
|
|||||||
for row in table:
|
for row in table:
|
||||||
new_row = row.copy()
|
new_row = row.copy()
|
||||||
for k in row:
|
for k in row:
|
||||||
k_new = k
|
# remove replacement character
|
||||||
|
k_new = k.replace('�', '')
|
||||||
# remove consecutive underscores
|
# remove consecutive underscores
|
||||||
k_new = re.sub(r'__+', '_', k_new)
|
k_new = re.sub(r'__+', '_', k_new)
|
||||||
# remove trailing underscores
|
# remove trailing underscores
|
||||||
|
2
man/jc.1
2
man/jc.1
@ -1,4 +1,4 @@
|
|||||||
.TH jc 1 2022-05-25 1.20.0 "JSON Convert"
|
.TH jc 1 2022-05-26 1.20.0 "JSON Convert"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types
|
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -344,6 +344,78 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
|
|||||||
|
|
||||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
def test_asciitable_centered_col_header(self):
|
||||||
|
"""
|
||||||
|
Test 'asciitable' with long centered column header which can break
|
||||||
|
column alignment
|
||||||
|
"""
|
||||||
|
input = '''
|
||||||
|
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
|
||||||
|
| fdc_id | data_type | description | food_category_id | publication_date |
|
||||||
|
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+
|
||||||
|
| 167512 | sr_legacy_food | Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough | | 2019-04-01 |
|
||||||
|
| 167513 | sr_legacy_food | Pillsbury, Cinnamon Rolls with Icing, refrigerated dough | | 2019-04-01 |
|
||||||
|
| 167514 | sr_legacy_food | Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry | | 2019-04-01 |
|
||||||
|
| 167515 | sr_legacy_food | George Weston Bakeries, Thomas English Muffins | | 2019-04-01 |
|
||||||
|
| 167516 | sr_legacy_food | Waffles, buttermilk, frozen, ready-to-heat | | 2019-04-01 |
|
||||||
|
| 167517 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, toasted | | 2019-04-01 |
|
||||||
|
| 167518 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, microwaved | | 2019-04-01 |
|
||||||
|
'''
|
||||||
|
|
||||||
|
expected = [
|
||||||
|
{
|
||||||
|
"fdc_id": "167512",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167513",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Pillsbury, Cinnamon Rolls with Icing, refrigerated dough",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167514",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167515",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "George Weston Bakeries, Thomas English Muffins",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167516",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Waffles, buttermilk, frozen, ready-to-heat",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167517",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Waffle, buttermilk, frozen, ready-to-heat, toasted",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fdc_id": "167518",
|
||||||
|
"data_type": "sr_legacy_food",
|
||||||
|
"description": "Waffle, buttermilk, frozen, ready-to-heat, microwaved",
|
||||||
|
"food_category_id": None,
|
||||||
|
"publication_date": "2019-04-01"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Reference in New Issue
Block a user