diff --git a/CHANGELOG b/CHANGELOG index 0b7b8190..5a4f0cab 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,11 +1,12 @@ jc changelog -20220525 v1.20.0 (in progress) +20220526 v1.20.0 (in progress) - Add YAML output option with `-y` - Add `top -b` standard and streaming parsers tested on linux - Add `plugin_parser_count`, `standard_parser_count`, and `streaming_parser_count` keys to `jc -a` output -- Fix pip-show parser for packages with a multi-line license field +- Fix `pip-show` parser for packages with a multi-line license field +- Fix ASCII Table parser for cases where centered headers cause mis-aligned fields 20220513 v1.19.0 - Add `chage --list` command parser tested on linux diff --git a/docs/parsers/asciitable.md b/docs/parsers/asciitable.md index c3c129b0..e367ad63 100644 --- a/docs/parsers/asciitable.md +++ b/docs/parsers/asciitable.md @@ -136,4 +136,4 @@ Returns: ### Parser Information Compatibility: linux, darwin, cygwin, win32, aix, freebsd -Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com) +Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com) diff --git a/jc/parsers/asciitable.py b/jc/parsers/asciitable.py index 339329bb..6f46f976 100644 --- a/jc/parsers/asciitable.py +++ b/jc/parsers/asciitable.py @@ -117,7 +117,7 @@ from jc.parsers.universal import sparse_table_parse class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.0' + version = '1.1' description = 'ASCII and Unicode table parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -222,18 +222,20 @@ def _is_separator(line: str) -> bool: def _snake_case(line: str) -> str: """ - replace spaces between words and special characters with an underscore - and set to lowercase + Replace spaces between words and special characters with an underscore + and set to lowercase. Ignore the replacement char (�) used for header + padding. 
""" - line = re.sub(r'[^a-zA-Z0-9 ]', '_', line) - return re.sub(r'\b \b', '_', line).lower() + line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters + line = re.sub(r'\b \b', '_', line).lower() # spaces between words + return line def _normalize_rows(table: str) -> List[str]: """ returns a List of row strings. Header is snake-cased """ - result = [] + result: List[str] = [] for line in table.splitlines(): # skip blank lines if not line.strip(): @@ -243,7 +245,36 @@ def _normalize_rows(table: str) -> List[str]: if _is_separator(line): continue - # data row - remove column separators + # header or data row found - remove column separators + if not result: # this is the header row + # normalize the separator + line = line.replace('│', '|')\ + .replace('┃', '|')\ + .replace('┆', '|')\ + .replace('┇', '|')\ + .replace('┊', '|')\ + .replace('┋', '|')\ + .replace('╎', '|')\ + .replace('╏', '|')\ + .replace('║', '|') + + # find the number of chars to pad in front of headers that are too + # far away from the separator. 
Replace spaces with unicode char: � + # we will remove this char from headers after sparse_table_parse + problem_header_pattern = re.compile(r'(?:\| )( +)([^|]+)') + problem_headers = problem_header_pattern.findall(line) + if problem_headers: + for p_header in problem_headers: + old_header = p_header[0] + p_header[1] + sub_chars = '�' * len(p_header[0]) + new_header = sub_chars + p_header[1] + line = line.replace(old_header, new_header) + + line = line.replace('|', ' ') + result.append(_snake_case(line)) + continue + + # this is a data row line = line.replace('|', ' ')\ .replace('│', ' ')\ .replace('┃', ' ')\ @@ -256,7 +287,6 @@ def _normalize_rows(table: str) -> List[str]: .replace('║', ' ') result.append(line) - result[0] = _snake_case(result[0]) return result @@ -266,7 +296,8 @@ def _fixup_headers(table: List[Dict]) -> List[Dict]: for row in table: new_row = row.copy() for k in row: - k_new = k + # remove replacement character + k_new = k.replace('�', '') # remove consecutive underscores k_new = re.sub(r'__+', '_', k_new) # remove trailing underscores diff --git a/man/jc.1 b/man/jc.1 index 8a769491..36763ec2 100644 --- a/man/jc.1 +++ b/man/jc.1 @@ -1,4 +1,4 @@ -.TH jc 1 2022-05-25 1.20.0 "JSON Convert" +.TH jc 1 2022-05-26 1.20.0 "JSON Convert" .SH NAME \fBjc\fP \- JSON Convert JSONifies the output of many CLI tools and file-types .SH SYNOPSIS diff --git a/tests/test_asciitable.py b/tests/test_asciitable.py index e8e9967a..e481f1d7 100644 --- a/tests/test_asciitable.py +++ b/tests/test_asciitable.py @@ -344,6 +344,78 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17 self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) + def test_asciitable_centered_col_header(self): + """ + Test 'asciitable' with long centered column header which can break + column alignment + """ + input = ''' + 
+---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+ + | fdc_id | data_type | description | food_category_id | publication_date | + +---------+--------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+------------------+ + | 167512 | sr_legacy_food | Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough | | 2019-04-01 | + | 167513 | sr_legacy_food | Pillsbury, Cinnamon Rolls with Icing, refrigerated dough | | 2019-04-01 | + | 167514 | sr_legacy_food | Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry | | 2019-04-01 | + | 167515 | sr_legacy_food | George Weston Bakeries, Thomas English Muffins | | 2019-04-01 | + | 167516 | sr_legacy_food | Waffles, buttermilk, frozen, ready-to-heat | | 2019-04-01 | + | 167517 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, toasted | | 2019-04-01 | + | 167518 | sr_legacy_food | Waffle, buttermilk, frozen, ready-to-heat, microwaved | | 2019-04-01 | + ''' + + expected = [ + { + "fdc_id": "167512", + "data_type": "sr_legacy_food", + "description": "Pillsbury Golden Layer Buttermilk Biscuits, Artificial Flavor, refrigerated dough", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167513", + "data_type": "sr_legacy_food", + "description": "Pillsbury, Cinnamon Rolls with Icing, refrigerated dough", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167514", + "data_type": "sr_legacy_food", + 
"description": "Kraft Foods, Shake N Bake Original Recipe, Coating for Pork, dry", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167515", + "data_type": "sr_legacy_food", + "description": "George Weston Bakeries, Thomas English Muffins", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167516", + "data_type": "sr_legacy_food", + "description": "Waffles, buttermilk, frozen, ready-to-heat", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167517", + "data_type": "sr_legacy_food", + "description": "Waffle, buttermilk, frozen, ready-to-heat, toasted", + "food_category_id": None, + "publication_date": "2019-04-01" + }, + { + "fdc_id": "167518", + "data_type": "sr_legacy_food", + "description": "Waffle, buttermilk, frozen, ready-to-heat, microwaved", + "food_category_id": None, + "publication_date": "2019-04-01" + } + ] + + self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) + if __name__ == '__main__': unittest.main()