From 0a462978b731770d1290187a502706c2322f5f91 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Wed, 23 Mar 2022 15:08:33 -0700 Subject: [PATCH] fix for special characters in headers --- jc/parsers/asciitable.py | 25 ++++++++++++++++++++-- jc/parsers/asciitable_m.py | 8 ++++++- tests/test_asciitable.py | 43 ++++++++++++++++++++++++++++++++++++++ tests/test_asciitable_m.py | 28 +++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 3 deletions(-) diff --git a/jc/parsers/asciitable.py b/jc/parsers/asciitable.py index 3c21fa60..9346278c 100644 --- a/jc/parsers/asciitable.py +++ b/jc/parsers/asciitable.py @@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool: def _snake_case(line: str) -> str: - """replace spaces between words with an underscore and set to lowercase""" + """ + replace spaces between words and special characters with an underscore + and set to lowercase + """ + line = re.sub(r'[^a-zA-Z0-9 ]', '_', line) return re.sub(r'\b \b', '_', line).lower() @@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]: return result +def _fixup_headers(table: List[Dict]) -> List[Dict]: + """remove consecutive underscores and any trailing underscores""" + new_table = [] + for row in table: + new_row = row.copy() + for k, v in row.items(): + k_new = k + # remove consecutive underscores + k_new = re.sub(r'__+', '_', k_new) + # remove trailing underscores + k_new = re.sub(r'_+$', '', k_new) + new_row[k_new] = new_row.pop(k) + new_table.append(new_row) + + return new_table + def parse( data: str, raw: bool = False, @@ -273,6 +293,7 @@ def parse( data = _remove_ansi(data) data = _strip(data) data_list = _normalize_rows(data) - raw_output = sparse_table_parse(data_list) + raw_table = sparse_table_parse(data_list) + raw_output = _fixup_headers(raw_table) return raw_output if raw else _process(raw_output) diff --git a/jc/parsers/asciitable_m.py b/jc/parsers/asciitable_m.py index 5f51b819..6ad45640 100644 --- a/jc/parsers/asciitable_m.py +++ b/jc/parsers/asciitable_m.py @@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool: def _snake_case(line: str) -> str: - """replace spaces between words with an underscore and set to lowercase""" + """ + replace spaces between words and special characters with an underscore + and set to lowercase + """ + # must include all column separator characters in regex + line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line) return re.sub(r'\b \b', '_', line).lower() @@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]: for i, header in enumerate(line): if header: new_header = result[i] + '_' + header + # remove consecutive underscores new_header = re.sub(r'__+', '_', new_header) new_line.append(new_header) else: diff --git a/tests/test_asciitable.py b/tests/test_asciitable.py index a8cdc14f..e8e9967a 100644 --- a/tests/test_asciitable.py +++ b/tests/test_asciitable.py @@ -301,6 +301,49 @@ class MyTests(unittest.TestCase): self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) + def test_asciitable_special_chars_in_header(self): + """ + Test 'asciitable' with a pure ASCII table that has special + characters in the header. These should be converted to underscores + and no trailing or consecutive underscores should end up in the + resulting key names. + """ + input = ''' +Protocol Address Age (min) Hardware Addr Type Interface +Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13 +Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13 +Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17 + ''' + + expected = [ + { + "protocol": "Internet", + "address": "10.12.13.1", + "age_min": "98", + "hardware_addr": "0950.5785.5cd1", + "type": "ARPA", + "interface": "FastEthernet2.13" + }, + { + "protocol": "Internet", + "address": "10.12.13.3", + "age_min": "131", + "hardware_addr": "0150.7685.14d5", + "type": "ARPA", + "interface": "GigabitEthernet2.13" + }, + { + "protocol": "Internet", + "address": "10.12.13.4", + "age_min": "198", + "hardware_addr": "0950.5C8A.5c41", + "type": "ARPA", + "interface": "GigabitEthernet2.17" + } + ] + + self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_asciitable_m.py b/tests/test_asciitable_m.py index d2c6f483..0b8b0aa9 100644 --- a/tests/test_asciitable_m.py +++ b/tests/test_asciitable_m.py @@ -242,6 +242,34 @@ class MyTests(unittest.TestCase): self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected) + def test_asciitable_m_special_chars_in_header(self): + """ + Test 'asciitable_m' with a pure ASCII table that has special + characters in the header. These should be converted to underscores + and no trailing or consecutive underscores should end up in the + resulting key names. + """ + input = ''' ++----------+------------+-----------+----------------+-------+--------------------+ +| Protocol | Address | Age (min) | Hardware Addr | Type | Interface | +| | | of int | | | | ++----------+------------+-----------+----------------+-------+--------------------+ +| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 | ++----------+------------+-----------+----------------+-------+--------------------+ + ''' + expected = [ + { + "protocol": "Internet", + "address": "10.12.13.1", + "age_min_of_int": "98", + "hardware_addr": "0950.5785.5cd1", + "type": "ARPA", + "interface": "FastEthernet2.13" + } + ] + + self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected) + def test_asciitable_m_markdown(self): """ Test 'asciitable_m' with a markdown table. Should raise a ParseError