From 4f148469d76d8ec870bc219bf88978bb4e4320ad Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Wed, 15 Jun 2022 11:12:43 -0700 Subject: [PATCH] preserve keyname case with -r --- jc/parsers/asciitable.py | 13 ++++++++++-- jc/parsers/asciitable_m.py | 13 ++++++++++-- tests/test_asciitable.py | 43 ++++++++++++++++++++++++++++++++++++++ tests/test_asciitable_m.py | 28 +++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 4 deletions(-) diff --git a/jc/parsers/asciitable.py b/jc/parsers/asciitable.py index 5d4317b7..8e3f2f1d 100644 --- a/jc/parsers/asciitable.py +++ b/jc/parsers/asciitable.py @@ -54,6 +54,9 @@ etc... Headers (keys) are converted to snake-case. All values are returned as strings, except empty strings, which are converted to None/null. +> Note: To preserve the case of the keys use the `-r` cli option or +> `raw=True` argument in `parse()`. + Usage (cli): $ cat table.txt | jc --asciitable @@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.1' + version = '1.2' description = 'ASCII and Unicode table parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]: List of Dictionaries. Structured to conform to the schema. """ + # normalize keys: convert to lowercase + for item in proc_data: + for key in item.copy(): + k_new = key.lower() + item[k_new] = item.pop(key) + return proc_data @@ -232,7 +241,7 @@ def _snake_case(line: str) -> str: padding. """ line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters - line = re.sub(r'\b \b', '_', line).lower() # spaces betwee words + line = re.sub(r'\b \b', '_', line) # spaces between words return line diff --git a/jc/parsers/asciitable_m.py b/jc/parsers/asciitable_m.py index d85bfa14..52069965 100644 --- a/jc/parsers/asciitable_m.py +++ b/jc/parsers/asciitable_m.py @@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line headers are joined with an underscore. All values are returned as strings, except empty strings, which are converted to None/null. +> Note: To preserve the case of the keys use the `-r` cli option or +> `raw=True` argument in `parse()`. + > Note: table column separator characters (e.g. `|`) cannot be present > inside the cell data. If detected, a warning message will be printed to > `STDERR` and the line will be skipped. The warning message can be @@ -107,7 +110,7 @@ from jc.exceptions import ParseError class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.1' + version = '1.2' description = 'multi-line ASCII and Unicode table parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]: List of Dictionaries. Structured to conform to the schema. """ + # normalize keys: convert to lowercase + for item in proc_data: + for key in item.copy(): + k_new = key.lower() + item[k_new] = item.pop(key) + return proc_data @@ -238,7 +247,7 @@ def _snake_case(line: str) -> str: """ # must include all column separator characters in regex line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line) - return re.sub(r'\b \b', '_', line).lower() + return re.sub(r'\b \b', '_', line) def _fixup_separators(line: str) -> str: diff --git a/tests/test_asciitable.py b/tests/test_asciitable.py index e481f1d7..64f74879 100644 --- a/tests/test_asciitable.py +++ b/tests/test_asciitable.py @@ -344,6 +344,49 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17 self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) + def test_asciitable_no_lower_raw(self): + """ + Test 'asciitable' with a pure ASCII table that has special + characters and mixed case in the header. These should be converted to underscores + and no trailing or consecutive underscores should end up in the + resulting key names. Using `raw` in this test to preserve case. (no lower) + """ + input = ''' +Protocol Address Age (min) Hardware Addr Type Interface +Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13 +Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13 +Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17 + ''' + + expected = [ + { + "Protocol": "Internet", + "Address": "10.12.13.1", + "Age_min": "98", + "Hardware_Addr": "0950.5785.5cd1", + "Type": "ARPA", + "Interface": "FastEthernet2.13" + }, + { + "Protocol": "Internet", + "Address": "10.12.13.3", + "Age_min": "131", + "Hardware_Addr": "0150.7685.14d5", + "Type": "ARPA", + "Interface": "GigabitEthernet2.13" + }, + { + "Protocol": "Internet", + "Address": "10.12.13.4", + "Age_min": "198", + "Hardware_Addr": "0950.5C8A.5c41", + "Type": "ARPA", + "Interface": "GigabitEthernet2.17" + } + ] + + self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected) + def test_asciitable_centered_col_header(self): """ Test 'asciitable' with long centered column header which can break diff --git a/tests/test_asciitable_m.py b/tests/test_asciitable_m.py index 90248c0c..9a29945c 100644 --- a/tests/test_asciitable_m.py +++ b/tests/test_asciitable_m.py @@ -270,6 +270,34 @@ class MyTests(unittest.TestCase): self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected) + def test_asciitable_no_lower_raw(self): + """ + Test 'asciitable_m' with a pure ASCII table that has special + characters and mixed case in the header. These should be converted to underscores + and no trailing or consecutive underscores should end up in the + resulting key names. Using `raw` in this test to preserve case. (no lower) + """ + input = ''' ++----------+------------+-----------+----------------+-------+--------------------+ +| Protocol | Address | Age (min) | Hardware Addr | Type | Interface | +| | | of int | | | | ++----------+------------+-----------+----------------+-------+--------------------+ +| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 | ++----------+------------+-----------+----------------+-------+--------------------+ + ''' + expected = [ + { + "Protocol": "Internet", + "Address": "10.12.13.1", + "Age_min_of_int": "98", + "Hardware_Addr": "0950.5785.5cd1", + "Type": "ARPA", + "Interface": "FastEthernet2.13" + } + ] + + self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected) + def test_asciitable_m_sep_char_in_cell(self): """ Test 'asciitable_m' with a column separator character inside the data