preserve keyname case with -r

2025-07-15 01:24:29 +02:00 · 2022-06-15 11:12:43 -07:00
parent 247c43278c
commit 4f148469d7
4 changed files with 93 additions and 4 deletions
--- a/jc/parsers/asciitable.py
+++ b/jc/parsers/asciitable.py
@ -54,6 +54,9 @@ etc...
 Headers (keys) are converted to snake-case. All values are returned as
 strings, except empty strings, which are converted to None/null.

+> Note: To preserve the case of the keys, use the `-r` cli option or the
+> `raw=True` argument in `parse()`.
+
 Usage (cli):

    $ cat table.txt | jc --asciitable
@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse

 class info():
    """Provides parser metadata (version, author, etc.)"""
-    version = '1.1'
+    version = '1.2'
    description = 'ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:

        List of Dictionaries. Structured to conform to the schema.
    """
+    # normalize keys: convert to lowercase
+    for item in proc_data:
+        for key in item.copy():
+            k_new = key.lower()
+            item[k_new] = item.pop(key)
+
    return proc_data


@ -232,7 +241,7 @@ def _snake_case(line: str) -> str:
    padding.
    """
    line = re.sub(r'[^a-zA-Z0-9� ]', '_', line)  # special characters
-    line = re.sub(r'\b \b', '_', line).lower()   # spaces betwee words
+    line = re.sub(r'\b \b', '_', line)           # spaces between words
    return line


--- a/jc/parsers/asciitable_m.py
+++ b/jc/parsers/asciitable_m.py
@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line
 headers are joined with an underscore. All values are returned as strings,
 except empty strings, which are converted to None/null.

+> Note: To preserve the case of the keys, use the `-r` cli option or the
+> `raw=True` argument in `parse()`.
+
 > Note: table column separator characters (e.g. `|`) cannot be present
 > inside the cell data. If detected, a warning message will be printed to
 > `STDERR` and the line will be skipped. The warning message can be
@ -107,7 +110,7 @@ from jc.exceptions import ParseError

 class info():
    """Provides parser metadata (version, author, etc.)"""
-    version = '1.1'
+    version = '1.2'
    description = 'multi-line ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:

        List of Dictionaries. Structured to conform to the schema.
    """
+    # normalize keys: convert to lowercase
+    for item in proc_data:
+        for key in item.copy():
+            k_new = key.lower()
+            item[k_new] = item.pop(key)
+
    return proc_data


@ -238,7 +247,7 @@ def _snake_case(line: str) -> str:
    """
    # must include all column separator characters in regex
    line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
-    return re.sub(r'\b \b', '_', line).lower()
+    return re.sub(r'\b \b', '_', line)


 def _fixup_separators(line: str) -> str:
--- a/tests/test_asciitable.py
+++ b/tests/test_asciitable.py
@ -344,6 +344,49 @@ Internet  10.12.13.4       198   0950.5C8A.5c41  ARPA   GigabitEthernet2.17

        self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)

+    def test_asciitable_no_lower_raw(self):
+        """
+        Test 'asciitable' with a pure ASCII table that has special characters
+        and mixed case in the header. Special characters should be converted
+        to underscores, with no trailing or consecutive underscores in the
+        resulting key names. `raw=True` preserves the key case (no lowercasing).
+        """
+        input = '''
+Protocol  Address     Age (min)  Hardware Addr   Type   Interface
+Internet  10.12.13.1        98   0950.5785.5cd1  ARPA   FastEthernet2.13
+Internet  10.12.13.3       131   0150.7685.14d5  ARPA   GigabitEthernet2.13
+Internet  10.12.13.4       198   0950.5C8A.5c41  ARPA   GigabitEthernet2.17
+        '''
+
+        expected = [
+            {
+                "Protocol": "Internet",
+                "Address": "10.12.13.1",
+                "Age_min": "98",
+                "Hardware_Addr": "0950.5785.5cd1",
+                "Type": "ARPA",
+                "Interface": "FastEthernet2.13"
+            },
+            {
+                "Protocol": "Internet",
+                "Address": "10.12.13.3",
+                "Age_min": "131",
+                "Hardware_Addr": "0150.7685.14d5",
+                "Type": "ARPA",
+                "Interface": "GigabitEthernet2.13"
+            },
+            {
+                "Protocol": "Internet",
+                "Address": "10.12.13.4",
+                "Age_min": "198",
+                "Hardware_Addr": "0950.5C8A.5c41",
+                "Type": "ARPA",
+                "Interface": "GigabitEthernet2.17"
+            }
+        ]
+
+        self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected)
+
    def test_asciitable_centered_col_header(self):
        """
        Test 'asciitable' with long centered column header which can break
--- a/tests/test_asciitable_m.py
+++ b/tests/test_asciitable_m.py
@ -270,6 +270,34 @@ class MyTests(unittest.TestCase):

        self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)

+    def test_asciitable_no_lower_raw(self):
+        """
+        Test 'asciitable_m' with a pure ASCII table that has special characters
+        and mixed case in the header. Special characters should be converted
+        to underscores, with no trailing or consecutive underscores in the
+        resulting key names. `raw=True` preserves the key case (no lowercasing).
+        """
+        input = '''
+----------+------------+-----------+----------------+-------+--------------------+
+| Protocol | Address    | Age (min) | Hardware Addr  | Type  | Interface          |
+|          |            | of int    |                |       |                    |
+----------+------------+-----------+----------------+-------+--------------------+
+| Internet | 10.12.13.1 |       98  | 0950.5785.5cd1 | ARPA  | FastEthernet2.13   |
+----------+------------+-----------+----------------+-------+--------------------+
+        '''
+        expected = [
+            {
+                "Protocol": "Internet",
+                "Address": "10.12.13.1",
+                "Age_min_of_int": "98",
+                "Hardware_Addr": "0950.5785.5cd1",
+                "Type": "ARPA",
+                "Interface": "FastEthernet2.13"
+            }
+        ]
+
+        self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected)
+
    def test_asciitable_m_sep_char_in_cell(self):
        """
        Test 'asciitable_m' with a column separator character inside the data