preserve keyname case with -r

2025-07-15 01:24:29 +02:00 · 2022-06-15 11:12:43 -07:00
parent 247c43278c
commit 4f148469d7
4 changed files with 93 additions and 4 deletions
--- a/jc/parsers/asciitable.py
+++ b/jc/parsers/asciitable.py
@ -54,6 +54,9 @@ etc...
 Headers (keys) are converted to snake-case. All values are returned as
 strings, except empty strings, which are converted to None/null.
 > Note: To preserve the case of the keys use the `-r` cli option or
 > `raw=True` argument in `parse()`.
 Usage (cli):
    $ cat table.txt | jc --asciitable
@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse
 class info():
    """Provides parser metadata (version, author, etc.)"""
-    version = '1.1'
+    version = '1.2'
    description = 'ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
        List of Dictionaries. Structured to conform to the schema.
    """
    # normalize keys: convert to lowercase
    for item in proc_data:
        for key in item.copy():
            k_new = key.lower()
            item[k_new] = item.pop(key)
    return proc_data
@ -232,7 +241,7 @@ def _snake_case(line: str) -> str:
    padding.
    """
    line = re.sub(r'[^a-zA-Z0-9� ]', '_', line)  # special characters
-    line = re.sub(r'\b \b', '_', line).lower()   # spaces betwee words
+    line = re.sub(r'\b \b', '_', line)           # spaces between words
    return line
--- a/jc/parsers/asciitable_m.py
+++ b/jc/parsers/asciitable_m.py
@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line
 headers are joined with an underscore. All values are returned as strings,
 except empty strings, which are converted to None/null.
 > Note: To preserve the case of the keys use the `-r` cli option or
 > `raw=True` argument in `parse()`.
 > Note: table column separator characters (e.g. `|`) cannot be present
 > inside the cell data. If detected, a warning message will be printed to
 > `STDERR` and the line will be skipped. The warning message can be
@ -107,7 +110,7 @@ from jc.exceptions import ParseError
 class info():
    """Provides parser metadata (version, author, etc.)"""
-    version = '1.1'
+    version = '1.2'
    description = 'multi-line ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
        List of Dictionaries. Structured to conform to the schema.
    """
    # normalize keys: convert to lowercase
    for item in proc_data:
        for key in item.copy():
            k_new = key.lower()
            item[k_new] = item.pop(key)
    return proc_data
@ -238,7 +247,7 @@ def _snake_case(line: str) -> str:
    """
    # must include all column separator characters in regex
    line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
-    return re.sub(r'\b \b', '_', line).lower()
+    return re.sub(r'\b \b', '_', line)
 def _fixup_separators(line: str) -> str:
--- a/tests/test_asciitable.py
+++ b/tests/test_asciitable.py
@ -344,6 +344,49 @@ Internet  10.12.13.4       198   0950.5C8A.5c41  ARPA   GigabitEthernet2.17
        self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
    def test_asciitable_no_lower_raw(self):
        """
        Test 'asciitable' with a pure ASCII table whose header contains
        special characters, spaces between words, and mixed case (e.g.
        `Age (min)`, `Hardware Addr`).

        With `raw=True` the expected keys keep the header's original case
        (`Age_min`, `Hardware_Addr`) rather than being lowercased. Special
        characters and spaces between words still become underscores, and
        no trailing or consecutive underscores should end up in the
        resulting key names.
        """
        input = '''
 Protocol  Address     Age (min)  Hardware Addr   Type   Interface
 Internet  10.12.13.1        98   0950.5785.5cd1  ARPA   FastEthernet2.13
 Internet  10.12.13.3       131   0150.7685.14d5  ARPA   GigabitEthernet2.13
 Internet  10.12.13.4       198   0950.5C8A.5c41  ARPA   GigabitEthernet2.17
        '''
        expected = [
            {
                "Protocol": "Internet",
                "Address": "10.12.13.1",
                "Age_min": "98",
                "Hardware_Addr": "0950.5785.5cd1",
                "Type": "ARPA",
                "Interface": "FastEthernet2.13"
            },
            {
                "Protocol": "Internet",
                "Address": "10.12.13.3",
                "Age_min": "131",
                "Hardware_Addr": "0150.7685.14d5",
                "Type": "ARPA",
                "Interface": "GigabitEthernet2.13"
            },
            {
                "Protocol": "Internet",
                "Address": "10.12.13.4",
                "Age_min": "198",
                "Hardware_Addr": "0950.5C8A.5c41",
                "Type": "ARPA",
                "Interface": "GigabitEthernet2.17"
            }
        ]
        # raw=True is the behavior under test; values stay strings either way
        self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected)
    def test_asciitable_centered_col_header(self):
        """
        Test 'asciitable' with long centered column header which can break
--- a/tests/test_asciitable_m.py
+++ b/tests/test_asciitable_m.py
@ -270,6 +270,34 @@ class MyTests(unittest.TestCase):
        self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
    def test_asciitable_no_lower_raw(self):
        """
        Test 'asciitable_m' with a pure ASCII table whose header contains
        special characters, mixed case, and a cell that spans two lines
        (`Age (min)` over `of int`).

        With `raw=True` the expected keys keep the header's original case
        rather than being lowercased. The two header lines are expected to
        combine into the single key `Age_min_of_int`, with special
        characters and spaces converted to underscores and no trailing or
        consecutive underscores in the resulting key names.
        """
        input = '''
 +----------+------------+-----------+----------------+-------+--------------------+
 | Protocol | Address    | Age (min) | Hardware Addr  | Type  | Interface          |
 |          |            | of int    |                |       |                    |
 +----------+------------+-----------+----------------+-------+--------------------+
 | Internet | 10.12.13.1 |       98  | 0950.5785.5cd1 | ARPA  | FastEthernet2.13   |
 +----------+------------+-----------+----------------+-------+--------------------+
        '''
        expected = [
            {
                "Protocol": "Internet",
                "Address": "10.12.13.1",
                "Age_min_of_int": "98",
                "Hardware_Addr": "0950.5785.5cd1",
                "Type": "ARPA",
                "Interface": "FastEthernet2.13"
            }
        ]
        # raw=True is the behavior under test; values stay strings either way
        self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected)
    def test_asciitable_m_sep_char_in_cell(self):
        """
        Test 'asciitable_m' with a column separator character inside the data