fix for special characters in headers

2025-08-10 22:41:51 +02:00 · 2022-03-23 15:08:33 -07:00
parent e66a82ff49
commit 0a462978b7
4 changed files with 101 additions and 3 deletions
--- a/jc/parsers/asciitable.py
+++ b/jc/parsers/asciitable.py
@@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool:
 def _snake_case(line: str) -> str:
-    """replace spaces between words with an underscore and set to lowercase"""
+    """
    replace spaces between words and special characters with an underscore
    and set to lowercase
    """
    # anything that is not an ASCII letter, digit, or space becomes '_'
    line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
    # only a single space flanked by word characters is converted ('_'
    # counts as a word character); runs of spaces are left untouched
    return re.sub(r'\b \b', '_', line).lower()
@@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]:
    return result
 def _fixup_headers(table: List[Dict]) -> List[Dict]:
    """remove consecutive underscores and any trailing underscores"""
    new_table = []
    for row in table:
        new_row = row.copy()
        for k, v in row.items():
            k_new = k
            # remove consecutive underscores
            k_new = re.sub(r'__+', '_', k_new)
            # remove trailing underscores
            k_new = re.sub(r'_+$', '', k_new)
            new_row[k_new] = new_row.pop(k)
        new_table.append(new_row)
    return new_table
 def parse(
    data: str,
    raw: bool = False,
@@ -273,6 +293,7 @@ def parse(
        data = _remove_ansi(data)
        data = _strip(data)
        data_list = _normalize_rows(data)
-        raw_output = sparse_table_parse(data_list)
+        raw_table = sparse_table_parse(data_list)
        raw_output = _fixup_headers(raw_table)
    return raw_output if raw else _process(raw_output)
--- a/jc/parsers/asciitable_m.py
+++ b/jc/parsers/asciitable_m.py
@@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool:
 def _snake_case(line: str) -> str:
-    """replace spaces between words with an underscore and set to lowercase"""
+    """
    replace spaces between words and special characters with an underscore
    and set to lowercase
    """
    # must include all column separator characters in regex
    # (characters listed in the class are kept as-is; everything else that
    # is not an ASCII letter, digit, or space becomes '_')
    line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
    # only a single space flanked by word characters is converted ('_'
    # counts as a word character); runs of spaces are left untouched
    return re.sub(r'\b \b', '_', line).lower()
@@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]:
        for i, header in enumerate(line):
            if header:
                new_header = result[i] + '_' + header
                # remove consecutive underscores
                new_header = re.sub(r'__+', '_', new_header)
                new_line.append(new_header)
            else:
--- a/tests/test_asciitable.py
+++ b/tests/test_asciitable.py
@@ -301,6 +301,49 @@ class MyTests(unittest.TestCase):
        self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
    def test_asciitable_special_chars_in_header(self):
        """
        Test 'asciitable' with a pure ASCII table that has special
        characters in the header. These should be converted to underscores
        and no trailing or consecutive underscores should end up in the
        resulting key names.
        """
        input = '''
 Protocol  Address     Age (min)  Hardware Addr   Type   Interface
 Internet  10.12.13.1        98   0950.5785.5cd1  ARPA   FastEthernet2.13
 Internet  10.12.13.3       131   0150.7685.14d5  ARPA   GigabitEthernet2.13
 Internet  10.12.13.4       198   0950.5C8A.5c41  ARPA   GigabitEthernet2.17
        '''
        expected = [
            {
                "protocol": "Internet",
                "address": "10.12.13.1",
                "age_min": "98",
                "hardware_addr": "0950.5785.5cd1",
                "type": "ARPA",
                "interface": "FastEthernet2.13"
            },
            {
                "protocol": "Internet",
                "address": "10.12.13.3",
                "age_min": "131",
                "hardware_addr": "0150.7685.14d5",
                "type": "ARPA",
                "interface": "GigabitEthernet2.13"
            },
            {
                "protocol": "Internet",
                "address": "10.12.13.4",
                "age_min": "198",
                "hardware_addr": "0950.5C8A.5c41",
                "type": "ARPA",
                "interface": "GigabitEthernet2.17"
            }
        ]
        self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
 # run the test suite when this file is executed directly
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_asciitable_m.py
+++ b/tests/test_asciitable_m.py
@@ -242,6 +242,34 @@ class MyTests(unittest.TestCase):
        self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
    def test_asciitable_m_special_chars_in_header(self):
        """
        Test 'asciitable_m' with a pure ASCII table that has special
        characters in the header. These should be converted to underscores
        and no trailing or consecutive underscores should end up in the
        resulting key names.
        """
        input = '''
 +----------+------------+-----------+----------------+-------+--------------------+
 | Protocol | Address    | Age (min) | Hardware Addr  | Type  | Interface          |
 |          |            | of int    |                |       |                    |
 +----------+------------+-----------+----------------+-------+--------------------+
 | Internet | 10.12.13.1 |       98  | 0950.5785.5cd1 | ARPA  | FastEthernet2.13   |
 +----------+------------+-----------+----------------+-------+--------------------+
        '''
        expected = [
            {
                "protocol": "Internet",
                "address": "10.12.13.1",
                "age_min_of_int": "98",
                "hardware_addr": "0950.5785.5cd1",
                "type": "ARPA",
                "interface": "FastEthernet2.13"
            }
        ]
        self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
    def test_asciitable_m_markdown(self):
        """
        Test 'asciitable_m' with a markdown table. Should raise a ParseError