1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-08-08 22:36:48 +02:00

fix for special characters in headers

This commit is contained in:
Kelly Brazil
2022-03-23 15:08:33 -07:00
parent e66a82ff49
commit 0a462978b7
4 changed files with 101 additions and 3 deletions

View File

@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str:
"""replace spaces between words with an underscore and set to lowercase"""
"""
replace spaces between words and special characters with an underscore
and set to lowercase
"""
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
return re.sub(r'\b \b', '_', line).lower()
@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]:
return result
def _fixup_headers(table: List[Dict]) -> List[Dict]:
"""remove consecutive underscores and any trailing underscores"""
new_table = []
for row in table:
new_row = row.copy()
for k, v in row.items():
k_new = k
# remove consecutive underscores
k_new = re.sub(r'__+', '_', k_new)
# remove trailing underscores
k_new = re.sub(r'_+$', '', k_new)
new_row[k_new] = new_row.pop(k)
new_table.append(new_row)
return new_table
def parse(
data: str,
raw: bool = False,
@ -273,6 +293,7 @@ def parse(
data = _remove_ansi(data)
data = _strip(data)
data_list = _normalize_rows(data)
raw_output = sparse_table_parse(data_list)
raw_table = sparse_table_parse(data_list)
raw_output = _fixup_headers(raw_table)
return raw_output if raw else _process(raw_output)

View File

@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str:
"""replace spaces between words with an underscore and set to lowercase"""
"""
replace spaces between words and special characters with an underscore
and set to lowercase
"""
# must include all column separator characters in regex
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
return re.sub(r'\b \b', '_', line).lower()
@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]:
for i, header in enumerate(line):
if header:
new_header = result[i] + '_' + header
# remove consecutive underscores
new_header = re.sub(r'__+', '_', new_header)
new_line.append(new_header)
else:

View File

@ -301,6 +301,49 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_special_chars_in_header(self):
    """
    Test 'asciitable' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # Columns are separated by at least two spaces: a single space between
    # header words is treated as part of one multi-word column name.
    # NOTE(review): column spacing below is reconstructed - confirm it
    # against the upstream fixture.
    input = '''
Protocol  Address     Age (min)  Hardware Addr   Type  Interface
Internet  10.12.13.1  98         0950.5785.5cd1  ARPA  FastEthernet2.13
Internet  10.12.13.3  131        0150.7685.14d5  ARPA  GigabitEthernet2.13
Internet  10.12.13.4  198        0950.5C8A.5c41  ARPA  GigabitEthernet2.17
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.3",
            "age_min": "131",
            "hardware_addr": "0150.7685.14d5",
            "type": "ARPA",
            "interface": "GigabitEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.4",
            "age_min": "198",
            "hardware_addr": "0950.5C8A.5c41",
            "type": "ARPA",
            "interface": "GigabitEthernet2.17"
        }
    ]

    self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
# run the test suite when this file is executed directly
if __name__ == '__main__':
    unittest.main()

View File

@ -242,6 +242,34 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_m_special_chars_in_header(self):
    """
    Test 'asciitable_m' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # the third column's header spans two rows ('Age (min)' / 'of int'),
    # which is expected to produce the single key 'age_min_of_int'
    input = '''
+----------+------------+-----------+----------------+-------+--------------------+
| Protocol | Address | Age (min) | Hardware Addr | Type | Interface |
| | | of int | | | |
+----------+------------+-----------+----------------+-------+--------------------+
| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 |
+----------+------------+-----------+----------------+-------+--------------------+
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min_of_int": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        }
    ]

    self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_m_markdown(self):
"""
Test 'asciitable_m' with a markdown table. Should raise a ParseError