mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-08-08 22:36:48 +02:00
fix for special characters in headers
This commit is contained in:
@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool:
|
||||
|
||||
|
||||
def _snake_case(line: str) -> str:
|
||||
"""replace spaces between words with an underscore and set to lowercase"""
|
||||
"""
|
||||
replace spaces between words and special characters with an underscore
|
||||
and set to lowercase
|
||||
"""
|
||||
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
|
||||
return re.sub(r'\b \b', '_', line).lower()
|
||||
|
||||
|
||||
@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]:
|
||||
return result
|
||||
|
||||
|
||||
def _fixup_headers(table: List[Dict]) -> List[Dict]:
|
||||
"""remove consecutive underscores and any trailing underscores"""
|
||||
new_table = []
|
||||
for row in table:
|
||||
new_row = row.copy()
|
||||
for k, v in row.items():
|
||||
k_new = k
|
||||
# remove consecutive underscores
|
||||
k_new = re.sub(r'__+', '_', k_new)
|
||||
# remove trailing underscores
|
||||
k_new = re.sub(r'_+$', '', k_new)
|
||||
new_row[k_new] = new_row.pop(k)
|
||||
new_table.append(new_row)
|
||||
|
||||
return new_table
|
||||
|
||||
def parse(
|
||||
data: str,
|
||||
raw: bool = False,
|
||||
@ -273,6 +293,7 @@ def parse(
|
||||
data = _remove_ansi(data)
|
||||
data = _strip(data)
|
||||
data_list = _normalize_rows(data)
|
||||
raw_output = sparse_table_parse(data_list)
|
||||
raw_table = sparse_table_parse(data_list)
|
||||
raw_output = _fixup_headers(raw_table)
|
||||
|
||||
return raw_output if raw else _process(raw_output)
|
||||
|
@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool:
|
||||
|
||||
|
||||
def _snake_case(line: str) -> str:
|
||||
"""replace spaces between words with an underscore and set to lowercase"""
|
||||
"""
|
||||
replace spaces between words and special characters with an underscore
|
||||
and set to lowercase
|
||||
"""
|
||||
# must include all column separator characters in regex
|
||||
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
|
||||
return re.sub(r'\b \b', '_', line).lower()
|
||||
|
||||
|
||||
@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]:
|
||||
for i, header in enumerate(line):
|
||||
if header:
|
||||
new_header = result[i] + '_' + header
|
||||
# remove consecutive underscores
|
||||
new_header = re.sub(r'__+', '_', new_header)
|
||||
new_line.append(new_header)
|
||||
else:
|
||||
|
@ -301,6 +301,49 @@ class MyTests(unittest.TestCase):
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||
|
||||
def test_asciitable_special_chars_in_header(self):
|
||||
"""
|
||||
Test 'asciitable' with a pure ASCII table that has special
|
||||
characters in the header. These should be converted to underscores
|
||||
and no trailing or consecutive underscores should end up in the
|
||||
resulting key names.
|
||||
"""
|
||||
input = '''
|
||||
Protocol Address Age (min) Hardware Addr Type Interface
|
||||
Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13
|
||||
Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13
|
||||
Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
|
||||
'''
|
||||
|
||||
expected = [
|
||||
{
|
||||
"protocol": "Internet",
|
||||
"address": "10.12.13.1",
|
||||
"age_min": "98",
|
||||
"hardware_addr": "0950.5785.5cd1",
|
||||
"type": "ARPA",
|
||||
"interface": "FastEthernet2.13"
|
||||
},
|
||||
{
|
||||
"protocol": "Internet",
|
||||
"address": "10.12.13.3",
|
||||
"age_min": "131",
|
||||
"hardware_addr": "0150.7685.14d5",
|
||||
"type": "ARPA",
|
||||
"interface": "GigabitEthernet2.13"
|
||||
},
|
||||
{
|
||||
"protocol": "Internet",
|
||||
"address": "10.12.13.4",
|
||||
"age_min": "198",
|
||||
"hardware_addr": "0950.5C8A.5c41",
|
||||
"type": "ARPA",
|
||||
"interface": "GigabitEthernet2.17"
|
||||
}
|
||||
]
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -242,6 +242,34 @@ class MyTests(unittest.TestCase):
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
||||
|
||||
def test_asciitable_m_special_chars_in_header(self):
|
||||
"""
|
||||
Test 'asciitable_m' with a pure ASCII table that has special
|
||||
characters in the header. These should be converted to underscores
|
||||
and no trailing or consecutive underscores should end up in the
|
||||
resulting key names.
|
||||
"""
|
||||
input = '''
|
||||
+----------+------------+-----------+----------------+-------+--------------------+
|
||||
| Protocol | Address | Age (min) | Hardware Addr | Type | Interface |
|
||||
| | | of int | | | |
|
||||
+----------+------------+-----------+----------------+-------+--------------------+
|
||||
| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 |
|
||||
+----------+------------+-----------+----------------+-------+--------------------+
|
||||
'''
|
||||
expected = [
|
||||
{
|
||||
"protocol": "Internet",
|
||||
"address": "10.12.13.1",
|
||||
"age_min_of_int": "98",
|
||||
"hardware_addr": "0950.5785.5cd1",
|
||||
"type": "ARPA",
|
||||
"interface": "FastEthernet2.13"
|
||||
}
|
||||
]
|
||||
|
||||
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
||||
|
||||
def test_asciitable_m_markdown(self):
|
||||
"""
|
||||
Test 'asciitable_m' with a markdown table. Should raise a ParseError
|
||||
|
Reference in New Issue
Block a user