1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-08-10 22:41:51 +02:00

fix for special characters in headers

This commit is contained in:
Kelly Brazil
2022-03-23 15:08:33 -07:00
parent e66a82ff49
commit 0a462978b7
4 changed files with 101 additions and 3 deletions

View File

@@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str: def _snake_case(line: str) -> str:
"""replace spaces between words with an underscore and set to lowercase""" """
replace spaces between words and special characters with an underscore
and set to lowercase
"""
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
return re.sub(r'\b \b', '_', line).lower() return re.sub(r'\b \b', '_', line).lower()
@@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]:
return result return result
def _fixup_headers(table: List[Dict]) -> List[Dict]:
"""remove consecutive underscores and any trailing underscores"""
new_table = []
for row in table:
new_row = row.copy()
for k, v in row.items():
k_new = k
# remove consecutive underscores
k_new = re.sub(r'__+', '_', k_new)
# remove trailing underscores
k_new = re.sub(r'_+$', '', k_new)
new_row[k_new] = new_row.pop(k)
new_table.append(new_row)
return new_table
def parse( def parse(
data: str, data: str,
raw: bool = False, raw: bool = False,
@@ -273,6 +293,7 @@ def parse(
data = _remove_ansi(data) data = _remove_ansi(data)
data = _strip(data) data = _strip(data)
data_list = _normalize_rows(data) data_list = _normalize_rows(data)
raw_output = sparse_table_parse(data_list) raw_table = sparse_table_parse(data_list)
raw_output = _fixup_headers(raw_table)
return raw_output if raw else _process(raw_output) return raw_output if raw else _process(raw_output)

View File

@@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool:
def _snake_case(line: str) -> str: def _snake_case(line: str) -> str:
"""replace spaces between words with an underscore and set to lowercase""" """
replace spaces between words and special characters with an underscore
and set to lowercase
"""
# must include all column separator characters in regex
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
return re.sub(r'\b \b', '_', line).lower() return re.sub(r'\b \b', '_', line).lower()
@@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]:
for i, header in enumerate(line): for i, header in enumerate(line):
if header: if header:
new_header = result[i] + '_' + header new_header = result[i] + '_' + header
# remove consecutive underscores
new_header = re.sub(r'__+', '_', new_header) new_header = re.sub(r'__+', '_', new_header)
new_line.append(new_header) new_line.append(new_header)
else: else:

View File

@@ -301,6 +301,49 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_special_chars_in_header(self):
    """
    Test 'asciitable' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # NOTE(review): the column alignment below was reconstructed with
    # left-aligned, fixed-width columns separated by at least two spaces;
    # confirm the exact spacing against the upstream test file.
    input = '''
Protocol  Address     Age (min)  Hardware Addr   Type  Interface
Internet  10.12.13.1  98         0950.5785.5cd1  ARPA  FastEthernet2.13
Internet  10.12.13.3  131        0150.7685.14d5  ARPA  GigabitEthernet2.13
Internet  10.12.13.4  198        0950.5C8A.5c41  ARPA  GigabitEthernet2.17
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.3",
            "age_min": "131",
            "hardware_addr": "0150.7685.14d5",
            "type": "ARPA",
            "interface": "GigabitEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.4",
            "age_min": "198",
            "hardware_addr": "0950.5C8A.5c41",
            "type": "ARPA",
            "interface": "GigabitEthernet2.17"
        }
    ]

    self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -242,6 +242,34 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected) self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_m_special_chars_in_header(self):
    """
    Test 'asciitable_m' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # Cell padding is sized to match the widths of the border lines.
    # The header spans two text rows: "Age (min)" continues as "of int".
    input = '''
+----------+------------+-----------+----------------+-------+--------------------+
| Protocol | Address    | Age (min) | Hardware Addr  | Type  | Interface          |
|          |            | of int    |                |       |                    |
+----------+------------+-----------+----------------+-------+--------------------+
| Internet | 10.12.13.1 | 98        | 0950.5785.5cd1 | ARPA  | FastEthernet2.13   |
+----------+------------+-----------+----------------+-------+--------------------+
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min_of_int": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        }
    ]

    self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_m_markdown(self): def test_asciitable_m_markdown(self):
""" """
Test 'asciitable_m' with a markdown table. Should raise a ParseError Test 'asciitable_m' with a markdown table. Should raise a ParseError