mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-08-10 22:41:51 +02:00
fix for special characters in headers
This commit is contained in:
@@ -211,7 +211,11 @@ def _is_separator(line: str) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _snake_case(line: str) -> str:
|
def _snake_case(line: str) -> str:
|
||||||
"""replace spaces between words with an underscore and set to lowercase"""
|
"""
|
||||||
|
replace spaces between words and special characters with an underscore
|
||||||
|
and set to lowercase
|
||||||
|
"""
|
||||||
|
line = re.sub(r'[^a-zA-Z0-9 ]', '_', line)
|
||||||
return re.sub(r'\b \b', '_', line).lower()
|
return re.sub(r'\b \b', '_', line).lower()
|
||||||
|
|
||||||
|
|
||||||
@@ -246,6 +250,22 @@ def _normalize_rows(table: str) -> List[str]:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _fixup_headers(table: List[Dict]) -> List[Dict]:
|
||||||
|
"""remove consecutive underscores and any trailing underscores"""
|
||||||
|
new_table = []
|
||||||
|
for row in table:
|
||||||
|
new_row = row.copy()
|
||||||
|
for k, v in row.items():
|
||||||
|
k_new = k
|
||||||
|
# remove consecutive underscores
|
||||||
|
k_new = re.sub(r'__+', '_', k_new)
|
||||||
|
# remove trailing underscores
|
||||||
|
k_new = re.sub(r'_+$', '', k_new)
|
||||||
|
new_row[k_new] = new_row.pop(k)
|
||||||
|
new_table.append(new_row)
|
||||||
|
|
||||||
|
return new_table
|
||||||
|
|
||||||
def parse(
|
def parse(
|
||||||
data: str,
|
data: str,
|
||||||
raw: bool = False,
|
raw: bool = False,
|
||||||
@@ -273,6 +293,7 @@ def parse(
|
|||||||
data = _remove_ansi(data)
|
data = _remove_ansi(data)
|
||||||
data = _strip(data)
|
data = _strip(data)
|
||||||
data_list = _normalize_rows(data)
|
data_list = _normalize_rows(data)
|
||||||
raw_output = sparse_table_parse(data_list)
|
raw_table = sparse_table_parse(data_list)
|
||||||
|
raw_output = _fixup_headers(raw_table)
|
||||||
|
|
||||||
return raw_output if raw else _process(raw_output)
|
return raw_output if raw else _process(raw_output)
|
||||||
|
@@ -222,7 +222,12 @@ def _is_separator(line: str) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _snake_case(line: str) -> str:
|
def _snake_case(line: str) -> str:
|
||||||
"""replace spaces between words with an underscore and set to lowercase"""
|
"""
|
||||||
|
replace spaces between words and special characters with an underscore
|
||||||
|
and set to lowercase
|
||||||
|
"""
|
||||||
|
# must include all column separator characters in regex
|
||||||
|
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
|
||||||
return re.sub(r'\b \b', '_', line).lower()
|
return re.sub(r'\b \b', '_', line).lower()
|
||||||
|
|
||||||
|
|
||||||
@@ -360,6 +365,7 @@ def _collapse_headers(table: List[List[str]]) -> List[str]:
|
|||||||
for i, header in enumerate(line):
|
for i, header in enumerate(line):
|
||||||
if header:
|
if header:
|
||||||
new_header = result[i] + '_' + header
|
new_header = result[i] + '_' + header
|
||||||
|
# remove consecutive underscores
|
||||||
new_header = re.sub(r'__+', '_', new_header)
|
new_header = re.sub(r'__+', '_', new_header)
|
||||||
new_line.append(new_header)
|
new_line.append(new_header)
|
||||||
else:
|
else:
|
||||||
|
@@ -301,6 +301,49 @@ class MyTests(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
def test_asciitable_special_chars_in_header(self):
    """
    Test 'asciitable' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # Columns are separated by at least two spaces so that each header
    # stays a distinct column.
    # NOTE(review): the exact padding is reconstructed - confirm against
    # the upstream fixture.
    table_text = '''
Protocol  Address     Age (min)  Hardware Addr   Type  Interface
Internet  10.12.13.1  98         0950.5785.5cd1  ARPA  FastEthernet2.13
Internet  10.12.13.3  131        0150.7685.14d5  ARPA  GigabitEthernet2.13
Internet  10.12.13.4  198        0950.5C8A.5c41  ARPA  GigabitEthernet2.17
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.3",
            "age_min": "131",
            "hardware_addr": "0150.7685.14d5",
            "type": "ARPA",
            "interface": "GigabitEthernet2.13"
        },
        {
            "protocol": "Internet",
            "address": "10.12.13.4",
            "age_min": "198",
            "hardware_addr": "0950.5C8A.5c41",
            "type": "ARPA",
            "interface": "GigabitEthernet2.17"
        }
    ]

    self.assertEqual(jc.parsers.asciitable.parse(table_text, quiet=True), expected)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -242,6 +242,34 @@ class MyTests(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
def test_asciitable_m_special_chars_in_header(self):
    """
    Test 'asciitable_m' with a pure ASCII table that has special
    characters in the header. These should be converted to underscores
    and no trailing or consecutive underscores should end up in the
    resulting key names.
    """
    # Cell padding matches the widths given by the +---+ border lines.
    # The second header row contributes "of int" to the third column name.
    table_text = '''
+----------+------------+-----------+----------------+-------+--------------------+
| Protocol | Address    | Age (min) | Hardware Addr  | Type  | Interface          |
|          |            | of int    |                |       |                    |
+----------+------------+-----------+----------------+-------+--------------------+
| Internet | 10.12.13.1 | 98        | 0950.5785.5cd1 | ARPA  | FastEthernet2.13   |
+----------+------------+-----------+----------------+-------+--------------------+
'''

    expected = [
        {
            "protocol": "Internet",
            "address": "10.12.13.1",
            "age_min_of_int": "98",
            "hardware_addr": "0950.5785.5cd1",
            "type": "ARPA",
            "interface": "FastEthernet2.13"
        }
    ]

    self.assertEqual(jc.parsers.asciitable_m.parse(table_text, quiet=True), expected)
|
||||||
|
|
||||||
def test_asciitable_m_markdown(self):
|
def test_asciitable_m_markdown(self):
|
||||||
"""
|
"""
|
||||||
Test 'asciitable_m' with a markdown table. Should raise a ParseError
|
Test 'asciitable_m' with a markdown table. Should raise a ParseError
|
||||||
|
Reference in New Issue
Block a user