1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-07-15 01:24:29 +02:00

preserve keyname case with -r

This commit is contained in:
Kelly Brazil
2022-06-15 11:12:43 -07:00
parent 247c43278c
commit 4f148469d7
4 changed files with 93 additions and 4 deletions

View File

@ -54,6 +54,9 @@ etc...
Headers (keys) are converted to snake-case. All values are returned as
strings, except empty strings, which are converted to None/null.
> Note: To preserve the case of the keys, use the `-r` cli option or the
> `raw=True` argument in `parse()`.
Usage (cli):
$ cat table.txt | jc --asciitable
@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse
class info():
"""Provides parser metadata (version, author, etc.)"""
version = '1.1'
version = '1.2'
description = 'ASCII and Unicode table parser'
author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com'
@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
List of Dictionaries. Structured to conform to the schema.
"""
# normalize keys: convert to lowercase
for item in proc_data:
for key in item.copy():
k_new = key.lower()
item[k_new] = item.pop(key)
return proc_data
@ -232,7 +241,7 @@ def _snake_case(line: str) -> str:
padding.
"""
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
line = re.sub(r'\b \b', '_', line).lower() # spaces betwee words
line = re.sub(r'\b \b', '_', line) # spaces between words
return line

View File

@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line
headers are joined with an underscore. All values are returned as strings,
except empty strings, which are converted to None/null.
> Note: To preserve the case of the keys, use the `-r` cli option or the
> `raw=True` argument in `parse()`.
> Note: table column separator characters (e.g. `|`) cannot be present
> inside the cell data. If detected, a warning message will be printed to
> `STDERR` and the line will be skipped. The warning message can be
@ -107,7 +110,7 @@ from jc.exceptions import ParseError
class info():
"""Provides parser metadata (version, author, etc.)"""
version = '1.1'
version = '1.2'
description = 'multi-line ASCII and Unicode table parser'
author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com'
@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
List of Dictionaries. Structured to conform to the schema.
"""
# normalize keys: convert to lowercase
for item in proc_data:
for key in item.copy():
k_new = key.lower()
item[k_new] = item.pop(key)
return proc_data
@ -238,7 +247,7 @@ def _snake_case(line: str) -> str:
"""
# must include all column separator characters in regex
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
return re.sub(r'\b \b', '_', line).lower()
return re.sub(r'\b \b', '_', line)
def _fixup_separators(line: str) -> str:

View File

@ -344,6 +344,49 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_no_lower_raw(self):
"""
Test 'asciitable' with a pure ASCII table whose header contains
special characters and mixed-case words. Special characters and the
spaces between words should be converted to underscores, with no
trailing or consecutive underscores in the resulting key names.
`raw=True` is passed so the original case of the header text is
preserved in the keys (they are not lowercased).
"""
input = '''
Protocol Address Age (min) Hardware Addr Type Interface
Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13
Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13
Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
'''
# Keys keep the header's capitalization: "Age (min)" -> "Age_min" and
# "Hardware Addr" -> "Hardware_Addr".
expected = [
{
"Protocol": "Internet",
"Address": "10.12.13.1",
"Age_min": "98",
"Hardware_Addr": "0950.5785.5cd1",
"Type": "ARPA",
"Interface": "FastEthernet2.13"
},
{
"Protocol": "Internet",
"Address": "10.12.13.3",
"Age_min": "131",
"Hardware_Addr": "0150.7685.14d5",
"Type": "ARPA",
"Interface": "GigabitEthernet2.13"
},
{
"Protocol": "Internet",
"Address": "10.12.13.4",
"Age_min": "198",
"Hardware_Addr": "0950.5C8A.5c41",
"Type": "ARPA",
"Interface": "GigabitEthernet2.17"
}
]
# raw=True is the case-preserving mode under test; quiet=True is passed as
# in the neighbouring tests.
self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected)
def test_asciitable_centered_col_header(self):
"""
Test 'asciitable' with long centered column header which can break

View File

@ -270,6 +270,34 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_no_lower_raw(self):
"""
Test 'asciitable_m' with a pure ASCII table whose header contains
special characters, mixed-case words, and one column header that spans
two lines. Special characters and the spaces between words should be
converted to underscores, with no trailing or consecutive underscores
in the resulting key names. `raw=True` is passed so the original case
of the header text is preserved in the keys (they are not lowercased).
"""
# NOTE(review): the neighbouring test in this file is named
# `test_asciitable_m_...`; this name lacks the `_m` infix - confirm whether
# that is intentional.
input = '''
+----------+------------+-----------+----------------+-------+--------------------+
| Protocol | Address | Age (min) | Hardware Addr | Type | Interface |
| | | of int | | | |
+----------+------------+-----------+----------------+-------+--------------------+
| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 |
+----------+------------+-----------+----------------+-------+--------------------+
'''
# The two-line header "Age (min)" / "of int" becomes the single key
# "Age_min_of_int", with its capitalization kept.
expected = [
{
"Protocol": "Internet",
"Address": "10.12.13.1",
"Age_min_of_int": "98",
"Hardware_Addr": "0950.5785.5cd1",
"Type": "ARPA",
"Interface": "FastEthernet2.13"
}
]
# raw=True is the case-preserving mode under test; quiet=True is passed as
# in the neighbouring tests.
self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected)
def test_asciitable_m_sep_char_in_cell(self):
"""
Test 'asciitable_m' with a column separator character inside the data