1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-07-15 01:24:29 +02:00

preserve keyname case with -r

This commit is contained in:
Kelly Brazil
2022-06-15 11:12:43 -07:00
parent 247c43278c
commit 4f148469d7
4 changed files with 93 additions and 4 deletions

View File

@ -54,6 +54,9 @@ etc...
Headers (keys) are converted to snake-case. All values are returned as Headers (keys) are converted to snake-case. All values are returned as
strings, except empty strings, which are converted to None/null. strings, except empty strings, which are converted to None/null.
> Note: To preserve the case of the keys, use the `-r` cli option or the
> `raw=True` argument in `parse()`.
Usage (cli): Usage (cli):
$ cat table.txt | jc --asciitable $ cat table.txt | jc --asciitable
@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse
class info(): class info():
"""Provides parser metadata (version, author, etc.)""" """Provides parser metadata (version, author, etc.)"""
version = '1.1' version = '1.2'
description = 'ASCII and Unicode table parser' description = 'ASCII and Unicode table parser'
author = 'Kelly Brazil' author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com' author_email = 'kellyjonbrazil@gmail.com'
@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
List of Dictionaries. Structured to conform to the schema. List of Dictionaries. Structured to conform to the schema.
""" """
# normalize keys: convert to lowercase
for item in proc_data:
for key in item.copy():
k_new = key.lower()
item[k_new] = item.pop(key)
return proc_data return proc_data
@ -232,7 +241,7 @@ def _snake_case(line: str) -> str:
padding. padding.
""" """
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
line = re.sub(r'\b \b', '_', line).lower() # spaces betwee words line = re.sub(r'\b \b', '_', line) # spaces between words
return line return line

View File

@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line
headers are joined with an underscore. All values are returned as strings, headers are joined with an underscore. All values are returned as strings,
except empty strings, which are converted to None/null. except empty strings, which are converted to None/null.
> Note: To preserve the case of the keys, use the `-r` cli option or the
> `raw=True` argument in `parse()`.
> Note: table column separator characters (e.g. `|`) cannot be present > Note: table column separator characters (e.g. `|`) cannot be present
> inside the cell data. If detected, a warning message will be printed to > inside the cell data. If detected, a warning message will be printed to
> `STDERR` and the line will be skipped. The warning message can be > `STDERR` and the line will be skipped. The warning message can be
@ -107,7 +110,7 @@ from jc.exceptions import ParseError
class info(): class info():
"""Provides parser metadata (version, author, etc.)""" """Provides parser metadata (version, author, etc.)"""
version = '1.1' version = '1.2'
description = 'multi-line ASCII and Unicode table parser' description = 'multi-line ASCII and Unicode table parser'
author = 'Kelly Brazil' author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com' author_email = 'kellyjonbrazil@gmail.com'
@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
List of Dictionaries. Structured to conform to the schema. List of Dictionaries. Structured to conform to the schema.
""" """
# normalize keys: convert to lowercase
for item in proc_data:
for key in item.copy():
k_new = key.lower()
item[k_new] = item.pop(key)
return proc_data return proc_data
@ -238,7 +247,7 @@ def _snake_case(line: str) -> str:
""" """
# must include all column separator characters in regex # must include all column separator characters in regex
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line) line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
return re.sub(r'\b \b', '_', line).lower() return re.sub(r'\b \b', '_', line)
def _fixup_separators(line: str) -> str: def _fixup_separators(line: str) -> str:

View File

@ -344,6 +344,49 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected) self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
def test_asciitable_no_lower_raw(self):
"""
Test 'asciitable' with a pure ASCII table that has special
characters and mixed case in the header. The special characters should be
converted to underscores, and no trailing or consecutive underscores
should end up in the resulting key names. `raw` is used in this test to
preserve the case of the keys (no lowercasing).
"""
input = '''
Protocol Address Age (min) Hardware Addr Type Interface
Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13
Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13
Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
'''
expected = [
{
"Protocol": "Internet",
"Address": "10.12.13.1",
"Age_min": "98",
"Hardware_Addr": "0950.5785.5cd1",
"Type": "ARPA",
"Interface": "FastEthernet2.13"
},
{
"Protocol": "Internet",
"Address": "10.12.13.3",
"Age_min": "131",
"Hardware_Addr": "0150.7685.14d5",
"Type": "ARPA",
"Interface": "GigabitEthernet2.13"
},
{
"Protocol": "Internet",
"Address": "10.12.13.4",
"Age_min": "198",
"Hardware_Addr": "0950.5C8A.5c41",
"Type": "ARPA",
"Interface": "GigabitEthernet2.17"
}
]
self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected)
def test_asciitable_centered_col_header(self): def test_asciitable_centered_col_header(self):
""" """
Test 'asciitable' with long centered column header which can break Test 'asciitable' with long centered column header which can break

View File

@ -270,6 +270,34 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected) self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
def test_asciitable_no_lower_raw(self):
"""
Test 'asciitable_m' with a pure ASCII table that has special
characters and mixed case in the header. The special characters should be
converted to underscores, and no trailing or consecutive underscores
should end up in the resulting key names. `raw` is used in this test to
preserve the case of the keys (no lowercasing).
"""
input = '''
+----------+------------+-----------+----------------+-------+--------------------+
| Protocol | Address | Age (min) | Hardware Addr | Type | Interface |
| | | of int | | | |
+----------+------------+-----------+----------------+-------+--------------------+
| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 |
+----------+------------+-----------+----------------+-------+--------------------+
'''
expected = [
{
"Protocol": "Internet",
"Address": "10.12.13.1",
"Age_min_of_int": "98",
"Hardware_Addr": "0950.5785.5cd1",
"Type": "ARPA",
"Interface": "FastEthernet2.13"
}
]
self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected)
def test_asciitable_m_sep_char_in_cell(self): def test_asciitable_m_sep_char_in_cell(self):
""" """
Test 'asciitable_m' with a column separator character inside the data Test 'asciitable_m' with a column separator character inside the data