mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-07-15 01:24:29 +02:00
preserve keyname case with -r
This commit is contained in:
@ -54,6 +54,9 @@ etc...
|
|||||||
Headers (keys) are converted to snake-case. All values are returned as
|
Headers (keys) are converted to snake-case. All values are returned as
|
||||||
strings, except empty strings, which are converted to None/null.
|
strings, except empty strings, which are converted to None/null.
|
||||||
|
|
||||||
|
> Note: To preserve the case of the keys, use the `-r` cli option or the
|
||||||
|
> `raw=True` argument in `parse()`.
|
||||||
|
|
||||||
Usage (cli):
|
Usage (cli):
|
||||||
|
|
||||||
$ cat table.txt | jc --asciitable
|
$ cat table.txt | jc --asciitable
|
||||||
@ -122,7 +125,7 @@ from jc.parsers.universal import sparse_table_parse
|
|||||||
|
|
||||||
class info():
|
class info():
|
||||||
"""Provides parser metadata (version, author, etc.)"""
|
"""Provides parser metadata (version, author, etc.)"""
|
||||||
version = '1.1'
|
version = '1.2'
|
||||||
description = 'ASCII and Unicode table parser'
|
description = 'ASCII and Unicode table parser'
|
||||||
author = 'Kelly Brazil'
|
author = 'Kelly Brazil'
|
||||||
author_email = 'kellyjonbrazil@gmail.com'
|
author_email = 'kellyjonbrazil@gmail.com'
|
||||||
@ -144,6 +147,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
|
|||||||
|
|
||||||
List of Dictionaries. Structured to conform to the schema.
|
List of Dictionaries. Structured to conform to the schema.
|
||||||
"""
|
"""
|
||||||
|
# normalize keys: convert to lowercase
|
||||||
|
for item in proc_data:
|
||||||
|
for key in item.copy():
|
||||||
|
k_new = key.lower()
|
||||||
|
item[k_new] = item.pop(key)
|
||||||
|
|
||||||
return proc_data
|
return proc_data
|
||||||
|
|
||||||
|
|
||||||
@ -232,7 +241,7 @@ def _snake_case(line: str) -> str:
|
|||||||
padding.
|
padding.
|
||||||
"""
|
"""
|
||||||
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
|
line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters
|
||||||
line = re.sub(r'\b \b', '_', line).lower() # spaces betwee words
|
line = re.sub(r'\b \b', '_', line) # spaces between words
|
||||||
return line
|
return line
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,6 +24,9 @@ Headers (keys) are converted to snake-case and newlines between multi-line
|
|||||||
headers are joined with an underscore. All values are returned as strings,
|
headers are joined with an underscore. All values are returned as strings,
|
||||||
except empty strings, which are converted to None/null.
|
except empty strings, which are converted to None/null.
|
||||||
|
|
||||||
|
> Note: To preserve the case of the keys, use the `-r` cli option or the
|
||||||
|
> `raw=True` argument in `parse()`.
|
||||||
|
|
||||||
> Note: table column separator characters (e.g. `|`) cannot be present
|
> Note: table column separator characters (e.g. `|`) cannot be present
|
||||||
> inside the cell data. If detected, a warning message will be printed to
|
> inside the cell data. If detected, a warning message will be printed to
|
||||||
> `STDERR` and the line will be skipped. The warning message can be
|
> `STDERR` and the line will be skipped. The warning message can be
|
||||||
@ -107,7 +110,7 @@ from jc.exceptions import ParseError
|
|||||||
|
|
||||||
class info():
|
class info():
|
||||||
"""Provides parser metadata (version, author, etc.)"""
|
"""Provides parser metadata (version, author, etc.)"""
|
||||||
version = '1.1'
|
version = '1.2'
|
||||||
description = 'multi-line ASCII and Unicode table parser'
|
description = 'multi-line ASCII and Unicode table parser'
|
||||||
author = 'Kelly Brazil'
|
author = 'Kelly Brazil'
|
||||||
author_email = 'kellyjonbrazil@gmail.com'
|
author_email = 'kellyjonbrazil@gmail.com'
|
||||||
@ -129,6 +132,12 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
|
|||||||
|
|
||||||
List of Dictionaries. Structured to conform to the schema.
|
List of Dictionaries. Structured to conform to the schema.
|
||||||
"""
|
"""
|
||||||
|
# normalize keys: convert to lowercase
|
||||||
|
for item in proc_data:
|
||||||
|
for key in item.copy():
|
||||||
|
k_new = key.lower()
|
||||||
|
item[k_new] = item.pop(key)
|
||||||
|
|
||||||
return proc_data
|
return proc_data
|
||||||
|
|
||||||
|
|
||||||
@ -238,7 +247,7 @@ def _snake_case(line: str) -> str:
|
|||||||
"""
|
"""
|
||||||
# must include all column separator characters in regex
|
# must include all column separator characters in regex
|
||||||
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
|
line = re.sub(r'[^a-zA-Z0-9 |│┃┆┇┊┋╎╏║]', '_', line)
|
||||||
return re.sub(r'\b \b', '_', line).lower()
|
return re.sub(r'\b \b', '_', line)
|
||||||
|
|
||||||
|
|
||||||
def _fixup_separators(line: str) -> str:
|
def _fixup_separators(line: str) -> str:
|
||||||
|
@ -344,6 +344,49 @@ Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
|
|||||||
|
|
||||||
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
self.assertEqual(jc.parsers.asciitable.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
def test_asciitable_no_lower_raw(self):
|
||||||
|
"""
|
||||||
|
Test 'asciitable' with a pure ASCII table that has special
|
||||||
|
characters and mixed case in the header. Special characters should be converted to underscores
|
||||||
|
and no trailing or consecutive underscores should end up in the
|
||||||
|
resulting key names. This test uses `raw=True` to preserve key case (no lowercasing).
|
||||||
|
"""
|
||||||
|
input = '''
|
||||||
|
Protocol Address Age (min) Hardware Addr Type Interface
|
||||||
|
Internet 10.12.13.1 98 0950.5785.5cd1 ARPA FastEthernet2.13
|
||||||
|
Internet 10.12.13.3 131 0150.7685.14d5 ARPA GigabitEthernet2.13
|
||||||
|
Internet 10.12.13.4 198 0950.5C8A.5c41 ARPA GigabitEthernet2.17
|
||||||
|
'''
|
||||||
|
|
||||||
|
expected = [
|
||||||
|
{
|
||||||
|
"Protocol": "Internet",
|
||||||
|
"Address": "10.12.13.1",
|
||||||
|
"Age_min": "98",
|
||||||
|
"Hardware_Addr": "0950.5785.5cd1",
|
||||||
|
"Type": "ARPA",
|
||||||
|
"Interface": "FastEthernet2.13"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Protocol": "Internet",
|
||||||
|
"Address": "10.12.13.3",
|
||||||
|
"Age_min": "131",
|
||||||
|
"Hardware_Addr": "0150.7685.14d5",
|
||||||
|
"Type": "ARPA",
|
||||||
|
"Interface": "GigabitEthernet2.13"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Protocol": "Internet",
|
||||||
|
"Address": "10.12.13.4",
|
||||||
|
"Age_min": "198",
|
||||||
|
"Hardware_Addr": "0950.5C8A.5c41",
|
||||||
|
"Type": "ARPA",
|
||||||
|
"Interface": "GigabitEthernet2.17"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
self.assertEqual(jc.parsers.asciitable.parse(input, raw=True, quiet=True), expected)
|
||||||
|
|
||||||
def test_asciitable_centered_col_header(self):
|
def test_asciitable_centered_col_header(self):
|
||||||
"""
|
"""
|
||||||
Test 'asciitable' with long centered column header which can break
|
Test 'asciitable' with long centered column header which can break
|
||||||
|
@ -270,6 +270,34 @@ class MyTests(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
self.assertEqual(jc.parsers.asciitable_m.parse(input, quiet=True), expected)
|
||||||
|
|
||||||
|
def test_asciitable_no_lower_raw(self):
|
||||||
|
"""
|
||||||
|
Test 'asciitable_m' with a pure ASCII table that has special
|
||||||
|
characters and mixed case in the header. Special characters should be converted to underscores
|
||||||
|
and no trailing or consecutive underscores should end up in the
|
||||||
|
resulting key names. This test uses `raw=True` to preserve key case (no lowercasing).
|
||||||
|
"""
|
||||||
|
input = '''
|
||||||
|
+----------+------------+-----------+----------------+-------+--------------------+
|
||||||
|
| Protocol | Address | Age (min) | Hardware Addr | Type | Interface |
|
||||||
|
| | | of int | | | |
|
||||||
|
+----------+------------+-----------+----------------+-------+--------------------+
|
||||||
|
| Internet | 10.12.13.1 | 98 | 0950.5785.5cd1 | ARPA | FastEthernet2.13 |
|
||||||
|
+----------+------------+-----------+----------------+-------+--------------------+
|
||||||
|
'''
|
||||||
|
expected = [
|
||||||
|
{
|
||||||
|
"Protocol": "Internet",
|
||||||
|
"Address": "10.12.13.1",
|
||||||
|
"Age_min_of_int": "98",
|
||||||
|
"Hardware_Addr": "0950.5785.5cd1",
|
||||||
|
"Type": "ARPA",
|
||||||
|
"Interface": "FastEthernet2.13"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
self.assertEqual(jc.parsers.asciitable_m.parse(input, raw=True, quiet=True), expected)
|
||||||
|
|
||||||
def test_asciitable_m_sep_char_in_cell(self):
|
def test_asciitable_m_sep_char_in_cell(self):
|
||||||
"""
|
"""
|
||||||
Test 'asciitable_m' with a column separator character inside the data
|
Test 'asciitable_m' with a column separator character inside the data
|
||||||
|
Reference in New Issue
Block a user