1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-19 00:17:51 +02:00

add asciitable parser

This commit is contained in:
Kelly Brazil
2022-03-22 12:25:24 -07:00
parent b3b140066b
commit 7d95d679bf
2 changed files with 212 additions and 1 deletions

View File

@ -13,7 +13,7 @@ parsers = [
'airport', 'airport',
'airport-s', 'airport-s',
'arp', 'arp',
# 'asciitable', 'asciitable',
'asciitable-m', 'asciitable-m',
'blkid', 'blkid',
'cksum', 'cksum',

211
jc/parsers/asciitable.py Normal file
View File

@ -0,0 +1,211 @@
"""jc - JSON Convert `asciitable` parser
This parser converts ASCII and Unicode text tables with single-line rows.
Column headers must be at least two spaces apart from each other and must
be unique.
For example:
    ╒══════════╤═════════╤════════╕
    │ foo      │ bar     │ baz    │
    ╞══════════╪═════════╪════════╡
    │ good day │         │ 12345  │
    ├──────────┼─────────┼────────┤
    │ hi there │ abc def │ 3.14   │
    ╘══════════╧═════════╧════════╛
or
    +-----------------------------+
    | foo        bar       baz    |
    +-----------------------------+
    | good day             12345  |
    | hi there   abc def   3.14   |
    +-----------------------------+
or
    | foo      | bar     | baz    |
    |----------|---------|--------|
    | good day |         | 12345  |
    | hi there | abc def | 3.14   |
or
    foo        bar       baz
    ---------  --------  ------
    good day             12345
    hi there   abc def
etc.
Usage (cli):
$ cat table.txt | jc --asciitable
Usage (module):
import jc
result = jc.parse('asciitable', asciitable_string)
Schema:
[
{
"column_name1": string, # empty string is null
"column_name2": string # empty string is null
}
]
Examples:
$ asciitable | jc --asciitable -p
[]
$ asciitable | jc --asciitable -p -r
[]
"""
import re
from typing import List, Dict
import jc.utils
from jc.parsers.universal import sparse_table_parse
class info:
    """Parser metadata: version, description, author and supported platforms."""
    # Parser version string; re-exported below as the module's __version__.
    version = '1.0'
    # Short human-readable summary of what the parser handles.
    description = 'ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
    # Platform identifiers handed to the compatibility check in parse().
    compatible = [
        'linux',
        'darwin',
        'cygwin',
        'win32',
        'aix',
        'freebsd',
    ]


# Keep the module-level version in lockstep with the metadata class.
__version__ = info.version
def _process(proc_data: List[Dict]) -> List[Dict]:
    """
    Post-process parsed rows so they conform to the schema.

    This parser applies no conversions: the list that is passed in is
    handed back unchanged (the very same object).

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema.
    """
    processed = proc_data
    return processed
def _remove_ansi(string: str) -> str:
    """
    Return `string` with ANSI control sequences deleted.

    A sequence is matched as an introducer (the single byte 0x9B, or ESC
    followed by '['), then any characters in the range '0'-'?', then any
    characters in the range ' '-'/', and finally one character in the
    range '@'-'~'.
    """
    return re.sub(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]', '', string)
def _lstrip(string: str) -> str:
    """
    Remove the common left margin from every non-blank line.

    Blank (whitespace-only) lines are dropped. The smallest amount of
    leading whitespace found on any remaining line is cut from the start
    of all of them, so the relative indentation of the lines is kept.

    Parameters:

        string:   (string) multi-line text

    Returns:

        String. The remaining lines joined with newlines, or an empty
        string when the input contains no non-blank lines.
    """
    lines = [x for x in string.splitlines() if x.strip()]

    # min() raises ValueError on an empty sequence, so bail out early when
    # there is nothing left to strip.
    if not lines:
        return ''

    min_point = min(len(x) - len(x.lstrip()) for x in lines)
    return '\n'.join(x[min_point:] for x in lines)
def _rstrip(string: str) -> str:
    """
    Trim or pad every non-blank line to a common right edge.

    Blank (whitespace-only) lines are dropped. The right edge is the
    position of the rightmost non-whitespace character found on any
    remaining line: longer lines are cut there and shorter lines are
    padded with spaces, so all returned lines have the same length.

    Parameters:

        string:   (string) multi-line text

    Returns:

        String. The remaining lines joined with newlines, or an empty
        string when the input contains no non-blank lines.
    """
    lines = [x for x in string.splitlines() if x.strip()]

    # max() raises ValueError on an empty sequence, so bail out early when
    # there is nothing left to pad.
    if not lines:
        return ''

    max_point = max(len(x.rstrip()) for x in lines)
    return '\n'.join(x[:max_point].ljust(max_point) for x in lines)
def _strip(string: str) -> str:
    """
    Normalize the table's margins: apply `_lstrip` first, then `_rstrip`
    on its result.
    """
    return _rstrip(_lstrip(string))
def _is_separator(line: str) -> bool:
    """
    Report whether `line` is a table border / separator line.

    Surrounding whitespace is ignored. The line counts as a separator when
    it starts with one of the known two-character border openings and ends
    with the matching two-character closing.
    """
    candidate = line.strip()

    # (opening, closing) pairs for each recognized border style
    border_pairs = (
        ('╒═', '═╕'),
        ('╞═', '═╡'),
        ('╘═', '═╛'),
        ('┌─', '─┐'),
        ('├─', '─┤'),
        ('└─', '─┘'),
        ('+=', '=+'),
        ('+-', '-+'),
        ('--', '--'),
        ('==', '=='),
        ('|-', '-|'),
    )

    for opening, closing in border_pairs:
        if candidate.startswith(opening) and candidate.endswith(closing):
            return True

    return False
def _snake_case(line: str) -> str:
    """
    Lowercase `line` and turn each single space that sits directly between
    two word characters into an underscore. Runs of two or more spaces are
    left as they are.
    """
    underscored = re.sub(r'\b \b', '_', line)
    return underscored.lower()
def _normalize_rows(table: str) -> List[str]:
    """
    Return a list of row strings taken from the table text.

    Blank lines and separator lines are skipped. In the remaining rows the
    column separator characters ('│' and '|') are replaced with spaces so
    the column positions stay the same. The first remaining row (the
    header) is snake-cased.

    Parameters:

        table:   (string) table text

    Returns:

        List of strings. Empty when the table contains no header or data
        rows.
    """
    result = []

    for line in table.splitlines():
        # skip blank lines
        if not line.strip():
            continue

        # skip separators
        if _is_separator(line):
            continue

        # data row - blank out the column separators. '\u2502' is the
        # box-drawing vertical bar (│). Replacing an empty string here
        # would insert a space between every character of the row.
        line = line.replace('\u2502', ' ').replace('|', ' ')
        result.append(line)

    # only snake-case the header when at least one row survived
    if result:
        result[0] = _snake_case(result[0])

    return result
def _parse_pretty(table: List[str]) -> List[Dict[str, str]]:
    """
    Convert normalized table rows into a list of dictionaries by passing
    them straight to `sparse_table_parse`.
    """
    parsed_rows = sparse_table_parse(table)
    return parsed_rows
def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[Dict]:
    """
    Main text parsing function

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data. An empty
        list is returned when there is nothing to parse.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    raw_output: List = []

    if jc.utils.has_data(data):
        data = _remove_ansi(data)

        # The check above runs before the escape sequences are removed, so
        # the text may now be empty or whitespace-only. The margin helpers
        # cannot handle that, so test again.
        if data.strip():
            data = _strip(data)
            data_list = _normalize_rows(data)

            # Only hand rows to the table parser when there is at least a
            # header row.
            if data_list:
                raw_output = _parse_pretty(data_list)

    return raw_output if raw else _process(raw_output)