jc/jc/parsers/asciitable.py

"""jc - JSON Convert `asciitable` parser

This parser converts ASCII and Unicode text tables with single-line rows.

Column headers must be at least two spaces apart from each other and must
be unique. For best results, column headers should be left-justified. If
column separators are present, then non-left-justified headers will be fixed
automatically.

Row separators are optional and are ignored. Each non-row-separator line is
considered a separate row in the table.

For example:

    ╒══════════╤═════════╤════════╕
    │ foo      │ bar     │ baz    │
    ╞══════════╪═════════╪════════╡
    │ good day │         │ 12345  │
    ├──────────┼─────────┼────────┤
    │ hi there │ abc def │ 3.14   │
    ╘══════════╧═════════╧════════╛

or

    +-----------------------------+
    | foo        bar       baz    |
    +-----------------------------+
    | good day             12345  |
    | hi there   abc def   3.14   |
    +-----------------------------+

or

    | foo      | bar     | baz    |
    |----------|---------|--------|
    | good day |         | 12345  |
    | hi there | abc def | 3.14   |

or

    foo        bar       baz
    ---------  --------  ------
    good day             12345
    hi there   abc def   3.14

or

    foo        bar       baz
    good day             12345
    hi there   abc def   3.14

etc...

Headers (keys) are converted to snake-case. All values are returned as
strings, except empty strings, which are converted to None/null.

> Note: To preserve the case of the keys use the `-r` cli option or
> `raw=True` argument in `parse()`.

Usage (cli):

    $ cat table.txt | jc --asciitable

Usage (module):

    import jc
    result = jc.parse('asciitable', asciitable_string)

Schema:

    [
      {
        "column_name1":     string,    # empty string is null
        "column_name2":     string     # empty string is null
      }
    ]

Examples:

    $ echo '
    >     ╒══════════╤═════════╤════════╕
    >     │ foo      │ bar     │ baz    │
    >     ╞══════════╪═════════╪════════╡
    >     │ good day │         │ 12345  │
    >     ├──────────┼─────────┼────────┤
    >     │ hi there │ abc def │ 3.14   │
    >     ╘══════════╧═════════╧════════╛' | jc --asciitable -p
    [
      {
        "foo": "good day",
        "bar": null,
        "baz": "12345"
      },
      {
        "foo": "hi there",
        "bar": "abc def",
        "baz": "3.14"
      }
    ]

    $ echo '
    >     foo        bar       baz
    >     ---------  --------  ------
    >     good day             12345
    >     hi there   abc def   3.14'  | jc --asciitable -p
    [
      {
        "foo": "good day",
        "bar": null,
        "baz": "12345"
      },
      {
        "foo": "hi there",
        "bar": "abc def",
        "baz": "3.14"
      }
    ]
"""
import re
from functools import lru_cache
from typing import List, Dict
import jc.utils
from jc.parsers.universal import sparse_table_parse


class info:
    """Parser metadata: version, description, author and platform support."""
    version = '1.2'
    description = 'ASCII and Unicode table parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
    # platforms this parser declares itself compatible with
    compatible = [
        'linux',
        'darwin',
        'cygwin',
        'win32',
        'aix',
        'freebsd',
    ]
    # tags attached to this parser's metadata
    tags = [
        'generic',
        'string',
    ]


# module-level alias of the parser version declared on `info`
__version__ = info.version


def _process(proc_data: List[Dict]) -> List[Dict]:
    """
    Final processing to conform to the schema.

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema.
    """
    # normalize keys: convert to lowercase
    for item in proc_data:
        for key in item.copy():
            k_new = key.lower()
            item[k_new] = item.pop(key)

    return proc_data


def _remove_ansi(string: str) -> str:
    ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]')
    return ansi_escape.sub('', string)


def _lstrip(string: str) -> str:
    """find the leftmost non-whitespace character and lstrip to that index"""
    lstrip_list = [x for x in string.splitlines() if not len(x.strip()) == 0]
    start_points = (len(x) - len(x.lstrip()) for x in lstrip_list)
    min_point = min(start_points)
    new_lstrip_list = (x[min_point:] for x in lstrip_list)
    return '\n'.join(new_lstrip_list)


def _rstrip(string: str) -> str:
    """find the rightmost non-whitespace character and rstrip and pad to that index"""
    rstrip_list = [x for x in string.splitlines() if not len(x.strip()) == 0]
    end_points = (len(x.rstrip()) for x in rstrip_list)
    max_point = max(end_points)
    new_rstrip_list = ((x + ' ' * max_point)[:max_point] for x in rstrip_list)
    return '\n'.join(new_rstrip_list)


def _strip(string: str) -> str:
    """apply _lstrip to the text first, then _rstrip to the result"""
    return _rstrip(_lstrip(string))

@lru_cache(maxsize=32)
def _is_separator(line: str) -> bool:
    """returns true if a table separator line is found"""
    # This function is cacheable since tables have identical separators
    strip_line = line.strip()
    if any((
        strip_line.startswith('|-') and strip_line.endswith('-|'),
        strip_line.startswith('━━') and strip_line.endswith('━━'),
        strip_line.startswith('──') and strip_line.endswith('──'),
        strip_line.startswith('┄┄') and strip_line.endswith('┄┄'),
        strip_line.startswith('┅┅') and strip_line.endswith('┅┅'),
        strip_line.startswith('┈┈') and strip_line.endswith('┈┈'),
        strip_line.startswith('┉┉') and strip_line.endswith('┉┉'),
        strip_line.startswith('══') and strip_line.endswith('══'),
        strip_line.startswith('--') and strip_line.endswith('--'),
        strip_line.startswith('==') and strip_line.endswith('=='),
        strip_line.startswith('+=') and strip_line.endswith('=+'),
        strip_line.startswith('+-') and strip_line.endswith('-+'),
        strip_line.startswith('╒') and strip_line.endswith('╕'),
        strip_line.startswith('╞') and strip_line.endswith('╡'),
        strip_line.startswith('╘') and strip_line.endswith('╛'),
        strip_line.startswith('┏') and strip_line.endswith('┓'),
        strip_line.startswith('┣') and strip_line.endswith('┫'),
        strip_line.startswith('┗') and strip_line.endswith('┛'),
        strip_line.startswith('┡') and strip_line.endswith('┩'),
        strip_line.startswith('┢') and strip_line.endswith('┪'),
        strip_line.startswith('┟') and strip_line.endswith('┧'),
        strip_line.startswith('┞') and strip_line.endswith('┦'),
        strip_line.startswith('┠') and strip_line.endswith('┨'),
        strip_line.startswith('┝') and strip_line.endswith('┥'),
        strip_line.startswith('┍') and strip_line.endswith('┑'),
        strip_line.startswith('┕') and strip_line.endswith('┙'),
        strip_line.startswith('┎') and strip_line.endswith('┒'),
        strip_line.startswith('┖') and strip_line.endswith('┚'),
        strip_line.startswith('╓') and strip_line.endswith('╖'),
        strip_line.startswith('╟') and strip_line.endswith('╢'),
        strip_line.startswith('╙') and strip_line.endswith('╜'),
        strip_line.startswith('╔') and strip_line.endswith('╗'),
        strip_line.startswith('╠') and strip_line.endswith('╣'),
        strip_line.startswith('╚') and strip_line.endswith('╝'),
        strip_line.startswith('┌') and strip_line.endswith('┐'),
        strip_line.startswith('├') and strip_line.endswith('┤'),
        strip_line.startswith('└') and strip_line.endswith('┘'),
        strip_line.startswith('╭') and strip_line.endswith('╮'),
        strip_line.startswith('╰') and strip_line.endswith('╯')
    )):
        return True
    return False


def _snake_case(line: str) -> str:
    """
    Replace spaces between words and special characters with an underscore.
    Ignore the replacement char (�) used for header padding.
    """
    line = re.sub(r'[^a-zA-Z0-9� ]', '_', line)  # special characters
    line = re.sub(r'\b \b', '_', line)           # spaces between words
    return line


def _normalize_rows(table: str) -> List[str]:
    """
    Returns a List of row strings with column separators blanked out.

    Blank lines and separator lines are skipped. The first remaining line
    is treated as the header: it is snake-cased, and any header text that
    sits more than one space after a column separator gets its extra
    leading spaces swapped for the replacement char (�). All later lines
    are data rows.
    """
    unicode_bars = '│┃┆┇┊┋╎╏║'
    to_ascii_bar = str.maketrans(dict.fromkeys(unicode_bars, '|'))
    to_blank = str.maketrans(dict.fromkeys('|' + unicode_bars, ' '))
    far_header = re.compile(r'(?:\| )( +)([^|]+)')

    rows: List[str] = []
    for raw_line in table.splitlines():
        # nothing to keep from blank lines or separators
        if not raw_line.strip() or _is_separator(raw_line):
            continue

        # once the header is stored every further line is a data row
        if rows:
            rows.append(raw_line.translate(to_blank))
            continue

        # header row: work with a single kind of column separator
        header = raw_line.translate(to_ascii_bar)

        # pad headers that are too far away from their separator with �
        # in place of the extra spaces
        for gap, text in far_header.findall(header):
            header = header.replace(gap + text, '�' * len(gap) + text)

        rows.append(_snake_case(header.replace('|', ' ')))

    return rows


def _fixup_headers(table: List[Dict]) -> List[Dict]:
    """remove consecutive underscores and any trailing underscores"""
    new_table = []
    for row in table:
        new_row = row.copy()
        for k in row:
            # remove replacement character
            k_new = k.replace('�', '')
            # remove consecutive underscores
            k_new = re.sub(r'__+', '_', k_new)
            # remove trailing underscores
            k_new = re.sub(r'_+$', '', k_new)
            new_row[k_new] = new_row.pop(k)
        new_table.append(new_row)

    return new_table


def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[Dict]:
    """
    Main text parsing function

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data. The list
        is empty when the input contains no table rows.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    raw_output: List = []

    if jc.utils.has_data(data):
        data = _remove_ansi(data)

        # Removing escape sequences can leave nothing but whitespace, in
        # which case there is no table to lay out.
        if data.strip():
            data_list = _normalize_rows(_strip(data))

            # Every line may have been a separator, leaving no rows at all.
            if data_list:
                raw_table = sparse_table_parse(data_list)
                raw_output = _fixup_headers(raw_table)

    # keys keep their original case in raw mode, otherwise are lowercased
    return raw_output if raw else _process(raw_output)
add asciitable parser 2022-03-22 12:25:24 -07:00			"""jc - JSON Convert `asciitable` parser

			`This parser converts ASCII and Unicode text tables with single-line rows.`

			`Column headers must be at least two spaces apart from each other and must`
doc update 2022-05-26 11:37:16 -07:00			`be unique. For best results, column headers should be left-justified. If`
			`column separators are present, then non-left-justified headers will be fixed`
			`automatically.`

			`Row separators are optional and are ignored. Each non-row-separator line is`
			`considered a separate row in the table.`
add asciitable parser 2022-03-22 12:25:24 -07:00
			`For example:`

			`╒══════════╤═════════╤════════╕`
			`│ foo │ bar │ baz │`
			`╞══════════╪═════════╪════════╡`
			`│ good day │ │ 12345 │`
			`├──────────┼─────────┼────────┤`
			`│ hi there │ abc def │ 3.14 │`
			`╘══════════╧═════════╧════════╛`

doc formatting 2022-03-29 09:35:54 -07:00			`or`
add asciitable parser 2022-03-22 12:25:24 -07:00
			`+-----------------------------+`
			`\| foo bar baz \|`
			`+-----------------------------+`
			`\| good day 12345 \|`
			`\| hi there abc def 3.14 \|`
			`+-----------------------------+`

doc formatting 2022-03-29 09:35:54 -07:00			`or`
add asciitable parser 2022-03-22 12:25:24 -07:00
			`\| foo \| bar \| baz \|`
			`\|----------\|---------\|--------\|`
			`\| good day \| \| 12345 \|`
			`\| hi there \| abc def \| 3.14 \|`

doc formatting 2022-03-29 09:35:54 -07:00			`or`
add asciitable parser 2022-03-22 12:25:24 -07:00
			`foo bar baz`
			`--------- -------- ------`
			`good day 12345`
doc update 2022-03-22 12:42:07 -07:00			`hi there abc def 3.14`
add asciitable parser 2022-03-22 12:25:24 -07:00
formatting 2022-03-29 09:58:44 -07:00			`or`
cache _is_separator function 2022-03-24 11:58:13 -07:00
			`foo bar baz`
			`good day 12345`
			`hi there abc def 3.14`

doc formatting 2022-03-29 09:35:54 -07:00			`etc...`
add asciitable parser 2022-03-22 12:25:24 -07:00
doc formatting 2022-03-29 09:35:54 -07:00			`Headers (keys) are converted to snake-case. All values are returned as`
			`strings, except empty strings, which are converted to None/null.`
doc update 2022-03-22 13:21:10 -07:00
preserve keyname case with -r 2022-06-15 11:12:43 -07:00			> Note: To preserve the case of the keys use the `-r` cli option or
			> `raw=True` argument in `parse()`.

add asciitable parser 2022-03-22 12:25:24 -07:00			`Usage (cli):`

			`$ cat table.txt \| jc --asciitable`

			`Usage (module):`

			`import jc`
			`result = jc.parse('asciitable', asciitable_string)`

			`Schema:`

			`[`
			`{`
			`"column_name1": string, # empty string is null`
			`"column_name2": string # empty string is null`
			`}`
			`]`

			`Examples:`

doc update 2022-03-22 12:35:56 -07:00			`$ echo '`
			`> ╒══════════╤═════════╤════════╕`
			`> │ foo │ bar │ baz │`
			`> ╞══════════╪═════════╪════════╡`
			`> │ good day │ │ 12345 │`
			`> ├──────────┼─────────┼────────┤`
			`> │ hi there │ abc def │ 3.14 │`
			`> ╘══════════╧═════════╧════════╛' \| jc --asciitable -p`
			`[`
			`{`
			`"foo": "good day",`
			`"bar": null,`
			`"baz": "12345"`
			`},`
			`{`
			`"foo": "hi there",`
			`"bar": "abc def",`
			`"baz": "3.14"`
			`}`
			`]`

			`$ echo '`
			`> foo bar baz`
			`> --------- -------- ------`
			`> good day 12345`
doc update 2022-03-22 12:42:07 -07:00			`> hi there abc def 3.14' \| jc --asciitable -p`
doc update 2022-03-22 12:35:56 -07:00			`[`
			`{`
			`"foo": "good day",`
			`"bar": null,`
			`"baz": "12345"`
			`},`
			`{`
			`"foo": "hi there",`
			`"bar": "abc def",`
doc update 2022-03-22 12:42:07 -07:00			`"baz": "3.14"`
doc update 2022-03-22 12:35:56 -07:00			`}`
			`]`
add asciitable parser 2022-03-22 12:25:24 -07:00			`"""`
			`import re`
cache _is_separator function 2022-03-24 11:58:13 -07:00			`from functools import lru_cache`
add asciitable parser 2022-03-22 12:25:24 -07:00			`from typing import List, Dict`
			`import jc.utils`
			`from jc.parsers.universal import sparse_table_parse`


			`class info():`
			`"""Provides parser metadata (version, author, etc.)"""`
preserve keyname case with -r 2022-06-15 11:12:43 -07:00			`version = '1.2'`
add asciitable parser 2022-03-22 12:25:24 -07:00			`description = 'ASCII and Unicode table parser'`
			`author = 'Kelly Brazil'`
			`author_email = 'kellyjonbrazil@gmail.com'`
			`compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']`
add parser metadata tags and category view to help 2022-12-27 13:59:10 -08:00			`tags = ['generic', 'string']`
add asciitable parser 2022-03-22 12:25:24 -07:00

			`__version__ = info.version`


			`def _process(proc_data: List[Dict]) -> List[Dict]:`
			`"""`
			`Final processing to conform to the schema.`

			`Parameters:`

			`proc_data: (List of Dictionaries) raw structured data to process`

			`Returns:`

			`List of Dictionaries. Structured to conform to the schema.`
			`"""`
preserve keyname case with -r 2022-06-15 11:12:43 -07:00			`# normalize keys: convert to lowercase`
			`for item in proc_data:`
			`for key in item.copy():`
			`k_new = key.lower()`
			`item[k_new] = item.pop(key)`

add asciitable parser 2022-03-22 12:25:24 -07:00			`return proc_data`


			`def _remove_ansi(string: str) -> str:`
			`ansi_escape = re.compile(r'(\x9B\|\x1B\[)[0-?][ -\/][@-~]')`
			`return ansi_escape.sub('', string)`


			`def _lstrip(string: str) -> str:`
			`"""find the leftmost non-whitespace character and lstrip to that index"""`
			`lstrip_list = [x for x in string.splitlines() if not len(x.strip()) == 0]`
			`start_points = (len(x) - len(x.lstrip()) for x in lstrip_list)`
			`min_point = min(start_points)`
			`new_lstrip_list = (x[min_point:] for x in lstrip_list)`
			`return '\n'.join(new_lstrip_list)`


			`def _rstrip(string: str) -> str:`
			`"""find the rightmost non-whitespace character and rstrip and pad to that index"""`
			`rstrip_list = [x for x in string.splitlines() if not len(x.strip()) == 0]`
			`end_points = (len(x.rstrip()) for x in rstrip_list)`
			`max_point = max(end_points)`
			`new_rstrip_list = ((x + ' ' * max_point)[:max_point] for x in rstrip_list)`
			`return '\n'.join(new_rstrip_list)`


			`def _strip(string: str) -> str:`
			`string = _lstrip(string)`
			`string = _rstrip(string)`
			`return string`

cache _is_separator function 2022-03-24 11:58:13 -07:00			`@lru_cache(maxsize=32)`
add asciitable parser 2022-03-22 12:25:24 -07:00			`def _is_separator(line: str) -> bool:`
formatting 2022-03-24 09:39:53 -07:00			`"""returns true if a table separator line is found"""`
cache _is_separator function 2022-03-24 11:58:13 -07:00			`# This function is cacheable since tables have identical separators`
add asciitable parser 2022-03-22 12:25:24 -07:00			`strip_line = line.strip()`
			`if any((`
minor optimization by reordering expressions 2022-03-24 12:37:46 -07:00			`strip_line.startswith('\|-') and strip_line.endswith('-\|'),`
			`strip_line.startswith('━━') and strip_line.endswith('━━'),`
			`strip_line.startswith('──') and strip_line.endswith('──'),`
			`strip_line.startswith('┄┄') and strip_line.endswith('┄┄'),`
			`strip_line.startswith('┅┅') and strip_line.endswith('┅┅'),`
			`strip_line.startswith('┈┈') and strip_line.endswith('┈┈'),`
			`strip_line.startswith('┉┉') and strip_line.endswith('┉┉'),`
			`strip_line.startswith('══') and strip_line.endswith('══'),`
			`strip_line.startswith('--') and strip_line.endswith('--'),`
			`strip_line.startswith('==') and strip_line.endswith('=='),`
			`strip_line.startswith('+=') and strip_line.endswith('=+'),`
			`strip_line.startswith('+-') and strip_line.endswith('-+'),`
remove lines from corner detection and add rounded corners 2022-03-24 09:31:12 -07:00			`strip_line.startswith('╒') and strip_line.endswith('╕'),`
			`strip_line.startswith('╞') and strip_line.endswith('╡'),`
			`strip_line.startswith('╘') and strip_line.endswith('╛'),`
			`strip_line.startswith('┏') and strip_line.endswith('┓'),`
			`strip_line.startswith('┣') and strip_line.endswith('┫'),`
			`strip_line.startswith('┗') and strip_line.endswith('┛'),`
			`strip_line.startswith('┡') and strip_line.endswith('┩'),`
			`strip_line.startswith('┢') and strip_line.endswith('┪'),`
			`strip_line.startswith('┟') and strip_line.endswith('┧'),`
			`strip_line.startswith('┞') and strip_line.endswith('┦'),`
			`strip_line.startswith('┠') and strip_line.endswith('┨'),`
			`strip_line.startswith('┝') and strip_line.endswith('┥'),`
			`strip_line.startswith('┍') and strip_line.endswith('┑'),`
			`strip_line.startswith('┕') and strip_line.endswith('┙'),`
			`strip_line.startswith('┎') and strip_line.endswith('┒'),`
			`strip_line.startswith('┖') and strip_line.endswith('┚'),`
			`strip_line.startswith('╓') and strip_line.endswith('╖'),`
			`strip_line.startswith('╟') and strip_line.endswith('╢'),`
			`strip_line.startswith('╙') and strip_line.endswith('╜'),`
			`strip_line.startswith('╔') and strip_line.endswith('╗'),`
			`strip_line.startswith('╠') and strip_line.endswith('╣'),`
			`strip_line.startswith('╚') and strip_line.endswith('╝'),`
			`strip_line.startswith('┌') and strip_line.endswith('┐'),`
			`strip_line.startswith('├') and strip_line.endswith('┤'),`
			`strip_line.startswith('└') and strip_line.endswith('┘'),`
			`strip_line.startswith('╭') and strip_line.endswith('╮'),`
minor optimization by reordering expressions 2022-03-24 12:37:46 -07:00			`strip_line.startswith('╰') and strip_line.endswith('╯')`
add asciitable parser 2022-03-22 12:25:24 -07:00			`)):`
			`return True`
			`return False`


			`def _snake_case(line: str) -> str:`
fix for special characters in headers 2022-03-23 15:08:33 -07:00			`"""`
update _snake_case comment 2022-06-15 11:15:26 -07:00			`Replace spaces between words and special characters with an underscore.`
			`Ignore the replacement char (�) used for header padding.`
fix for special characters in headers 2022-03-23 15:08:33 -07:00			`"""`
fix asciitable parser for cases where centered rows cause misaligned fields 2022-05-26 08:57:35 -07:00			`line = re.sub(r'[^a-zA-Z0-9� ]', '_', line) # special characters`
preserve keyname case with -r 2022-06-15 11:12:43 -07:00			`line = re.sub(r'\b \b', '_', line) # spaces between words`
fix asciitable parser for cases where centered rows cause misaligned fields 2022-05-26 08:57:35 -07:00			`return line`
add asciitable parser 2022-03-22 12:25:24 -07:00

			`def _normalize_rows(table: str) -> List[str]:`
			`"""`
formatting 2022-03-24 09:39:53 -07:00			`returns a List of row strings. Header is snake-cased`
add asciitable parser 2022-03-22 12:25:24 -07:00			`"""`
fix asciitable parser for cases where centered rows cause misaligned fields 2022-05-26 08:57:35 -07:00			`result: List[str] = []`
add asciitable parser 2022-03-22 12:25:24 -07:00			`for line in table.splitlines():`
			`# skip blank lines`
			`if not line.strip():`
			`continue`

			`# skip separators`
			`if _is_separator(line):`
			`continue`

fix asciitable parser for cases where centered rows cause misaligned fields 2022-05-26 08:57:35 -07:00			`# header or data row found - remove column separators`
			`if not result: # this is the header row`
			`# normalize the separator`
			`line = line.replace('│', '\|')\`
			`.replace('┃', '\|')\`
			`.replace('┆', '\|')\`
			`.replace('┇', '\|')\`
			`.replace('┊', '\|')\`
			`.replace('┋', '\|')\`
			`.replace('╎', '\|')\`
			`.replace('╏', '\|')\`
			`.replace('║', '\|')`

			`# find the number of chars to pad in front of headers that are too`
			`# far away from the separator. Replace spaces with unicode char: �`
			`# we will remove this char from headers after sparse_table_parse`
			`problem_header_pattern = re.compile(r'(?:\\| )( +)([^\|]+)')`
			`problem_headers = problem_header_pattern.findall(line)`
			`if problem_headers:`
			`for p_header in problem_headers:`
			`old_header = p_header[0] + p_header[1]`
			`sub_chars = '�' * len(p_header[0])`
			`new_header = sub_chars + p_header[1]`
			`line = line.replace(old_header, new_header)`

			`line = line.replace('\|', ' ')`
			`result.append(_snake_case(line))`
			`continue`

			`# this is a data row`
add more pretty table separators 2022-03-22 17:47:19 -07:00			`line = line.replace('\|', ' ')\`
			`.replace('│', ' ')\`
			`.replace('┃', ' ')\`
			`.replace('┆', ' ')\`
			`.replace('┇', ' ')\`
			`.replace('┊', ' ')\`
			`.replace('┋', ' ')\`
			`.replace('╎', ' ')\`
			`.replace('╏', ' ')\`
			`.replace('║', ' ')`
add asciitable parser 2022-03-22 12:25:24 -07:00			`result.append(line)`

			`return result`


fix for special characters in headers 2022-03-23 15:08:33 -07:00			`def _fixup_headers(table: List[Dict]) -> List[Dict]:`
			`"""remove consecutive underscores and any trailing underscores"""`
			`new_table = []`
			`for row in table:`
			`new_row = row.copy()`
formatting 2022-03-24 09:39:53 -07:00			`for k in row:`
fix asciitable parser for cases where centered rows cause misaligned fields 2022-05-26 08:57:35 -07:00			`# remove replacement character`
			`k_new = k.replace('�', '')`
fix for special characters in headers 2022-03-23 15:08:33 -07:00			`# remove consecutive underscores`
			`k_new = re.sub(r'__+', '_', k_new)`
			`# remove trailing underscores`
			`k_new = re.sub(r'_+$', '', k_new)`
			`new_row[k_new] = new_row.pop(k)`
			`new_table.append(new_row)`

			`return new_table`

formatting 2022-03-24 09:39:53 -07:00
add asciitable parser 2022-03-22 12:25:24 -07:00			`def parse(`
			`data: str,`
			`raw: bool = False,`
			`quiet: bool = False`
			`) -> List[Dict]:`
			`"""`
			`Main text parsing function`

			`Parameters:`

			`data: (string) text data to parse`
			`raw: (boolean) unprocessed output if True`
			`quiet: (boolean) suppress warning messages if True`

			`Returns:`

			`List of Dictionaries. Raw or processed structured data.`
			`"""`
			`jc.utils.compatibility(__name__, info.compatible, quiet)`
			`jc.utils.input_type_check(data)`

			`raw_output: List = []`

			`if jc.utils.has_data(data):`
			`data = _remove_ansi(data)`
			`data = _strip(data)`
			`data_list = _normalize_rows(data)`
fix for special characters in headers 2022-03-23 15:08:33 -07:00			`raw_table = sparse_table_parse(data_list)`
			`raw_output = _fixup_headers(raw_table)`
add asciitable parser 2022-03-22 12:25:24 -07:00
			`return raw_output if raw else _process(raw_output)`