new universal parsers to limit code duplication

2025-07-13 01:20:24 +02:00 · 2019-12-09 14:01:47 -08:00
parent bbba1fe477
commit 50a6b256b8
1 changed files with 83 additions and 0 deletions
--- a/jc/parsers/universal.py
+++ b/jc/parsers/universal.py
@ -0,0 +1,83 @@
 """jc - JSON CLI output utility universal Parsers"""
 import string
 def sparse_table_parse(data, delim='\u2063'):
    """
    Parse tables with missing column data or with spaces in column data.
    Parameters:
        data:        (list)  Text data to parse that has been split into lines via .splitlines().
                             Item 0 must be the header row. Any spaces in header names should be
                             changed to underscore '_'. You should also ensure headers are
                             lowercase by using .lower(). Do not change the position of header
                             names as the positions are used to find the data.
                             Also, ensure there are no blank lines (list items) in the data.
        delim:      (string) Delimiter to use. By default 'u\2063' (invisible separator) is used
                             since this is unlikely to ever be seen in terminal output. You can
                             change this for troubleshooting purposes or if there is a delimiter
                             conflict with your data.
    Returns:
        dictionary   raw structured data
    """
    output = []
    header_text = data.pop(0)
    header_text = header_text + ' '
    header_list = header_text.split()
    # find each column index and end position
    header_search = [header_list[0]]
    for h in header_list[1:]:
        header_search.append(' ' + h + ' ')
    header_spec_list = []
    for i, column in enumerate(header_list[0:len(header_list) - 1]):
        header_spec = {
            'name': column,
            'end': header_text.find(header_search[i + 1])
        }
        header_spec_list.append(header_spec)
    # parse lines
    if data:
        for entry in data:
            output_line = {}
            # insert new separator since data can contain spaces
            for col in reversed(header_list):
                # find the right header_spec
                for h_spec in header_spec_list:
                    if h_spec['name'] == col:
                        h_end = h_spec['end']
                        # check if the location contains whitespace. if not
                        # then move to the left until a space is found
                        while h_end > 0 and entry[h_end] not in string.whitespace:
                            h_end -= 1
                        # insert custom delimiter
                        entry = entry[:h_end] + delim + entry[h_end + 1:]
            # create the entry list from the new custom delimiter
            entry_list = entry.split(delim, maxsplit=len(header_list) - 1)
            # clean up leading and trailing spaces in entry
            clean_entry_list = []
            for col in entry_list:
                clean_entry = col.strip()
                if clean_entry == '':
                    clean_entry = None
                clean_entry_list.append(clean_entry)
            output_line = dict(zip(header_list, clean_entry_list))
            output.append(output_line)
    return output