"""jc - JSON CLI output utility ls Parser Usage: specify --ls as the first argument if the piped input is coming from ls ls options supported: - None - laR --time-style=full-iso - h file sizes will be available in text form with -r but larger file sizes with human readable suffixes will be converted to Null in default view since the parser attempts to convert this field to an integer. Compatibility: 'linux', 'darwin', 'cygwin', 'aix', 'freebsd' Examples: $ ls /usr/bin | jc --ls -p [ { "filename": "apropos" }, { "filename": "arch" }, { "filename": "awk" }, { "filename": "base64" }, ... ] $ ls -l /usr/bin | jc --ls -p [ { "filename": "apropos", "link_to": "whatis", "flags": "lrwxrwxrwx.", "links": 1, "owner": "root", "group": "root", "size": 6, "date": "Aug 15 10:53" }, { "filename": "ar", "flags": "-rwxr-xr-x.", "links": 1, "owner": "root", "group": "root", "size": 62744, "date": "Aug 8 16:14" }, { "filename": "arch", "flags": "-rwxr-xr-x.", "links": 1, "owner": "root", "group": "root", "size": 33080, "date": "Aug 19 23:25" }, ... ] $ ls -l /usr/bin | jc --ls -p -r [ { "filename": "apropos", "link_to": "whatis", "flags": "lrwxrwxrwx.", "links": "1", "owner": "root", "group": "root", "size": "6", "date": "Aug 15 10:53" }, { "filename": "arch", "flags": "-rwxr-xr-x.", "links": "1", "owner": "root", "group": "root", "size": "33080", "date": "Aug 19 23:25" }, { "filename": "awk", "link_to": "gawk", "flags": "lrwxrwxrwx.", "links": "1", "owner": "root", "group": "root", "size": "4", "date": "Aug 15 10:53" }, { "filename": "base64", "flags": "-rwxr-xr-x.", "links": "1", "owner": "root", "group": "root", "size": "37360", "date": "Aug 19 23:25" }, { "filename": "basename", "flags": "-rwxr-xr-x.", "links": "1", "owner": "root", "group": "root", "size": "29032", "date": "Aug 19 23:25" }, { "filename": "bash", "flags": "-rwxr-xr-x.", "links": "1", "owner": "root", "group": "root", "size": "964600", "date": "Aug 8 05:06" }, ... ] $ ls -l /usr/bin | jc --ls | jq '.[] | select(.size > 50000000)' { "filename": "emacs", "flags": "-r-xr-xr-x", "links": 1, "owner": "root", "group": "wheel", "size": 117164432, "date": "May 3 2019" } """ import re import jc.utils class info(): version = '1.1' description = 'ls command parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' # compatible options: linux, darwin, cygwin, win32, aix, freebsd compatible = ['linux', 'darwin', 'cygwin', 'aix', 'freebsd'] magic_commands = ['ls'] __version__ = info.version def process(proc_data): """ Final processing to conform to the schema. Parameters: proc_data: (dictionary) raw structured data to process Returns: List of dictionaries. Structured data with the following schema: [ { "filename": string, "flags": string, "links": integer, "parent": string, "owner": string, "group": string, "size": integer, "date": string } ] """ for entry in proc_data: int_list = ['links', 'size'] for key in int_list: if key in entry: try: key_int = int(entry[key]) entry[key] = key_int except (ValueError): entry[key] = None return proc_data def parse(data, raw=False, quiet=False): """ Main text parsing function Parameters: data: (string) text data to parse raw: (boolean) output preprocessed JSON if True quiet: (boolean) suppress warning messages if True Returns: List of dictionaries. Raw or processed structured data. """ if not quiet: jc.utils.compatibility(__name__, info.compatible) raw_output = [] linedata = data.splitlines() # Delete first line if it starts with 'total 1234' if linedata: if re.match('^total [0-9]+', linedata[0]): linedata.pop(0) parent = '' next_is_parent = False # Look for parent line if glob or -R is used if not re.match('^[-dclpsbDCMnP?]([-r][-w][-xsS]){2}([-r][-w][-xtT])[+]?', linedata[0]) \ and linedata[0].endswith(':'): parent = linedata.pop(0)[:-1] # Pop following total line linedata.pop(0) if linedata: # Check if -l was used to parse extra data if re.match('^[-dclpsbDCMnP?]([-r][-w][-xsS]){2}([-r][-w][-xtT])[+]?', linedata[0]): for entry in linedata: output_line = {} parsed_line = entry.split(maxsplit=8) if not re.match('^[-dclpsbDCMnP?]([-r][-w][-xsS]){2}([-r][-w][-xtT])[+]?', entry) \ and entry.endswith(':'): parent = entry[:-1] continue if re.match('^total [0-9]+', entry): continue if entry == '': continue # split filenames and links filename_field = parsed_line[8].split(' -> ') # create list of dictionaries output_line['filename'] = filename_field[0] if len(filename_field) > 1: output_line['link_to'] = filename_field[1] if parent: output_line['parent'] = parent output_line['flags'] = parsed_line[0] output_line['links'] = parsed_line[1] output_line['owner'] = parsed_line[2] output_line['group'] = parsed_line[3] output_line['size'] = parsed_line[4] output_line['date'] = ' '.join(parsed_line[5:8]) raw_output.append(output_line) else: for entry in linedata: output_line = {} if entry == '': next_is_parent = True continue if next_is_parent: parent = entry[:-1] next_is_parent = False continue output_line['filename'] = entry if parent: output_line['parent'] = parent raw_output.append(output_line) if raw: return raw_output else: return process(raw_output)