From 13910632173e7c815ff1161cf52fd1d17153c4a9 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Wed, 2 Feb 2022 11:49:39 -0800 Subject: [PATCH] first draft rsync streaming parser --- jc/lib.py | 1 + jc/parsers/rsync_s.py | 366 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100644 jc/parsers/rsync_s.py diff --git a/jc/lib.py b/jc/lib.py index 3e0310ff..2c95985c 100644 --- a/jc/lib.py +++ b/jc/lib.py @@ -70,6 +70,7 @@ parsers = [ 'route', 'rpm-qi', 'rsync', + 'rsync-s', 'sfdisk', 'shadow', 'ss', diff --git a/jc/parsers/rsync_s.py b/jc/parsers/rsync_s.py new file mode 100644 index 00000000..98ba0e10 --- /dev/null +++ b/jc/parsers/rsync_s.py @@ -0,0 +1,366 @@ +"""jc - JSON CLI output utility `rsync` command output streaming parser + +> This streaming parser outputs JSON Lines + +<> + +Usage (cli): + + $ rsync | jc --rsync-s + +Usage (module): + + import jc + # result is an iterable object (generator) + result = jc.parse('rsync_s', rsync_command_output.splitlines()) + for item in result: + # do something + + or + + import jc.parsers.rsync_s + # result is an iterable object (generator) + result = jc.parsers.rsync_s.parse(rsync_command_output.splitlines()) + for item in result: + # do something + +Schema: + + { + "rsync": string, + + # Below object only exists if using -qq or ignore_exceptions=True + + "_jc_meta": + { + "success": boolean, # false if error parsing + "error": string, # exists if "success" is false + "line": string # exists if "success" is false + } + } + +Examples: + + $ rsync | jc --rsync-s + {example output} + ... + + $ rsync | jc --rsync-s -r + {example output} + ... +""" +import re +from typing import Dict, Iterable +import jc.utils +from jc.utils import stream_success, stream_error +from jc.exceptions import ParseError + + +class info(): + """Provides parser metadata (version, author, etc.)""" + version = '1.0' + description = '`rsync` command streaming parser' + author = 'Kelly Brazil' + author_email = 'kellyjonbrazil@gmail.com' + compatible = ['linux', 'darwin', 'freebsd'] + streaming = True + + +__version__ = info.version + + +def _process(proc_data: Dict) -> Dict: + """ + Final processing to conform to the schema. + + Parameters: + + proc_data: (Dictionary) raw structured data to process + + Returns: + + Dictionary. Structured data to conform to the schema. + """ + + # process the data here + # rebuild output for added semantic information + # use helper functions in jc.utils for int, float, + # bool conversions and timestamps + + return proc_data + + +def parse( + data: Iterable[str], + raw: bool = False, + quiet: bool = False, + ignore_exceptions: bool = False +) -> Iterable[Dict]: + """ + Main text parsing generator function. Returns an iterator object. + + Parameters: + + data: (iterable) line-based text data to parse + (e.g. sys.stdin or str.splitlines()) + + raw: (boolean) unprocessed output if True + quiet: (boolean) suppress warning messages if True + ignore_exceptions: (boolean) ignore parsing exceptions if True + + Yields: + + Dictionary. Raw or processed structured data. + + Returns: + + Iterator object + """ + jc.utils.compatibility(__name__, info.compatible, quiet) + jc.utils.streaming_input_type_check(data) + + for line in data: + output_line: Dict = {} + summary: Dict = {} + + try: + jc.utils.streaming_line_input_type_check(line) + + update_type = { + '<': 'file sent', + '>': 'file received', + 'c': 'local change or creation', + 'h': 'hard link', + '.': 'not updated', + '*': 'message', + '+': None + } + + file_type = { + 'f': 'file', + 'd': 'directory', + 'L': 'symlink', + 'D': 'device', + 'S': 'special file', + '+': None + } + + checksum_or_value_different = { + 'c': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + size_different = { + 's': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + modification_time_different = { + 't': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + permissions_different = { + 'p': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + owner_different = { + 'o': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + group_different = { + 'g': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + acl_different = { + 'a': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + extended_attribute_different = { + 'x': True, + '.': False, + '+': None, + ' ': None, + '?': None + } + + file_line_re = re.compile(r'(?P[<>ch.*][fdlDS][c.+ ?][s.+ ?][t.+ ?][p.+ ?][o.+ ?][g.+ ?][u.+ ?][a.+ ?][x.+ ?]) (?P.+)') + file_line_mac_re = re.compile(r'(?P[<>ch.*][fdlDS][c.+ ?][s.+ ?][t.+ ?][p.+ ?][o.+ ?][g.+ ?][x.+ ?]) (?P.+)') + stat1_line_re = re.compile(r'(sent)\s+(?P[0-9,]+)\s+(bytes)\s+(received)\s+(?P[0-9,]+)\s+(bytes)\s+(?P[0-9,.]+)\s+(bytes/sec)') + stat2_line_re = re.compile(r'(total size is)\s+(?P[0-9,]+)\s+(speedup is)\s+(?P[0-9,.]+)') + + file_line_log_re = re.compile(r'(?P\d\d\d\d/\d\d/\d\d)\s+(?P