diff --git a/docs/parsers/rsync_s.md b/docs/parsers/rsync_s.md new file mode 100644 index 00000000..61f791eb --- /dev/null +++ b/docs/parsers/rsync_s.md @@ -0,0 +1,124 @@ +[Home](https://kellyjonbrazil.github.io/jc/) + + +# jc.parsers.rsync\_s + +jc - JSON CLI output utility `rsync` command output streaming parser + +> This streaming parser outputs JSON Lines + +Supports the `-i` or `--itemize-changes` options with all levels of +verbosity. + +Will also process the rsync log file generated with the `--log-file` +option. + +Usage (cli): + + $ rsync -i -a source/ dest | jc --rsync-s + +Usage (module): + + import jc + # result is an iterable object (generator) + result = jc.parse('rsync_s', rsync_command_output.splitlines()) + for item in result: + # do something + + or + + import jc.parsers.rsync_s + # result is an iterable object (generator) + result = jc.parsers.rsync_s.parse(rsync_command_output.splitlines()) + for item in result: + # do something + +Schema: + + { + "type": string, # 'file' or 'summary' + "date": string, + "time": string, + "process": integer, + "sent": integer, + "received": integer, + "total_size": integer, + "matches": integer, + "hash_hits": integer, + "false_alarms": integer, + "data": integer, + "bytes_sec": float, + "speedup": float, + "filename": string, + "date": string, + "time": string, + "process": integer, + "metadata": string, + "update_type": string/null, [0] + "file_type": string/null, [1] + "checksum_or_value_different": bool/null, + "size_different": bool/null, + "modification_time_different": bool/null, + "permissions_different": bool/null, + "owner_different": bool/null, + "group_different": bool/null, + "acl_different": bool/null, + "extended_attribute_different": bool/null, + "epoch": int, [2] + + # Below object only exists if using -qq or ignore_exceptions=True + + "_jc_meta": + { + "success": boolean, # false if error parsing + "error": string, # exists if "success" is false + "line": string # exists if "success" is false + } + } + + [0] 'file sent', 'file received', 'local change or creation', + 'hard link', 'not updated', 'message' + [1] 'file', 'directory', 'symlink', 'device', 'special file' + [2] naive timestamp if time and date fields exist and can be converted. + +Examples: + + $ rsync | jc --rsync-s + {example output} + ... + + $ rsync | jc --rsync-s -r + {example output} + ... + + + +### parse + +```python +def parse(data: Iterable[str], raw: bool = False, quiet: bool = False, ignore_exceptions: bool = False) -> Iterable[Dict] +``` + +Main text parsing generator function. Returns an iterator object. + +Parameters: + + data: (iterable) line-based text data to parse + (e.g. sys.stdin or str.splitlines()) + + raw: (boolean) unprocessed output if True + quiet: (boolean) suppress warning messages if True + ignore_exceptions: (boolean) ignore parsing exceptions if True + +Yields: + + Dictionary. Raw or processed structured data. + +Returns: + + Iterator object + +### Parser Information +Compatibility: linux, darwin, freebsd + +Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com) diff --git a/jc/parsers/rsync_s.py b/jc/parsers/rsync_s.py index 98ba0e10..43a04207 100644 --- a/jc/parsers/rsync_s.py +++ b/jc/parsers/rsync_s.py @@ -2,11 +2,15 @@ > This streaming parser outputs JSON Lines -<> +Supports the `-i` or `--itemize-changes` options with all levels of +verbosity. + +Will also process the rsync log file generated with the `--log-file` +option. Usage (cli): - $ rsync | jc --rsync-s + $ rsync -i -a source/ dest | jc --rsync-s Usage (module): @@ -27,7 +31,35 @@ Usage (module): Schema: { - "rsync": string, + "type": string, # 'file' or 'summary' + "date": string, + "time": string, + "process": integer, + "sent": integer, + "received": integer, + "total_size": integer, + "matches": integer, + "hash_hits": integer, + "false_alarms": integer, + "data": integer, + "bytes_sec": float, + "speedup": float, + "filename": string, + "date": string, + "time": string, + "process": integer, + "metadata": string, + "update_type": string/null, [0] + "file_type": string/null, [1] + "checksum_or_value_different": bool/null, + "size_different": bool/null, + "modification_time_different": bool/null, + "permissions_different": bool/null, + "owner_different": bool/null, + "group_different": bool/null, + "acl_different": bool/null, + "extended_attribute_different": bool/null, + "epoch": int, [2] # Below object only exists if using -qq or ignore_exceptions=True @@ -39,6 +71,11 @@ Schema: } } + [0] 'file sent', 'file received', 'local change or creation', + 'hard link', 'not updated', 'message' + [1] 'file', 'directory', 'symlink', 'device', 'special file' + [2] naive timestamp if time and date fields exist and can be converted. + Examples: $ rsync | jc --rsync-s @@ -81,11 +118,24 @@ def _process(proc_data: Dict) -> Dict: Dictionary. Structured data to conform to the schema. """ + int_list = [ + 'process', 'sent', 'received', 'total_size', 'matches', 'hash_hits', + 'false_alarms', 'data' + ] + float_list = ['bytes_sec', 'speedup'] - # process the data here - # rebuild output for added semantic information - # use helper functions in jc.utils for int, float, - # bool conversions and timestamps + for key in proc_data.copy(): + if key in int_list: + proc_data[key] = jc.utils.convert_to_int(proc_data[key]) + if key in float_list: + proc_data[key] = jc.utils.convert_to_float(proc_data[key]) + + # add timestamp + if 'date' in proc_data and 'time' in proc_data: + date = proc_data['date'].replace('/', '-') + date_time = f'{date} {proc_data["time"]}' + ts = jc.utils.timestamp(date_time) + proc_data['epoch'] = ts.naive return proc_data @@ -119,11 +169,16 @@ def parse( jc.utils.compatibility(__name__, info.compatible, quiet) jc.utils.streaming_input_type_check(data) - for line in data: - output_line: Dict = {} - summary: Dict = {} + summary: Dict = {} + process: str = '' + last_process: str = '' + line: str = '' + + try: + + for line in data: + output_line: Dict = {} - try: jc.utils.streaming_line_input_type_check(line) update_type = { @@ -228,6 +283,7 @@ def parse( meta = file_line.group('meta') output_line = { + 'type': 'file', 'filename': filename, 'metadata': meta, 'update_type': update_type[meta[0]], @@ -242,12 +298,16 @@ def parse( 'extended_attribute_different': extended_attribute_different[meta[10]] } + yield stream_success(output_line, ignore_exceptions) if raw else stream_success(_process(output_line), ignore_exceptions) + continue + file_line_mac = file_line_mac_re.match(line) if file_line_mac: filename = file_line_mac.group('name') meta = file_line_mac.group('meta') output_line = { + 'type': 'file', 'filename': filename, 'metadata': meta, 'update_type': update_type[meta[0]], @@ -260,8 +320,16 @@ def parse( 'group_different': group_different[meta[7]] } + yield stream_success(output_line, ignore_exceptions) if raw else stream_success(_process(output_line), ignore_exceptions) + continue + file_line_log = file_line_log_re.match(line) if file_line_log: + if process != last_process: + yield stream_success(summary, ignore_exceptions) if raw else stream_success(_process(summary), ignore_exceptions) + last_process = process + summary = {} + filename = file_line_log.group('name') date = file_line_log.group('date') time = file_line_log.group('time') @@ -269,6 +337,7 @@ def parse( meta = file_line_log.group('meta') output_line = { + 'type': 'file', 'filename': filename, 'date': date, 'time': time, @@ -286,8 +355,16 @@ def parse( 'extended_attribute_different': extended_attribute_different[meta[10]] } + yield stream_success(output_line, ignore_exceptions) if raw else stream_success(_process(output_line), ignore_exceptions) + continue + file_line_log_mac = file_line_log_mac_re.match(line) if file_line_log_mac: + if process != last_process: + yield stream_success(summary, ignore_exceptions) if raw else stream_success(_process(summary), ignore_exceptions) + last_process = process + summary = {} + filename = file_line_log_mac.group('name') date = file_line_log_mac.group('date') time = file_line_log_mac.group('time') @@ -295,6 +372,7 @@ def parse( meta = file_line_log_mac.group('meta') output_line = { + 'type': 'file', 'filename': filename, 'date': date, 'time': time, @@ -310,22 +388,29 @@ def parse( 'group_different': group_different[meta[7]] } + yield stream_success(output_line, ignore_exceptions) if raw else stream_success(_process(output_line), ignore_exceptions) + continue + stat1_line = stat1_line_re.match(line) if stat1_line: summary = { + 'type': 'summary', 'sent': stat1_line.group('sent'), 'received': stat1_line.group('received'), 'bytes_sec': stat1_line.group('bytes_sec') } + continue stat2_line = stat2_line_re.match(line) if stat2_line: summary['total_size'] = stat2_line.group('total_size') summary['speedup'] = stat2_line.group('speedup') + continue stat_line_log = stat_line_log_re.match(line) if stat_line_log: summary = { + 'type': 'summary', 'date': stat_line_log.group('date'), 'time': stat_line_log.group('time'), 'process': stat_line_log.group('process'), @@ -333,10 +418,12 @@ def parse( 'received': stat_line_log.group('received'), 'total_size': stat_line_log.group('total_size') } + continue stat1_line_log_v = stat1_line_log_v_re.match(line) if stat1_line_log_v: summary = { + 'type': 'summary', 'date': stat1_line_log_v.group('date'), 'time': stat1_line_log_v.group('time'), 'process': stat1_line_log_v.group('process'), @@ -345,22 +432,23 @@ def parse( 'false_alarms': stat1_line_log_v.group('false_alarms'), 'data': stat1_line_log_v.group('data') } + continue stat2_line_log_v = stat2_line_log_v_re.match(line) if stat2_line_log_v: summary['sent'] = stat2_line_log_v.group('sent') summary['received'] = stat2_line_log_v.group('received') summary['bytes_sec'] = stat2_line_log_v.group('bytes_sec') + continue stat3_line_log_v = stat3_line_log_v_re.match(line) if stat3_line_log_v: summary['total_size'] = stat3_line_log_v.group('total_size') summary['speedup'] = stat3_line_log_v.group('speedup') - - if output_line: - yield stream_success(output_line, ignore_exceptions) if raw else stream_success(_process(output_line), ignore_exceptions) - else: continue - except Exception as e: - yield stream_error(e, ignore_exceptions, line) + if summary: + yield stream_success(summary, ignore_exceptions) if raw else stream_success(_process(summary), ignore_exceptions) + + except Exception as e: + yield stream_error(e, ignore_exceptions, line)