diff --git a/README.md b/README.md index 841434c5..89c06721 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ option. | `--free` | `free` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/free) | | `--fstab` | `/etc/fstab` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/fstab) | | `--git-log` | `git log` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/git_log) | +| `--git-log-s` | `git log` command streaming parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/git_log_s) | | `--group` | `/etc/group` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/group) | | `--gshadow` | `/etc/gshadow` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/gshadow) | | `--hash` | `hash` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/hash) | diff --git a/docs/parsers/git_log_s.md b/docs/parsers/git_log_s.md new file mode 100644 index 00000000..1bee665f --- /dev/null +++ b/docs/parsers/git_log_s.md @@ -0,0 +1,77 @@ +[Home](https://kellyjonbrazil.github.io/jc/) + + +# jc.parsers.git\_log\_s + +jc - JSON Convert `git log` command output streaming parser + +> This streaming parser outputs JSON Lines (cli) or returns an Iterable of + Dictionaries (module) + +Usage (cli): + + $ git log | jc --git-log-s + +Usage (module): + + import jc + + result = jc.parse('git_log_s', git_log_command_output.splitlines()) + for item in result: + # do something + +Schema: + + { + "git_log_s": string, + + # below object only exists if using -qq or ignore_exceptions=True + "_jc_meta": { + "success": boolean, # false if error parsing + "error": string, # exists if "success" is false + "line": string # exists if "success" is false + } + } + +Examples: + + $ git log | jc --git-log-s + {example output} + ... + + $ git log | jc --git-log-s -r + {example output} + ... 
+
+
+
+### parse
+
+```python
+@add_jc_meta
+def parse(data: Iterable[str],
+          raw: bool = False,
+          quiet: bool = False,
+          ignore_exceptions: bool = False) -> Union[Iterable[Dict], tuple]
+```
+
+Main text parsing generator function. Returns an iterable object.
+
+Parameters:
+
+    data:              (iterable)  line-based text data to parse
+                                   (e.g. sys.stdin or str.splitlines())
+
+    raw:               (boolean)   unprocessed output if True
+    quiet:             (boolean)   suppress warning messages if True
+    ignore_exceptions: (boolean)   ignore parsing exceptions if True
+
+
+Returns:
+
+    Iterable of Dictionaries
+
+### Parser Information
+Compatibility:  linux, darwin, cygwin, win32, aix, freebsd
+
+Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)
diff --git a/jc/parsers/git_log_s.py b/jc/parsers/git_log_s.py
new file mode 100644
index 00000000..d321928a
--- /dev/null
+++ b/jc/parsers/git_log_s.py
@@ -0,0 +1,256 @@
+"""jc - JSON Convert `git log` command output streaming parser
+
+> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
+  Dictionaries (module)
+
+Usage (cli):
+
+    $ git log | jc --git-log-s
+
+Usage (module):
+
+    import jc
+
+    result = jc.parse('git_log_s', git_log_command_output.splitlines())
+    for item in result:
+        # do something
+
+Schema:
+
+    {
+      "git_log_s":            string,
+
+      # below object only exists if using -qq or ignore_exceptions=True
+      "_jc_meta": {
+        "success":            boolean,     # false if error parsing
+        "error":              string,      # exists if "success" is false
+        "line":               string       # exists if "success" is false
+      }
+    }
+
+Examples:
+
+    $ git log | jc --git-log-s
+    {example output}
+    ...
+
+    $ git log | jc --git-log-s -r
+    {example output}
+    ...
+"""
+import re
+from typing import List, Dict, Iterable, Union
+import jc.utils
+from jc.streaming import (
+    add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield
+)
+from jc.exceptions import ParseError
+
+
+# A full (unabbreviated) SHA-1 commit id: exactly 40 lowercase hex digits.
+hash_pattern = re.compile(r'[0-9a-f]{40}')
+
+# `--stat`/`--shortstat` summary line. Either count may be absent, e.g.
+# " 1 file changed, 3 deletions(-)", so each clause is optional on its own.
+changes_pattern = re.compile(
+    r'\s(?P<files>\d+)\s+files? changed'
+    r'(?:,\s+(?P<insertions>\d+)\s+insertions?\(\+\))?'
+    r'(?:,\s+(?P<deletions>\d+)\s+deletions?\(-\))?'
+)
+
+
+class info():
+    """Provides parser metadata (version, author, etc.)"""
+    version = '1.0'
+    description = '`git log` command streaming parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+    streaming = True
+
+
+__version__ = info.version
+
+
+def _process(proc_data: Dict) -> Dict:
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (Dictionary) raw structured data to process
+
+    Returns:
+
+        Dictionary. Structured data to conform to the schema.
+    """
+    int_list = ['files_changed', 'insertions', 'deletions']
+
+    if 'date' in proc_data:
+        ts = jc.utils.timestamp(proc_data['date'], format_hint=(1100,))
+        proc_data['epoch'] = ts.naive
+        proc_data['epoch_utc'] = ts.utc
+
+    if 'stats' in proc_data:
+        for key in proc_data['stats']:
+            if key in int_list:
+                proc_data['stats'][key] = jc.utils.convert_to_int(proc_data['stats'][key])
+
+    return proc_data
+
+
+def _is_commit_hash(hash_string: str) -> bool:
+    """Return True only if `hash_string` is a full 40-character hex commit id."""
+    # 0c55240e9da30ac4293dc324f1094de2abd3da91
+    # fullmatch() rather than match(): match() only anchors at the start, so
+    # any 40-character word that merely begins with a hex digit would pass.
+    return hash_pattern.fullmatch(hash_string) is not None
+
+
+def _finalize(output_line: Dict, message_lines: List[str], file_list: List[str]) -> Dict:
+    """Attach the buffered message and file list to a completed commit."""
+    # oneline style already stored its subject under 'message'; keep it
+    if message_lines and 'message' not in output_line:
+        output_line['message'] = '\n'.join(message_lines)
+
+    if file_list:
+        # file lines can appear without a parsable summary line, in which
+        # case the 'stats' object has not been created yet
+        output_line.setdefault('stats', {})['files'] = file_list
+
+    return output_line
+
+
+@add_jc_meta
+def parse(
+    data: Iterable[str],
+    raw: bool = False,
+    quiet: bool = False,
+    ignore_exceptions: bool = False
+) -> Union[Iterable[Dict], tuple]:
+    """
+    Main text parsing generator function. Returns an iterable object.
+
+    Parameters:
+
+        data:              (iterable)  line-based text data to parse
+                                       (e.g. sys.stdin or str.splitlines())
+
+        raw:               (boolean)   unprocessed output if True
+        quiet:             (boolean)   suppress warning messages if True
+        ignore_exceptions: (boolean)   ignore parsing exceptions if True
+
+
+    Returns:
+
+        Iterable of Dictionaries
+    """
+    jc.utils.compatibility(__name__, info.compatible, quiet)
+    streaming_input_type_check(data)
+
+    output_line: Dict = {}
+    message_lines: List[str] = []
+    file_list: List[str] = []
+    line = ''  # keeps the final error handler valid when `data` is empty
+
+    for line in data:
+        try:
+            streaming_line_input_type_check(line)
+
+            if line == '' or line == '\n':
+                continue
+
+            line_list = line.rstrip().split(maxsplit=1)
+
+            # oneline style: "<hash> <subject>" starting in column 0. Indented
+            # lines are excluded so that a message or stat line whose first
+            # word happens to be a hash does not start a new commit.
+            is_indented = line.startswith((' ', '\t'))
+            if line_list and not is_indented and _is_commit_hash(line_list[0]):
+                if output_line:
+                    _finalize(output_line, message_lines, file_list)
+                    yield output_line if raw else _process(output_line)
+
+                output_line = {
+                    'commit': line_list[0],
+                    # a commit with an empty subject has nothing after the hash
+                    'message': line_list[1] if len(line_list) > 1 else ''
+                }
+                message_lines = []
+                file_list = []
+                continue
+
+            # all other styles
+            if line.startswith('commit '):
+                if output_line:
+                    _finalize(output_line, message_lines, file_list)
+                    yield output_line if raw else _process(output_line)
+
+                output_line = {'commit': line_list[1]}
+                message_lines = []
+                file_list = []
+                continue
+
+            if line.startswith('Merge: '):
+                output_line['merge'] = line_list[1]
+                continue
+
+            if line.startswith('Author: '):
+                values = line_list[1].rsplit(maxsplit=1)
+                output_line['author'] = values[0]
+                output_line['author_email'] = values[1].strip('<').strip('>')
+                continue
+
+            if line.startswith('Date: '):
+                output_line['date'] = line_list[1]
+                continue
+
+            if line.startswith('AuthorDate: '):
+                output_line['date'] = line_list[1]
+                continue
+
+            if line.startswith('CommitDate: '):
+                output_line['commit_by_date'] = line_list[1]
+                continue
+
+            if line.startswith('Commit: '):
+                values = line_list[1].rsplit(maxsplit=1)
+                output_line['commit_by'] = values[0]
+                output_line['commit_by_email'] = values[1].strip('<').strip('>')
+                continue
+
+            # commit message lines are indented by four spaces; this must be
+            # tested before the single-space stat checks below
+            if line.startswith('    '):
+                message_lines.append(line.strip())
+                continue
+
+            if line.startswith(' ') and 'changed, ' not in line:
+                # this is a file name
+                file_name = line.split('|')[0].strip()
+                file_list.append(file_name)
+                continue
+
+            if line.startswith(' ') and 'changed, ' in line:
+                # this is the stat summary
+                changes = changes_pattern.match(line)
+                if changes:
+                    output_line['stats'] = {
+                        'files_changed': changes['files'] or '0',
+                        'insertions': changes['insertions'] or '0',
+                        'deletions': changes['deletions'] or '0'
+                    }
+                continue
+
+            raise ParseError('Not git_log_s data')
+
+        except Exception as e:
+            yield raise_or_yield(ignore_exceptions, e, line)
+
+    try:
+        if output_line:
+            _finalize(output_line, message_lines, file_list)
+            yield output_line if raw else _process(output_line)
+
+    except Exception as e:
+        yield raise_or_yield(ignore_exceptions, e, line)
diff --git a/man/jc.1 b/man/jc.1
index 74713009..2f824ee5 100644
--- a/man/jc.1
+++ b/man/jc.1
@@ -1,4 +1,4 @@
-.TH jc 1 2022-04-28 1.18.8 "JSON Convert"
+.TH jc 1 2022-04-28 1.19.0 "JSON Convert"
 .SH NAME
 jc \- JSONifies the output of many CLI tools and file-types
 .SH SYNOPSIS
@@ -142,6 +142,11 @@ CSV file streaming parser
 \fB--git-log\fP
 `git log` command parser
 
+.TP
+.B
+\fB--git-log-s\fP
+`git log` command streaming parser
+
 .TP
 .B
 \fB--group\fP