1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-19 00:17:51 +02:00

add git-log-s parser

This commit is contained in:
Kelly Brazil
2022-04-28 16:23:56 -07:00
parent 97ac965ba5
commit a824ccaff3
4 changed files with 332 additions and 1 deletions

View File

@ -171,6 +171,7 @@ option.
| `--free` | `free` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/free) | | `--free` | `free` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/free) |
| `--fstab` | `/etc/fstab` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/fstab) | | `--fstab` | `/etc/fstab` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/fstab) |
| `--git-log` | `git log` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/git_log) | | `--git-log` | `git log` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/git_log) |
| `--git-log-s` | `git log` command streaming parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/git_log_s) |
| `--group` | `/etc/group` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/group) | | `--group` | `/etc/group` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/group) |
| `--gshadow` | `/etc/gshadow` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/gshadow) | | `--gshadow` | `/etc/gshadow` file parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/gshadow) |
| `--hash` | `hash` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/hash) | | `--hash` | `hash` command parser | [📃](https://kellyjonbrazil.github.io/jc/docs/parsers/hash) |

77
docs/parsers/git_log_s.md Normal file
View File

@ -0,0 +1,77 @@
[Home](https://kellyjonbrazil.github.io/jc/)
<a id="jc.parsers.git_log_s"></a>
# jc.parsers.git\_log\_s
jc - JSON Convert `git log` command output streaming parser
> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
Dictionaries (module)
Usage (cli):
$ git log | jc --git-log-s
Usage (module):
import jc
result = jc.parse('git_log_s', git_log_command_output.splitlines())
for item in result:
# do something
Schema:
{
"git_log_s": string,
# below object only exists if using -qq or ignore_exceptions=True
"_jc_meta": {
"success": boolean, # false if error parsing
"error": string, # exists if "success" is false
"line": string # exists if "success" is false
}
}
Examples:
$ git log | jc --git-log-s
{example output}
...
$ git log | jc --git-log-s -r
{example output}
...
<a id="jc.parsers.git_log_s.parse"></a>
### parse
```python
@add_jc_meta
def parse(data: Iterable[str],
raw: bool = False,
quiet: bool = False,
ignore_exceptions: bool = False) -> Union[Iterable[Dict], tuple]
```
Main text parsing generator function. Returns an iterable object.
Parameters:
data: (iterable) line-based text data to parse
(e.g. sys.stdin or str.splitlines())
raw: (boolean) unprocessed output if True
quiet: (boolean) suppress warning messages if True
ignore_exceptions: (boolean) ignore parsing exceptions if True
Returns:
Iterable of Dictionaries
### Parser Information
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)

248
jc/parsers/git_log_s.py Normal file
View File

@ -0,0 +1,248 @@
"""jc - JSON Convert `git log` command output streaming parser
> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
Dictionaries (module)
Usage (cli):
$ git log | jc --git-log-s
Usage (module):
import jc
result = jc.parse('git_log_s', git_log_command_output.splitlines())
for item in result:
# do something
Schema:
{
"git_log_s": string,
# below object only exists if using -qq or ignore_exceptions=True
"_jc_meta": {
"success": boolean, # false if error parsing
"error": string, # exists if "success" is false
"line": string # exists if "success" is false
}
}
Examples:
$ git log | jc --git-log-s
{example output}
...
$ git log | jc --git-log-s -r
{example output}
...
"""
import re
from typing import List, Dict, Iterable, Union
import jc.utils
from jc.streaming import (
add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield
)
from jc.exceptions import ParseError
hash_pattern = re.compile(r'([0-9]|[a-f])+')
changes_pattern = re.compile(r'\s(?P<files>\d+)\s+(files? changed),\s+(?P<insertions>\d+)\s(insertions?\(\+\))?(,\s+)?(?P<deletions>\d+)?(\s+deletions?\(\-\))?')
class info():
"""Provides parser metadata (version, author, etc.)"""
version = '1.0'
description = '`git log` command streaming parser'
author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com'
compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
streaming = True
__version__ = info.version
def _process(proc_data: Dict) -> Dict:
"""
Final processing to conform to the schema.
Parameters:
proc_data: (Dictionary) raw structured data to process
Returns:
Dictionary. Structured data to conform to the schema.
"""
int_list = ['files_changed', 'insertions', 'deletions']
if 'date' in proc_data:
ts = jc.utils.timestamp(proc_data['date'], format_hint=(1100,))
proc_data['epoch'] = ts.naive
proc_data['epoch_utc'] = ts.utc
if 'stats' in proc_data:
for key in proc_data['stats']:
if key in int_list:
proc_data['stats'][key] = jc.utils.convert_to_int(proc_data['stats'][key])
return proc_data
def _is_commit_hash(hash_string: str) -> bool:
# 0c55240e9da30ac4293dc324f1094de2abd3da91
if len(hash_string) != 40:
return False
if hash_pattern.match(hash_string):
return True
return False
@add_jc_meta
def parse(
data: Iterable[str],
raw: bool = False,
quiet: bool = False,
ignore_exceptions: bool = False
) -> Union[Iterable[Dict], tuple]:
"""
Main text parsing generator function. Returns an iterable object.
Parameters:
data: (iterable) line-based text data to parse
(e.g. sys.stdin or str.splitlines())
raw: (boolean) unprocessed output if True
quiet: (boolean) suppress warning messages if True
ignore_exceptions: (boolean) ignore parsing exceptions if True
Returns:
Iterable of Dictionaries
"""
jc.utils.compatibility(__name__, info.compatible, quiet)
streaming_input_type_check(data)
output_line: Dict = {}
message_lines: List[str] = []
file_list: List[str] = []
for line in data:
try:
streaming_line_input_type_check(line)
if line == '' or line == '\n':
continue
line_list = line.rstrip().split(maxsplit=1)
# oneline style
if line_list and _is_commit_hash(line_list[0]):
if output_line:
if file_list:
output_line['stats']['files'] = file_list
yield output_line if raw else _process(output_line)
output_line = {}
message_lines = []
file_list = []
output_line = {
'commit': line_list[0],
'message': line_list[1]
}
continue
# all other styles
if line.startswith('commit '):
if output_line:
if message_lines:
output_line['message'] = '\n'.join(message_lines)
if file_list:
output_line['stats']['files'] = file_list
yield output_line if raw else _process(output_line)
output_line = {}
message_lines = []
file_list = []
output_line['commit'] = line_list[1]
continue
if line.startswith('Merge: '):
output_line['merge'] = line_list[1]
continue
if line.startswith('Author: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['author'] = values[0]
output_line['author_email'] = values[1].strip('<').strip('>')
continue
if line.startswith('Date: '):
output_line['date'] = line_list[1]
continue
if line.startswith('AuthorDate: '):
output_line['date'] = line_list[1]
continue
if line.startswith('CommitDate: '):
output_line['commit_by_date'] = line_list[1]
continue
if line.startswith('Commit: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['commit_by'] = values[0]
output_line['commit_by_email'] = values[1].strip('<').strip('>')
continue
if line.startswith(' '):
message_lines.append(line.strip())
continue
if line.startswith(' ') and 'changed, ' not in line:
# this is a file name
file_name = line.split('|')[0].strip()
file_list.append(file_name)
continue
if line.startswith(' ') and 'changed, ' in line:
# this is the stat summary
changes = changes_pattern.match(line)
if changes:
files = changes['files']
insertions = changes['insertions']
deletions = changes['deletions']
output_line['stats'] = {
'files_changed': files or '0',
'insertions': insertions or '0',
'deletions': deletions or '0'
}
continue
raise ParseError('Not git_log_s data')
except Exception as e:
yield raise_or_yield(ignore_exceptions, e, line)
try:
if output_line:
if message_lines:
output_line['message'] = '\n'.join(message_lines)
if file_list:
output_line['stats']['files'] = file_list
yield output_line if raw else _process(output_line)
except Exception as e:
yield raise_or_yield(ignore_exceptions, e, line)

View File

@ -1,4 +1,4 @@
.TH jc 1 2022-04-28 1.18.8 "JSON Convert" .TH jc 1 2022-04-28 1.19.0 "JSON Convert"
.SH NAME .SH NAME
jc \- JSONifies the output of many CLI tools and file-types jc \- JSONifies the output of many CLI tools and file-types
.SH SYNOPSIS .SH SYNOPSIS
@ -142,6 +142,11 @@ CSV file streaming parser
\fB--git-log\fP \fB--git-log\fP
`git log` command parser `git log` command parser
.TP
.B
\fB--git-log-s\fP
`git log` command streaming parser
.TP .TP
.B .B
\fB--group\fP \fB--group\fP