From 49a3a7db3b2b0b292489a41691c9dcbce816cfed Mon Sep 17 00:00:00 2001
From: Kelly Brazil <kellyjonbrazil@gmail.com>
Date: Tue, 19 Apr 2022 09:30:38 -0400
Subject: [PATCH] initial git-log parser

---
 jc/lib.py             |   1 +
 jc/parsers/git_log.py | 174 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 jc/parsers/git_log.py
diff --git a/jc/lib.py b/jc/lib.py
index 48c90abf..cb164352 100644
--- a/jc/lib.py
+++ b/jc/lib.py
@@ -33,6 +33,7 @@ parsers = [
     'finger',
     'free',
     'fstab',
+    'git-log',
     'group',
     'gshadow',
     'hash',
diff --git a/jc/parsers/git_log.py b/jc/parsers/git_log.py
new file mode 100644
index 00000000..fa4bb3aa
--- /dev/null
+++ b/jc/parsers/git_log.py
@@ -0,0 +1,174 @@
+"""jc - JSON Convert `git log` command output parser
+
+<<Short git-log description and caveats>>
+
+Usage (cli):
+
+    $ git log | jc --git-log
+
+    or
+
+    $ jc git log
+
+Usage (module):
+
+    import jc
+    result = jc.parse('git_log', git_log_command_output)
+
+Schema:
+
+    [
+      {
+        "git-log":     string,
+        "bar":     boolean,
+        "baz":     integer
+      }
+    ]
+
+Examples:
+
+    $ git-log | jc --git-log -p
+    []
+
+    $ git-log | jc --git-log -p -r
+    []
+"""
+import re
+from typing import List, Dict
+import jc.utils
+
+
+class info():
+    """Provides parser metadata (version, author, etc.)"""
+    version = '1.0'
+    description = '`git log` command parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+    magic_commands = ['git log']
+
+
+__version__ = info.version
+
+
+def _process(proc_data: List[Dict]) -> List[Dict]:
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (List of Dictionaries) raw structured data to process
+
+    Returns:
+
+        List of Dictionaries. Structured to conform to the schema.
+    """
+
+    # process the data here
+    # rebuild output for added semantic information
+    # use helper functions in jc.utils for int, float, bool
+    # conversions and timestamps
+
+    return proc_data
+
+
+def _is_commit_hash(hash_string: str) -> bool:
+    # 0c55240e9da30ac4293cc324f1094de2abd3da91
+    if len(hash_string) != 40:
+        return False
+
+    hash_pattern = r'([0-9]|[a-f])+'
+    if re.match(hash_pattern, hash_string):
+        return True
+
+    return False
+
+
+def parse(
+    data: str,
+    raw: bool = False,
+    quiet: bool = False
+) -> List[Dict]:
+    """
+    Main text parsing function
+
+    Parameters:
+
+        data:        (string)  text data to parse
+        raw:         (boolean) unprocessed output if True
+        quiet:       (boolean) suppress warning messages if True
+
+    Returns:
+
+        List of Dictionaries. Raw or processed structured data.
+    """
+    jc.utils.compatibility(__name__, info.compatible, quiet)
+    jc.utils.input_type_check(data)
+
+    raw_output: List = []
+    output_line = {}
+    message_lines = []
+
+    if jc.utils.has_data(data):
+
+        for line in data.splitlines():
+            line_list = line.split(maxsplit=1)
+
+            # oneline style
+            if line_list and _is_commit_hash(line_list[0]):
+                output_line = {
+                    'commit': line_list[0],
+                    'message': line_list[1]
+                }
+                raw_output.append(output_line)
+                continue
+
+            # all other styles
+            if line.startswith('commit '):
+                if output_line:
+                    if message_lines:
+                        output_line['message'] = '\n'.join(message_lines)
+                    raw_output.append(output_line)
+                    output_line = {}
+                    message_lines = []
+                output_line['commit'] = line_list[1]
+                continue
+
+            if line.startswith('Merge: '):
+                output_line['merge'] = line_list[1]
+                continue
+
+            if line.startswith('Author: '):
+                values = line_list[1].rsplit(maxsplit=1)
+                output_line['author'] = values[0]
+                output_line['author_email'] = values[1].strip('<').strip('>')
+                continue
+
+            if line.startswith('Date: '):
+                output_line['date'] = line_list[1]
+                continue
+
+            if line.startswith('AuthorDate: '):
+                output_line['date'] = line_list[1]
+                continue
+
+            if line.startswith('CommitDate: '):
+                output_line['commit_by_date'] = line_list[1]
+                continue
+
+            if line.startswith('Commit: '):
+                values = line_list[1].rsplit(maxsplit=1)
+                output_line['commit_by'] = values[0]
+                output_line['commit_by_email'] = values[1].strip('<').strip('>')
+                continue
+
+            if line.startswith('    '):
+                message_lines.append(line.strip())
+
+
+    if output_line:
+        if message_lines:
+            output_line['message'] = '\n'.join(message_lines)
+        raw_output.append(output_line)
+
+    return raw_output if raw else _process(raw_output)