From e4cdfa13cabbdd0e7f00cad75d54aa208f86a5a3 Mon Sep 17 00:00:00 2001 From: Adam Wolf Date: Fri, 11 Nov 2022 13:33:01 -0600 Subject: [PATCH] Fix git log parsing with empty name or email Sometimes, folks leave their name or email blank in their git commits. Previously, a blank name crashed the git log parser. --- jc/parsers/git_log.py | 30 +++++++++++++++---- jc/parsers/git_log_s.py | 9 ++---- .../git-log-blank-author-fix-streaming.json | 1 + .../generic/git-log-blank-author-fix.json | 1 + .../generic/git-log-blank-author-fix.out | 17 +++++++++++ tests/test_git_log.py | 13 ++++++++ tests/test_git_log_s.py | 13 ++++++++ 7 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 tests/fixtures/generic/git-log-blank-author-fix-streaming.json create mode 100644 tests/fixtures/generic/git-log-blank-author-fix.json create mode 100644 tests/fixtures/generic/git-log-blank-author-fix.out diff --git a/jc/parsers/git_log.py b/jc/parsers/git_log.py index 088b813e..0d727f6d 100644 --- a/jc/parsers/git_log.py +++ b/jc/parsers/git_log.py @@ -202,6 +202,28 @@ def _is_commit_hash(hash_string: str) -> bool: return False +def _parse_name_email(line): + values = line.rsplit(maxsplit=1) + name = None + email = None + + if len(values) == 2: + name = values[0] + if values[1].startswith('<') and values[1].endswith('>'): + email = values[1][1:-1] + else: + if values[0].lstrip().startswith('<') and values[0].endswith('>'): + email = values[0].lstrip()[1:-1] + else: + name = values[0] + + if not name: + name = None + if not email: + email = None # covers '<>' case turning into null, not '' + + return name, email + def parse( data: str, @@ -271,9 +293,7 @@ def parse( continue if line.startswith('Author: '): - values = line_list[1].rsplit(maxsplit=1) - output_line['author'] = values[0] - output_line['author_email'] = values[1].strip('<').strip('>') + output_line['author'], output_line['author_email'] = _parse_name_email(line_list[1]) continue if line.startswith('Date: '): @@ 
-289,9 +309,7 @@ def parse( continue if line.startswith('Commit: '): - values = line_list[1].rsplit(maxsplit=1) - output_line['commit_by'] = values[0] - output_line['commit_by_email'] = values[1].strip('<').strip('>') + output_line['commit_by'], output_line['commit_by_email'] = _parse_name_email(line_list[1]) continue if line.startswith(' '): diff --git a/jc/parsers/git_log_s.py b/jc/parsers/git_log_s.py index 63a27a1b..f8406954 100644 --- a/jc/parsers/git_log_s.py +++ b/jc/parsers/git_log_s.py @@ -75,6 +75,7 @@ Examples: import re from typing import List, Dict, Iterable, Union import jc.utils +from jc.parsers.git_log import _parse_name_email from jc.streaming import ( add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield ) @@ -215,9 +216,7 @@ def parse( continue if line.startswith('Author: '): - values = line_list[1].rsplit(maxsplit=1) - output_line['author'] = values[0] - output_line['author_email'] = values[1].strip('<').strip('>') + output_line['author'], output_line['author_email'] = _parse_name_email(line_list[1]) continue if line.startswith('Date: '): @@ -233,9 +232,7 @@ def parse( continue if line.startswith('Commit: '): - values = line_list[1].rsplit(maxsplit=1) - output_line['commit_by'] = values[0] - output_line['commit_by_email'] = values[1].strip('<').strip('>') + output_line['commit_by'], output_line['commit_by_email'] = _parse_name_email(line_list[1]) continue if line.startswith(' '): diff --git a/tests/fixtures/generic/git-log-blank-author-fix-streaming.json b/tests/fixtures/generic/git-log-blank-author-fix-streaming.json new file mode 100644 index 00000000..9803e3d5 --- /dev/null +++ b/tests/fixtures/generic/git-log-blank-author-fix-streaming.json @@ -0,0 +1 @@ +[{"commit":"096fffdb79807d34b99985b38df0a3df7f6a86c7","author":null,"author_email":"foo@example.com","date":"Wed Apr 20 10:03:36 2022 -0400","message":"commit by an author with a blank 
name","epoch":1650474216,"epoch_utc":null},{"commit":"728d882ed007b3c8b785018874a0eb06e1143b66","author":null,"author_email":null,"date":"Wed Apr 20 09:50:19 2022 -0400","message":"this author has a blank name and an empty email","epoch":1650473419,"epoch_utc":null},{"commit":"b53e42aca623181aa9bc72194e6eeef1e9a3a237","author":"Bob Committer","author_email":null,"date":"Wed Apr 20 09:44:42 2022 -0400","message":"this author has a name, but no email","epoch":1650473082,"epoch_utc":null}] \ No newline at end of file diff --git a/tests/fixtures/generic/git-log-blank-author-fix.json b/tests/fixtures/generic/git-log-blank-author-fix.json new file mode 100644 index 00000000..9803e3d5 --- /dev/null +++ b/tests/fixtures/generic/git-log-blank-author-fix.json @@ -0,0 +1 @@ +[{"commit":"096fffdb79807d34b99985b38df0a3df7f6a86c7","author":null,"author_email":"foo@example.com","date":"Wed Apr 20 10:03:36 2022 -0400","message":"commit by an author with a blank name","epoch":1650474216,"epoch_utc":null},{"commit":"728d882ed007b3c8b785018874a0eb06e1143b66","author":null,"author_email":null,"date":"Wed Apr 20 09:50:19 2022 -0400","message":"this author has a blank name and an empty email","epoch":1650473419,"epoch_utc":null},{"commit":"b53e42aca623181aa9bc72194e6eeef1e9a3a237","author":"Bob Committer","author_email":null,"date":"Wed Apr 20 09:44:42 2022 -0400","message":"this author has a name, but no email","epoch":1650473082,"epoch_utc":null}] \ No newline at end of file diff --git a/tests/fixtures/generic/git-log-blank-author-fix.out b/tests/fixtures/generic/git-log-blank-author-fix.out new file mode 100644 index 00000000..a5130d06 --- /dev/null +++ b/tests/fixtures/generic/git-log-blank-author-fix.out @@ -0,0 +1,17 @@ +commit 096fffdb79807d34b99985b38df0a3df7f6a86c7 +Author: +Date: Wed Apr 20 10:03:36 2022 -0400 + + commit by an author with a blank name + +commit 728d882ed007b3c8b785018874a0eb06e1143b66 +Author: <> +Date: Wed Apr 20 09:50:19 2022 -0400 + + this author has a 
blank name and an empty email + +commit b53e42aca623181aa9bc72194e6eeef1e9a3a237 +Author: Bob Committer <> +Date: Wed Apr 20 09:44:42 2022 -0400 + + this author has a name, but no email \ No newline at end of file diff --git a/tests/test_git_log.py b/tests/test_git_log.py index 41f34a51..967bac69 100644 --- a/tests/test_git_log.py +++ b/tests/test_git_log.py @@ -63,6 +63,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.out'), 'r', encoding='utf-8') as f: git_log_fuller_is_hash_regex_fix = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.out'), 'r', encoding='utf-8') as f: + git_log_blank_author_fix = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log.json'), 'r', encoding='utf-8') as f: git_log_json = json.loads(f.read()) @@ -118,6 +121,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.json'), 'r', encoding='utf-8') as f: git_log_fuller_is_hash_regex_fix_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.json'), 'r', encoding='utf-8') as f: + git_log_blank_author_fix_json = json.loads(f.read()) + def test_git_log_nodata(self): """ @@ -235,6 +241,13 @@ class MyTests(unittest.TestCase): """ self.assertEqual(jc.parsers.git_log.parse(self.git_log_fuller_is_hash_regex_fix, quiet=True), self.git_log_fuller_is_hash_regex_fix_json) + def test_git_log_blank_author_fix(self): + """ + Test 'git_log' fix for when a commit author has a blank name, + empty email, or both + """ + self.assertEqual(jc.parsers.git_log.parse(self.git_log_blank_author_fix, quiet=True), self.git_log_blank_author_fix_json) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_git_log_s.py b/tests/test_git_log_s.py index 8626f372..884d30e4 100644 --- 
a/tests/test_git_log_s.py +++ b/tests/test_git_log_s.py @@ -67,6 +67,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.out'), 'r', encoding='utf-8') as f: generic_git_log_fuller_is_hash_regex_fix = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.out'), 'r', encoding='utf-8') as f: + generic_git_log_blank_author_fix = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-streaming.json'), 'r', encoding='utf-8') as f: generic_git_log_streaming_json = json.loads(f.read()) @@ -125,6 +128,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix-streaming.json'), 'r', encoding='utf-8') as f: generic_git_log_fuller_is_hash_regex_fix_streaming_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix-streaming.json'), 'r', encoding='utf-8') as f: + generic_git_log_blank_author_fix_streaming_json = json.loads(f.read()) + def test_git_log_s_nodata(self): """ @@ -262,6 +268,13 @@ class MyTests(unittest.TestCase): """ self.assertEqual(list(jc.parsers.git_log_s.parse(self.generic_git_log_fuller_is_hash_regex_fix.splitlines(), quiet=True)), self.generic_git_log_fuller_is_hash_regex_fix_streaming_json) + def test_git_log_blank_author_fix(self): + """ + Test 'git_log' fix for when a commit author has a blank name, + empty email, or both + """ + self.assertEqual(list(jc.parsers.git_log_s.parse(self.generic_git_log_blank_author_fix.splitlines(), quiet=True)), self.generic_git_log_blank_author_fix_streaming_json) + if __name__ == '__main__': unittest.main()