1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00

Fix git log parsing with empty name or email

Sometimes, folks leave their name or email blank on their
git commits.  Previously, a blank name crashed the git log
parser.
This commit is contained in:
Adam Wolf
2022-11-11 13:33:01 -06:00
parent 299b0faf7c
commit e4cdfa13ca
7 changed files with 72 additions and 12 deletions

View File

@ -202,6 +202,28 @@ def _is_commit_hash(hash_string: str) -> bool:
return False
def _parse_name_email(line):
values = line.rsplit(maxsplit=1)
name = None
email = None
if len(values) == 2:
name = values[0]
if values[1].startswith('<') and values[1].endswith('>'):
email = values[1][1:-1]
else:
if values[0].lstrip().startswith('<') and values[0].endswith('>'):
email = values[0].lstrip()[1:-1]
else:
name = values[0]
if not name:
name = None
if not email:
email = None # covers '<>' case turning into null, not ''
return name, email
def parse(
data: str,
@ -271,9 +293,7 @@ def parse(
continue
if line.startswith('Author: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['author'] = values[0]
output_line['author_email'] = values[1].strip('<').strip('>')
output_line['author'], output_line['author_email'] = _parse_name_email(line_list[1])
continue
if line.startswith('Date: '):
@ -289,9 +309,7 @@ def parse(
continue
if line.startswith('Commit: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['commit_by'] = values[0]
output_line['commit_by_email'] = values[1].strip('<').strip('>')
output_line['commit_by'], output_line['commit_by_email'] = _parse_name_email(line_list[1])
continue
if line.startswith(' '):

View File

@ -75,6 +75,7 @@ Examples:
import re
from typing import List, Dict, Iterable, Union
import jc.utils
from jc.parsers.git_log import _parse_name_email
from jc.streaming import (
add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield
)
@ -215,9 +216,7 @@ def parse(
continue
if line.startswith('Author: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['author'] = values[0]
output_line['author_email'] = values[1].strip('<').strip('>')
output_line['author'], output_line['author_email'] = _parse_name_email(line_list[1])
continue
if line.startswith('Date: '):
@ -233,9 +232,7 @@ def parse(
continue
if line.startswith('Commit: '):
values = line_list[1].rsplit(maxsplit=1)
output_line['commit_by'] = values[0]
output_line['commit_by_email'] = values[1].strip('<').strip('>')
output_line['commit_by'], output_line['commit_by_email'] = _parse_name_email(line_list[1])
continue
if line.startswith(' '):

View File

@ -0,0 +1 @@
[{"commit":"096fffdb79807d34b99985b38df0a3df7f6a86c7","author":null,"author_email":"foo@example.com","date":"Wed Apr 20 10:03:36 2022 -0400","message":"commit by an author with a blank name","epoch":1650474216,"epoch_utc":null},{"commit":"728d882ed007b3c8b785018874a0eb06e1143b66","author":null,"author_email":null,"date":"Wed Apr 20 09:50:19 2022 -0400","message":"this author has a blank name and an empty email","epoch":1650473419,"epoch_utc":null},{"commit":"b53e42aca623181aa9bc72194e6eeef1e9a3a237","author":"Bob Committer","author_email":null,"date":"Wed Apr 20 09:44:42 2022 -0400","message":"this author has a name, but no email","epoch":1650473082,"epoch_utc":null}]

View File

@ -0,0 +1 @@
[{"commit":"096fffdb79807d34b99985b38df0a3df7f6a86c7","author":null,"author_email":"foo@example.com","date":"Wed Apr 20 10:03:36 2022 -0400","message":"commit by an author with a blank name","epoch":1650474216,"epoch_utc":null},{"commit":"728d882ed007b3c8b785018874a0eb06e1143b66","author":null,"author_email":null,"date":"Wed Apr 20 09:50:19 2022 -0400","message":"this author has a blank name and an empty email","epoch":1650473419,"epoch_utc":null},{"commit":"b53e42aca623181aa9bc72194e6eeef1e9a3a237","author":"Bob Committer","author_email":null,"date":"Wed Apr 20 09:44:42 2022 -0400","message":"this author has a name, but no email","epoch":1650473082,"epoch_utc":null}]

View File

@ -0,0 +1,17 @@
commit 096fffdb79807d34b99985b38df0a3df7f6a86c7
Author: <foo@example.com>
Date: Wed Apr 20 10:03:36 2022 -0400
commit by an author with a blank name
commit 728d882ed007b3c8b785018874a0eb06e1143b66
Author: <>
Date: Wed Apr 20 09:50:19 2022 -0400
this author has a blank name and an empty email
commit b53e42aca623181aa9bc72194e6eeef1e9a3a237
Author: Bob Committer <>
Date: Wed Apr 20 09:44:42 2022 -0400
this author has a name, but no email

View File

@ -63,6 +63,9 @@ class MyTests(unittest.TestCase):
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.out'), 'r', encoding='utf-8') as f:
git_log_fuller_is_hash_regex_fix = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.out'), 'r', encoding='utf-8') as f:
git_log_blank_author_fix = f.read()
# output
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log.json'), 'r', encoding='utf-8') as f:
git_log_json = json.loads(f.read())
@ -118,6 +121,9 @@ class MyTests(unittest.TestCase):
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.json'), 'r', encoding='utf-8') as f:
git_log_fuller_is_hash_regex_fix_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.json'), 'r', encoding='utf-8') as f:
git_log_blank_author_fix_json = json.loads(f.read())
def test_git_log_nodata(self):
"""
@ -235,6 +241,13 @@ class MyTests(unittest.TestCase):
"""
self.assertEqual(jc.parsers.git_log.parse(self.git_log_fuller_is_hash_regex_fix, quiet=True), self.git_log_fuller_is_hash_regex_fix_json)
def test_git_log_blank_author_fix(self):
"""
Test 'git_log' fix for when a commit author has a blank name,
empty email, or both. Parsing the 'git-log-blank-author-fix.out'
fixture must produce the contents of 'git-log-blank-author-fix.json'.
"""
self.assertEqual(jc.parsers.git_log.parse(self.git_log_blank_author_fix, quiet=True), self.git_log_blank_author_fix_json)
if __name__ == '__main__':
unittest.main()

View File

@ -67,6 +67,9 @@ class MyTests(unittest.TestCase):
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix.out'), 'r', encoding='utf-8') as f:
generic_git_log_fuller_is_hash_regex_fix = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix.out'), 'r', encoding='utf-8') as f:
generic_git_log_blank_author_fix = f.read()
# output
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-streaming.json'), 'r', encoding='utf-8') as f:
generic_git_log_streaming_json = json.loads(f.read())
@ -125,6 +128,9 @@ class MyTests(unittest.TestCase):
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-is-hash-regex-fix-streaming.json'), 'r', encoding='utf-8') as f:
generic_git_log_fuller_is_hash_regex_fix_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/git-log-blank-author-fix-streaming.json'), 'r', encoding='utf-8') as f:
generic_git_log_blank_author_fix_streaming_json = json.loads(f.read())
def test_git_log_s_nodata(self):
"""
@ -262,6 +268,13 @@ class MyTests(unittest.TestCase):
"""
self.assertEqual(list(jc.parsers.git_log_s.parse(self.generic_git_log_fuller_is_hash_regex_fix.splitlines(), quiet=True)), self.generic_git_log_fuller_is_hash_regex_fix_streaming_json)
def test_git_log_blank_author_fix(self):
"""
Test 'git_log_s' fix for when a commit author has a blank name,
empty email, or both. The 'git-log-blank-author-fix.out' fixture is
passed as a list of lines and the collected results must equal the
contents of 'git-log-blank-author-fix-streaming.json'.
"""
self.assertEqual(list(jc.parsers.git_log_s.parse(self.generic_git_log_blank_author_fix.splitlines(), quiet=True)), self.generic_git_log_blank_author_fix_streaming_json)
if __name__ == '__main__':
unittest.main()