diff --git a/CHANGELOG b/CHANGELOG index ac1acbca..80f6715f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ jc changelog +20211002 v1.17.1 *** in progress *** +- Fix file parser for gzip files + 20210923 v1.17.0 - Note to Package Maintainers: please see note at 20210720 v1.16.0 - Add wrapping of warning and error messages diff --git a/jc/__init__.py b/jc/__init__.py index a0551ac0..86a29419 100644 --- a/jc/__init__.py +++ b/jc/__init__.py @@ -73,4 +73,4 @@ Module Example: """ name = 'jc' -__version__ = '1.17.0' +__version__ = '1.17.1' diff --git a/jc/parsers/file.py b/jc/parsers/file.py index d34258c6..2af927c7 100644 --- a/jc/parsers/file.py +++ b/jc/parsers/file.py @@ -63,7 +63,7 @@ import jc.parsers.universal class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.3' + version = '1.4' description = '`file` command parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -116,7 +116,14 @@ def parse(data, raw=False, quiet=False): if jc.utils.has_data(data): for line in filter(None, data.splitlines()): - linedata = line.rsplit(': ', maxsplit=1) + + # fix case for gzip files where description contains ': ' delimiter + if 'gzip compressed data, last modified: ' in line: + linedata = line.split(': ', maxsplit=1) + + # use rsplit to correctly grab filenames containing ': ' delimiter text + else: + linedata = line.rsplit(': ', maxsplit=1) try: filename = linedata[0].strip() diff --git a/setup.py b/setup.py index dfe9d25a..1a5adef4 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open('README.md', 'r') as f: setuptools.setup( name='jc', - version='1.17.0', + version='1.17.1', author='Kelly Brazil', author_email='kellyjonbrazil@gmail.com', description='Converts the output of popular command-line tools and file-types to JSON.', diff --git a/tests/fixtures/osx-10.14.6/file3.json b/tests/fixtures/osx-10.14.6/file3.json new file mode 100644 index 00000000..9bb933d3 --- /dev/null +++ 
b/tests/fixtures/osx-10.14.6/file3.json @@ -0,0 +1 @@ +[{"filename":"Applications","type":"directory"},{"filename":"Desktop","type":"directory"},{"filename":"Documents","type":"directory"},{"filename":"Downloads","type":"directory"},{"filename":"Library","type":"directory"},{"filename":"Movies","type":"directory"},{"filename":"Music","type":"directory"},{"filename":"Pictures","type":"directory"},{"filename":"Postman","type":"directory"},{"filename":"Public","type":"directory"},{"filename":"Virtual Machines.localized","type":"directory"},{"filename":"ansible","type":"directory"},{"filename":"api","type":"directory"},{"filename":"centosserial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"coreosserial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"data.json","type":"UTF-8 Unicode text, with very long lines"},{"filename":"fazserial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"fgt1serial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"fgt2serial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"file with colon: in the name","type":"empty"},{"filename":"fnditer.py","type":"ASCII text"},{"filename":"fortiweb-docker.sh","type":"Bourne-Again shell script text executable, ASCII text"},{"filename":"ftmgr.sh","type":"Bourne-Again shell script text executable, ASCII text"},{"filename":"git","type":"directory"},{"filename":"google-cloud-sdk","type":"directory"},{"filename":"ipaddr.json","type":"ASCII text, with very long lines"},{"filename":"jc-1.16.0-linux-x86_64.tar.gz","type":"gzip compressed data, last modified: Tue Jul 20 17:32:10 2021, from Unix, original size 93992960"},{"filename":"jc-1.16.1-linux-x86_64.tar.gz","type":"gzip compressed data, last modified: Tue Aug 17 20:42:30 2021, from Unix, original size 
94033920"},{"filename":"jc-1.17.0-linux-x86_64.sha256","type":"ASCII text"},{"filename":"jc-1.17.0-linux-x86_64.tar.gz","type":"gzip compressed data, last modified: Sun Sep 26 22:16:00 2021, from Unix, original size 94136320"},{"filename":"jc-jq-jp-ps-cpu.gif","type":"GIF image data, version 89a, 1572 x 1212"},{"filename":"jc-jq-jp-uptime-small.gif","type":"GIF image data, version 89a, 800 x 617"},{"filename":"jc-jq-jp-uptime.gif","type":"GIF image data, version 89a, 1572 x 1212"},{"filename":"jcparsers.jlines","type":"ASCII text, with very long lines"},{"filename":"jcparsers.json","type":"ASCII text, with very long lines"},{"filename":"jello-1.4.0-linux-x86_64.tar.gz","type":"gzip compressed data, last modified: Sat Jun 19 18:19:55 2021, from Unix, original size 92006400"},{"filename":"jello-1.4.4-linux-x86_64.tar.gz","type":"gzip compressed data, last modified: Fri Jun 25 08:53:06 2021, from Unix, original size 92016640"},{"filename":"jp","type":"Mach-O 64-bit executable x86_64"},{"filename":"json-stream.py","type":"Python script text executable, ASCII text"},{"filename":"json-stream2.py","type":"Python script text executable, ASCII text"},{"filename":"jupyter.sh","type":"ASCII text"},{"filename":"kb-fortinet.pem","type":"PEM RSA private key"},{"filename":"kelly-aws.pem","type":"PEM RSA private key"},{"filename":"kelly-aws2.pem","type":"PEM RSA private key"},{"filename":"kellytest.sh","type":"Bourne-Again shell script text executable, ASCII text"},{"filename":"kping.out","type":"ASCII text"},{"filename":"loadplot.sh","type":"Bourne-Again shell script text executable, ASCII text"},{"filename":"ls.jlines","type":"UTF-8 Unicode text, with very long lines"},{"filename":"myrecording","type":"data"},{"filename":"nse-certs","type":"directory"},{"filename":"ping1.out","type":"ASCII text"},{"filename":"reading.py","type":"Python script text executable, ASCII text"},{"filename":"rpmbuild","type":"directory"},{"filename":"scroll.py","type":"Python script text executable, 
ASCII text"},{"filename":"state_test.py","type":"Python script text executable, ASCII text"},{"filename":"stream-profiling.xlsx","type":"Microsoft Excel 2007+"},{"filename":"test-certs.xls","type":"HTML document text, ISO-8859 text, with very long lines"},{"filename":"test-certs.xlsx","type":"Microsoft Excel 2007+"},{"filename":"test-output","type":"directory"},{"filename":"testcsv.csv","type":"ASCII text, with CR, LF line terminators"},{"filename":"testedit.py","type":"Python script text executable, ASCII text"},{"filename":"testfile.json","type":"ASCII text"},{"filename":"testfile.kb","type":"ASCII text"},{"filename":"testfile2.json","type":"ASCII text"},{"filename":"tr2dot.py","type":"Python script text executable, ASCII text"},{"filename":"traceroute.gv","type":"ASCII text"},{"filename":"traceroute.gv.pdf","type":"PDF document, version 1.5"},{"filename":"trtwitter.out","type":"ASCII text"},{"filename":"twitterdata.jlines","type":"HTML document text, ASCII text, with very long lines"},{"filename":"ubuntuserial.sh","type":"Bourne-Again shell script text executable, UTF-8 Unicode text"},{"filename":"utils","type":"directory"},{"filename":"win32.csv","type":"ASCII text, with CRLF, LF line terminators"}] diff --git a/tests/fixtures/osx-10.14.6/file3.out b/tests/fixtures/osx-10.14.6/file3.out new file mode 100644 index 00000000..19061c5a --- /dev/null +++ b/tests/fixtures/osx-10.14.6/file3.out @@ -0,0 +1,72 @@ +Applications: directory +Desktop: directory +Documents: directory +Downloads: directory +Library: directory +Movies: directory +Music: directory +Pictures: directory +Postman: directory +Public: directory +Virtual Machines.localized: directory +ansible: directory +api: directory +centosserial.sh: Bourne-Again shell script text executable, UTF-8 Unicode text +coreosserial.sh: Bourne-Again shell script text executable, UTF-8 Unicode text +data.json: UTF-8 Unicode text, with very long lines +fazserial.sh: Bourne-Again shell script text executable, UTF-8 Unicode 
text +fgt1serial.sh: Bourne-Again shell script text executable, UTF-8 Unicode text +fgt2serial.sh: Bourne-Again shell script text executable, UTF-8 Unicode text +file with colon: in the name: empty +fnditer.py: ASCII text +fortiweb-docker.sh: Bourne-Again shell script text executable, ASCII text +ftmgr.sh: Bourne-Again shell script text executable, ASCII text +git: directory +google-cloud-sdk: directory +ipaddr.json: ASCII text, with very long lines +jc-1.16.0-linux-x86_64.tar.gz: gzip compressed data, last modified: Tue Jul 20 17:32:10 2021, from Unix, original size 93992960 +jc-1.16.1-linux-x86_64.tar.gz: gzip compressed data, last modified: Tue Aug 17 20:42:30 2021, from Unix, original size 94033920 +jc-1.17.0-linux-x86_64.sha256: ASCII text +jc-1.17.0-linux-x86_64.tar.gz: gzip compressed data, last modified: Sun Sep 26 22:16:00 2021, from Unix, original size 94136320 +jc-jq-jp-ps-cpu.gif: GIF image data, version 89a, 1572 x 1212 +jc-jq-jp-uptime-small.gif: GIF image data, version 89a, 800 x 617 +jc-jq-jp-uptime.gif: GIF image data, version 89a, 1572 x 1212 +jcparsers.jlines: ASCII text, with very long lines +jcparsers.json: ASCII text, with very long lines +jello-1.4.0-linux-x86_64.tar.gz: gzip compressed data, last modified: Sat Jun 19 18:19:55 2021, from Unix, original size 92006400 +jello-1.4.4-linux-x86_64.tar.gz: gzip compressed data, last modified: Fri Jun 25 08:53:06 2021, from Unix, original size 92016640 +jp: Mach-O 64-bit executable x86_64 +json-stream.py: Python script text executable, ASCII text +json-stream2.py: Python script text executable, ASCII text +jupyter.sh: ASCII text +kb-fortinet.pem: PEM RSA private key +kelly-aws.pem: PEM RSA private key +kelly-aws2.pem: PEM RSA private key +kellytest.sh: Bourne-Again shell script text executable, ASCII text +kping.out: ASCII text +loadplot.sh: Bourne-Again shell script text executable, ASCII text +ls.jlines: UTF-8 Unicode text, with very long lines +myrecording: data +nse-certs: directory +ping1.out: 
ASCII text +reading.py: Python script text executable, ASCII text +rpmbuild: directory +scroll.py: Python script text executable, ASCII text +state_test.py: Python script text executable, ASCII text +stream-profiling.xlsx: Microsoft Excel 2007+ +test-certs.xls: HTML document text, ISO-8859 text, with very long lines +test-certs.xlsx: Microsoft Excel 2007+ +test-output: directory +testcsv.csv: ASCII text, with CR, LF line terminators +testedit.py: Python script text executable, ASCII text +testfile.json: ASCII text +testfile.kb: ASCII text +testfile2.json: ASCII text +tr2dot.py: Python script text executable, ASCII text +traceroute.gv: ASCII text +traceroute.gv.pdf: PDF document, version 1.5 +trtwitter.out: ASCII text +twitterdata.jlines: HTML document text, ASCII text, with very long lines +ubuntuserial.sh: Bourne-Again shell script text executable, UTF-8 Unicode text +utils: directory +win32.csv: ASCII text, with CRLF, LF line terminators diff --git a/tests/test_file.py b/tests/test_file.py index a8df77cf..e0e76e9f 100644 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -22,6 +22,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/osx-10.14.6/file2.out'), 'r', encoding='utf-8') as f: self.osx_10_14_6_file2 = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/osx-10.14.6/file3.out'), 'r', encoding='utf-8') as f: + self.osx_10_14_6_file3 = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/centos-7.7/file.json'), 'r', encoding='utf-8') as f: self.centos_7_7_file_json = json.loads(f.read()) @@ -35,6 +38,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/osx-10.14.6/file2.json'), 'r', encoding='utf-8') as f: self.osx_10_14_6_file2_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/osx-10.14.6/file3.json'), 'r', encoding='utf-8') as f: + self.osx_10_14_6_file3_json = 
json.loads(f.read()) + def test_file_nodata(self): """ Test 'file' with no data @@ -65,6 +71,12 @@ class MyTests(unittest.TestCase): """ self.assertEqual(jc.parsers.file.parse(self.osx_10_14_6_file2, quiet=True), self.osx_10_14_6_file2_json) + def test_file3_osx_10_14_6(self): + """ + Test 'file *' with gzip filetype descriptions including ': ' on OSX 10.14.6 + """ + self.assertEqual(jc.parsers.file.parse(self.osx_10_14_6_file3, quiet=True), self.osx_10_14_6_file3_json) + if __name__ == '__main__': unittest.main()