From 975b4f5e4ffd5bdf557a8e65ce24015e5e3bc81e Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Tue, 22 Nov 2022 13:10:15 -0800 Subject: [PATCH] add clf-s parser tests --- jc/parsers/clf_s.py | 2 +- .../generic/common-log-format-streaming.json | 1 + tests/test_clf_s.py | 50 +++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/generic/common-log-format-streaming.json create mode 100644 tests/test_clf_s.py diff --git a/jc/parsers/clf_s.py b/jc/parsers/clf_s.py index 5a5b2a55..6fc08313 100644 --- a/jc/parsers/clf_s.py +++ b/jc/parsers/clf_s.py @@ -197,7 +197,7 @@ def parse( streaming_line_input_type_check(line) output_line: Dict = {} - if line == '' or line == '\n': + if not line.strip(): continue clf_match = re.match(clf_pattern, line) diff --git a/tests/fixtures/generic/common-log-format-streaming.json b/tests/fixtures/generic/common-log-format-streaming.json new file mode 100644 index 00000000..0f0d58f2 --- /dev/null +++ b/tests/fixtures/generic/common-log-format-streaming.json @@ -0,0 +1 @@ +[{"host":"127.0.0.1","ident":"user-identifier","authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTPS/1.0","status":200,"bytes":2326,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTPS/1.0","epoch":971211336,"epoch_utc":null},{"host":"1.1.1.2","ident":null,"authuser":null,"date":"11/Nov/2016:03:04:55 +0100","day":11,"month":"Nov","year":2016,"hour":3,"minute":4,"second":55,"tz":"+0100","request":"GET /","status":200,"bytes":83,"referer":null,"user_agent":null,"extra":"- 9221 1.1.1.1","request_method":"GET","request_url":"/","request_version":null,"epoch":1478862295,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:24:21 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":24,"second":21,"tz":"+0100","request":"GET /uno dos","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 400233 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":null,"epoch":1478903061,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:23:37 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":23,"second":37,"tz":"+0100","request":"GET /uno dos HTTP/1.0","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 385111 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":"HTTP/1.0","epoch":1478903017,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 6160 11.1.1.1","request_method":"GET","request_url":"/icc","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc/ HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 2981 1.1.1.1","request_method":"GET","request_url":"/icc/","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"unparsable":"unparsable line"},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:55 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":55,"tz":"+0000","request":"GET index.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"index.htm","request_version":"HTTP/1.0","epoch":821507875,"epoch_utc":821479075},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:56 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":56,"tz":"+0000","request":"POST products.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"POST","request_url":"products.htm","request_version":"HTTP/1.0","epoch":821507876,"epoch_utc":821479076},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:57 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":57,"tz":"+0000","request":"PUT sales.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"PUT","request_url":"sales.htm","request_version":"HTTP/1.0","epoch":821507877,"epoch_utc":821479077},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:58 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":58,"tz":"+0000","request":"GET /images/log.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/images/log.gif","request_version":"HTTP/1.0","epoch":821507878,"epoch_utc":821479078},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:59 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":59,"tz":"+0000","request":"GET /buttons/form.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/buttons/form.gif","request_version":"HTTP/1.0","epoch":821507879,"epoch_utc":821479079},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"unparsable":"another unparsable line"},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":"http://www.example.com/","user_agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"host":"127.0.0.1","ident":null,"authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTP/1.0","status":200,"bytes":2326,"referer":"http://www.example.com/start.html","user_agent":"Mozilla/4.08 [en] (Win98; I ;Nav)","extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTP/1.0","epoch":971211336,"epoch_utc":null},{"host":"jay.bird.com","ident":null,"authuser":"fred","date":"25/Dec/1998:17:45:35 +0000","day":25,"month":"Dec","year":1998,"hour":17,"minute":45,"second":35,"tz":"+0000","request":"GET /~sret1/ HTTP/1.0","status":200,"bytes":1243,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/~sret1/","request_version":"HTTP/1.0","epoch":914636735,"epoch_utc":914607935},{"host":"127.0.0.1","ident":null,"authuser":"peter","date":"9/Feb/2017:10:34:12 -0700","day":9,"month":"Feb","year":2017,"hour":10,"minute":34,"second":12,"tz":"-0700","request":"GET /sample-image.png HTTP/2","status":200,"bytes":1479,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sample-image.png","request_version":"HTTP/2","epoch":1486665252,"epoch_utc":null},{"host":"10.1.2.3","ident":null,"authuser":"rehg","date":"10/Nov/2021:19:22:12 -0000","day":10,"month":"Nov","year":2021,"hour":19,"minute":22,"second":12,"tz":"-0000","request":"GET /sematext.png HTTP/1.1","status":200,"bytes":3423,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sematext.png","request_version":"HTTP/1.1","epoch":1636600932,"epoch_utc":1636572132}] diff --git a/tests/test_clf_s.py b/tests/test_clf_s.py new file mode 100644 index 00000000..ace95fcc --- /dev/null +++ b/tests/test_clf_s.py @@ -0,0 +1,50 @@ +import os +import json +import unittest +from typing import Dict +from jc.parsers.clf_s import parse + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) + +# To create streaming output use: +# $ cat clf.out | jc --clf-s | jello -c > clf-streaming.json + + +class MyTests(unittest.TestCase): + f_in: Dict = {} + f_json: Dict = {} + + @classmethod + def setUpClass(cls): + fixtures = { + 'clf_s': ( + 'fixtures/generic/common-log-format.log', + 'fixtures/generic/common-log-format-streaming.json') + } + + for file, filepaths in fixtures.items(): + with open(os.path.join(THIS_DIR, filepaths[0]), 'r', encoding='utf-8') as a, \ + open(os.path.join(THIS_DIR, filepaths[1]), 'r', encoding='utf-8') as b: + cls.f_in[file] = a.read() + cls.f_json[file] = json.loads(b.read()) + + + def test_clf_s_nodata(self): + """ + Test 'clf-s' with no data + """ + self.assertEqual(list(parse([], quiet=True)), []) + + + def test_clf_s_centos_7_7(self): + """ + Test 'clf-s' with various logs + """ + self.assertEqual( + list(parse(self.f_in['clf_s'].splitlines(), quiet=True)), + self.f_json['clf_s'] + ) + + +if __name__ == '__main__': + unittest.main()