diff --git a/docs/parsers/clf.md b/docs/parsers/clf.md index 37fe88e8..bb8cc599 100644 --- a/docs/parsers/clf.md +++ b/docs/parsers/clf.md @@ -13,6 +13,9 @@ Combined Log Format is also supported. (Referer and User Agent fields added) Extra fields may be present and will be enclosed in the `extra` field as a single string. +If a log line cannot be parsed, an object with an `unparsable` field will +be present with a value of the original line. + The `epoch` calculated timestamp field is naive. (i.e. based on the local time of the system the parser is run on) @@ -56,11 +59,13 @@ Empty strings and `-` values are converted to `null`/`None`. "extra": string, "epoch": integer, # [0] "epoch_utc": integer # [1] + "unparsable": string # [2] } ] [0] naive timestamp [1] timezone-aware timestamp. Only available if timezone field is UTC + [2] exists if the line was not able to be parsed Examples: diff --git a/jc/parsers/clf.py b/jc/parsers/clf.py index ef054142..5891182c 100644 --- a/jc/parsers/clf.py +++ b/jc/parsers/clf.py @@ -8,6 +8,9 @@ Combined Log Format is also supported. (Referer and User Agent fields added) Extra fields may be present and will be enclosed in the `extra` field as a single string. +If a log line cannot be parsed, an object with an `unparsable` field will +be present with a value of the original line. + The `epoch` calculated timestamp field is naive. (i.e. based on the local time of the system the parser is run on) @@ -51,11 +54,13 @@ Empty strings and `-` values are converted to `null`/`None`. "extra": string, "epoch": integer, # [0] "epoch_utc": integer # [1] + "unparsable": string # [2] } ] [0] naive timestamp [1] timezone-aware timestamp. Only available if timezone field is UTC + [2] exists if the line was not able to be parsed Examples: @@ -189,4 +194,9 @@ def parse( raw_output.append(output_line) + else: + raw_output.append( + {"unparsable": line} + ) + return raw_output if raw else _process(raw_output) diff --git a/tests/fixtures/generic/common-log-format.json b/tests/fixtures/generic/common-log-format.json index 397cd84a..0f0d58f2 100644 --- a/tests/fixtures/generic/common-log-format.json +++ b/tests/fixtures/generic/common-log-format.json @@ -1 +1 @@ -[{"host":"127.0.0.1","ident":"user-identifier","authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTPS/1.0","status":200,"bytes":2326,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTPS/1.0","epoch":971211336,"epoch_utc":null},{"host":"1.1.1.2","ident":null,"authuser":null,"date":"11/Nov/2016:03:04:55 +0100","day":11,"month":"Nov","year":2016,"hour":3,"minute":4,"second":55,"tz":"+0100","request":"GET /","status":200,"bytes":83,"referer":null,"user_agent":null,"extra":"- 9221 1.1.1.1","request_method":"GET","request_url":"/","request_version":null,"epoch":1478862295,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:24:21 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":24,"second":21,"tz":"+0100","request":"GET /uno dos","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 400233 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":null,"epoch":1478903061,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:23:37 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":23,"second":37,"tz":"+0100","request":"GET /uno dos HTTP/1.0","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 385111 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":"HTTP/1.0","epoch":1478903017,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 6160 11.1.1.1","request_method":"GET","request_url":"/icc","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc/ HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 2981 1.1.1.1","request_method":"GET","request_url":"/icc/","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:55 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":55,"tz":"+0000","request":"GET index.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"index.htm","request_version":"HTTP/1.0","epoch":821507875,"epoch_utc":821479075},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:56 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":56,"tz":"+0000","request":"POST products.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"POST","request_url":"products.htm","request_version":"HTTP/1.0","epoch":821507876,"epoch_utc":821479076},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:57 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":57,"tz":"+0000","request":"PUT sales.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"PUT","request_url":"sales.htm","request_version":"HTTP/1.0","epoch":821507877,"epoch_utc":821479077},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:58 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":58,"tz":"+0000","request":"GET /images/log.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/images/log.gif","request_version":"HTTP/1.0","epoch":821507878,"epoch_utc":821479078},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:59 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":59,"tz":"+0000","request":"GET /buttons/form.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/buttons/form.gif","request_version":"HTTP/1.0","epoch":821507879,"epoch_utc":821479079},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":"http://www.example.com/","user_agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"host":"127.0.0.1","ident":null,"authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTP/1.0","status":200,"bytes":2326,"referer":"http://www.example.com/start.html","user_agent":"Mozilla/4.08 [en] (Win98; I ;Nav)","extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTP/1.0","epoch":971211336,"epoch_utc":null},{"host":"jay.bird.com","ident":null,"authuser":"fred","date":"25/Dec/1998:17:45:35 +0000","day":25,"month":"Dec","year":1998,"hour":17,"minute":45,"second":35,"tz":"+0000","request":"GET /~sret1/ HTTP/1.0","status":200,"bytes":1243,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/~sret1/","request_version":"HTTP/1.0","epoch":914636735,"epoch_utc":914607935},{"host":"127.0.0.1","ident":null,"authuser":"peter","date":"9/Feb/2017:10:34:12 -0700","day":9,"month":"Feb","year":2017,"hour":10,"minute":34,"second":12,"tz":"-0700","request":"GET /sample-image.png HTTP/2","status":200,"bytes":1479,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sample-image.png","request_version":"HTTP/2","epoch":1486665252,"epoch_utc":null},{"host":"10.1.2.3","ident":null,"authuser":"rehg","date":"10/Nov/2021:19:22:12 -0000","day":10,"month":"Nov","year":2021,"hour":19,"minute":22,"second":12,"tz":"-0000","request":"GET /sematext.png HTTP/1.1","status":200,"bytes":3423,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sematext.png","request_version":"HTTP/1.1","epoch":1636600932,"epoch_utc":1636572132}] +[{"host":"127.0.0.1","ident":"user-identifier","authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTPS/1.0","status":200,"bytes":2326,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTPS/1.0","epoch":971211336,"epoch_utc":null},{"host":"1.1.1.2","ident":null,"authuser":null,"date":"11/Nov/2016:03:04:55 +0100","day":11,"month":"Nov","year":2016,"hour":3,"minute":4,"second":55,"tz":"+0100","request":"GET /","status":200,"bytes":83,"referer":null,"user_agent":null,"extra":"- 9221 1.1.1.1","request_method":"GET","request_url":"/","request_version":null,"epoch":1478862295,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:24:21 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":24,"second":21,"tz":"+0100","request":"GET /uno dos","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 400233 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":null,"epoch":1478903061,"epoch_utc":null},{"host":"127.0.0.1","ident":null,"authuser":null,"date":"11/Nov/2016:14:23:37 +0100","day":11,"month":"Nov","year":2016,"hour":14,"minute":23,"second":37,"tz":"+0100","request":"GET /uno dos HTTP/1.0","status":404,"bytes":298,"referer":null,"user_agent":null,"extra":"- 385111 1.1.1.1","request_method":"GET","request_url":"/uno dos","request_version":"HTTP/1.0","epoch":1478903017,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 6160 11.1.1.1","request_method":"GET","request_url":"/icc","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"host":"1.1.1.1","ident":null,"authuser":null,"date":"11/Nov/2016:00:00:11 +0100","day":11,"month":"Nov","year":2016,"hour":0,"minute":0,"second":11,"tz":"+0100","request":"GET /icc/ HTTP/1.1","status":302,"bytes":null,"referer":null,"user_agent":"XXX XXX XXX","extra":"- 2981 1.1.1.1","request_method":"GET","request_url":"/icc/","request_version":"HTTP/1.1","epoch":1478851211,"epoch_utc":null},{"unparsable":"unparsable line"},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:55 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":55,"tz":"+0000","request":"GET index.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"index.htm","request_version":"HTTP/1.0","epoch":821507875,"epoch_utc":821479075},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:56 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":56,"tz":"+0000","request":"POST products.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"POST","request_url":"products.htm","request_version":"HTTP/1.0","epoch":821507876,"epoch_utc":821479076},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:57 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":57,"tz":"+0000","request":"PUT sales.htm HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"PUT","request_url":"sales.htm","request_version":"HTTP/1.0","epoch":821507877,"epoch_utc":821479077},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:58 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":58,"tz":"+0000","request":"GET /images/log.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/images/log.gif","request_version":"HTTP/1.0","epoch":821507878,"epoch_utc":821479078},{"host":"tarpon.gulf.net","ident":null,"authuser":null,"date":"12/Jan/1996:20:37:59 +0000","day":12,"month":"Jan","year":1996,"hour":20,"minute":37,"second":59,"tz":"+0000","request":"GET /buttons/form.gif HTTP/1.0","status":200,"bytes":215,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/buttons/form.gif","request_version":"HTTP/1.0","epoch":821507879,"epoch_utc":821479079},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"unparsable":"another unparsable line"},{"host":"66.249.66.1","ident":null,"authuser":null,"date":"01/Jan/2017:09:00:00 +0000","day":1,"month":"Jan","year":2017,"hour":9,"minute":0,"second":0,"tz":"+0000","request":"GET /contact.html HTTP/1.1","status":200,"bytes":250,"referer":"http://www.example.com/","user_agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","extra":null,"request_method":"GET","request_url":"/contact.html","request_version":"HTTP/1.1","epoch":1483290000,"epoch_utc":1483261200},{"host":"127.0.0.1","ident":null,"authuser":"frank","date":"10/Oct/2000:13:55:36 -0700","day":10,"month":"Oct","year":2000,"hour":13,"minute":55,"second":36,"tz":"-0700","request":"GET /apache_pb.gif HTTP/1.0","status":200,"bytes":2326,"referer":"http://www.example.com/start.html","user_agent":"Mozilla/4.08 [en] (Win98; I ;Nav)","extra":null,"request_method":"GET","request_url":"/apache_pb.gif","request_version":"HTTP/1.0","epoch":971211336,"epoch_utc":null},{"host":"jay.bird.com","ident":null,"authuser":"fred","date":"25/Dec/1998:17:45:35 +0000","day":25,"month":"Dec","year":1998,"hour":17,"minute":45,"second":35,"tz":"+0000","request":"GET /~sret1/ HTTP/1.0","status":200,"bytes":1243,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/~sret1/","request_version":"HTTP/1.0","epoch":914636735,"epoch_utc":914607935},{"host":"127.0.0.1","ident":null,"authuser":"peter","date":"9/Feb/2017:10:34:12 -0700","day":9,"month":"Feb","year":2017,"hour":10,"minute":34,"second":12,"tz":"-0700","request":"GET /sample-image.png HTTP/2","status":200,"bytes":1479,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sample-image.png","request_version":"HTTP/2","epoch":1486665252,"epoch_utc":null},{"host":"10.1.2.3","ident":null,"authuser":"rehg","date":"10/Nov/2021:19:22:12 -0000","day":10,"month":"Nov","year":2021,"hour":19,"minute":22,"second":12,"tz":"-0000","request":"GET /sematext.png HTTP/1.1","status":200,"bytes":3423,"referer":null,"user_agent":null,"extra":null,"request_method":"GET","request_url":"/sematext.png","request_version":"HTTP/1.1","epoch":1636600932,"epoch_utc":1636572132}] diff --git a/tests/fixtures/generic/common-log-format.log b/tests/fixtures/generic/common-log-format.log index 8ede8a16..bde67856 100644 --- a/tests/fixtures/generic/common-log-format.log +++ b/tests/fixtures/generic/common-log-format.log @@ -4,12 +4,16 @@ 127.0.0.1 - - [11/Nov/2016:14:23:37 +0100] "GET /uno dos HTTP/1.0" 404 298 "-" "-" - 385111 1.1.1.1 1.1.1.1 - - [11/Nov/2016:00:00:11 +0100] "GET /icc HTTP/1.1" 302 - "-" "XXX XXX XXX" - 6160 11.1.1.1 1.1.1.1 - - [11/Nov/2016:00:00:11 +0100] "GET /icc/ HTTP/1.1" 302 - "-" "XXX XXX XXX" - 2981 1.1.1.1 +unparsable line tarpon.gulf.net - - [12/Jan/1996:20:37:55 +0000] "GET index.htm HTTP/1.0" 200 215 tarpon.gulf.net - - [12/Jan/1996:20:37:56 +0000] "POST products.htm HTTP/1.0" 200 215 tarpon.gulf.net - - [12/Jan/1996:20:37:57 +0000] "PUT sales.htm HTTP/1.0" 200 215 tarpon.gulf.net - - [12/Jan/1996:20:37:58 +0000] "GET /images/log.gif HTTP/1.0" 200 215 tarpon.gulf.net - - [12/Jan/1996:20:37:59 +0000] "GET /buttons/form.gif HTTP/1.0" 200 215 66.249.66.1 - - [01/Jan/2017:09:00:00 +0000] "GET /contact.html HTTP/1.1" 200 250 + +another unparsable line + 66.249.66.1 - - [01/Jan/2017:09:00:00 +0000] "GET /contact.html HTTP/1.1" 200 250 "http://www.example.com/" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)" jay.bird.com - fred [25/Dec/1998:17:45:35 +0000] "GET /~sret1/ HTTP/1.0" 200 1243