1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-23 00:29:59 +02:00

add more processing and timestamp

This commit is contained in:
Kelly Brazil
2022-11-21 09:27:21 -08:00
parent 1e7e22330f
commit 9c8fe80d6d

View File

@ -2,9 +2,18 @@
This parser will handle the Common Log Format standard as specified at This parser will handle the Common Log Format standard as specified at
https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format. https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format.
Combined Log Format is also supported (adds the Referer and User Agent fields).
Extra fields may be present and will be enclosed in the `extra` field as Extra fields may be present and will be enclosed in the `extra` field as
a single string. a single string.
The `epoch` calculated timestamp field is naive (i.e., based on the
local time of the system the parser is run on).
The `epoch_utc` calculated timestamp field is timezone-aware and is
only available if the timezone field is UTC.
Usage (cli): Usage (cli):
$ cat file.log | jc --clf $ cat file.log | jc --clf
@ -16,6 +25,8 @@ Usage (module):
Schema: Schema:
Empty strings and `-` values are converted to `null`/`None`.
[ [
{ {
"host": string, "host": string,
@ -35,6 +46,8 @@ Schema:
"request_version": string, "request_version": string,
"status": integer, "status": integer,
"bytes": integer, "bytes": integer,
"referer": string,
"user_agent": string,
"extra": string, "extra": string,
"epoch": integer, # [0] "epoch": integer, # [0]
"epoch_utc": integer # [1] "epoch_utc": integer # [1]
@ -61,7 +74,7 @@ import jc.utils
class info(): class info():
"""Provides parser metadata (version, author, etc.)""" """Provides parser metadata (version, author, etc.)"""
version = '1.0' version = '1.0'
description = 'Common Log Format file parser' description = 'Common and Combined Log Format file parser'
author = 'Kelly Brazil' author = 'Kelly Brazil'
author_email = 'kellyjonbrazil@gmail.com' author_email = 'kellyjonbrazil@gmail.com'
compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd'] compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
@ -86,8 +99,20 @@ def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
for log in proc_data: for log in proc_data:
for key, val in log.items(): for key, val in log.items():
# integer conversions
if key in int_list: if key in int_list:
log[key] = int(val) log[key] = jc.utils.convert_to_int(val)
# convert `-` and blank values to None
if val == '-' or val == '':
log[key] = None
# add unix timestamps
if 'date' in log:
ts = jc.utils.timestamp(log['date'], format_hint=(1800,)) # type: ignore
log['epoch'] = ts.naive
log['epoch_utc'] = ts.utc
return proc_data return proc_data
@ -135,6 +160,8 @@ def parse(
(?P<request_version>HTTPS?/\d\.\d)?)\"\s (?P<request_version>HTTPS?/\d\.\d)?)\"\s
(?P<status>-|\d\d\d)\s (?P<status>-|\d\d\d)\s
(?P<bytes>-|\d+)\s? (?P<bytes>-|\d+)\s?
\"(?P<referer>.*?)\"\s?
\"(?P<user_agent>.*?)\"\s?
(?P<extra>.*) (?P<extra>.*)
''', re.VERBOSE ''', re.VERBOSE
) )