From edff49a44ff38589a884042fe9ab930ef212a543 Mon Sep 17 00:00:00 2001
From: Kelly Brazil <kellyjonbrazil@gmail.com>
Date: Thu, 11 Aug 2022 13:29:53 -0700
Subject: [PATCH] add syslog parsers

---
 CHANGELOG                 |   1 +
 jc/lib.py                 |   2 +
 jc/parsers/syslog_3164.py | 126 +++++++++++++++++++++++++++++++++
 jc/parsers/syslog_5424.py | 142 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 271 insertions(+)
 create mode 100644 jc/parsers/syslog_3164.py
 create mode 100644 jc/parsers/syslog_5424.py

diff --git a/CHANGELOG b/CHANGELOG
index 2ee8d2e5..f6020246 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ jc changelog
 
 xxxxxxxx v1.20.5
 - Add IP Address string parser
+- Add Syslog string parsers (RFC 3164 and RFC 5424)
 - Add CEF string parser
 - Add PLIST file parser (XML and binary support)
 - Add `mdadm` command parser tested on linux (IN PROGRESS)
diff --git a/jc/lib.py b/jc/lib.py
index 955b4706..1c2a0797 100644
--- a/jc/lib.py
+++ b/jc/lib.py
@@ -94,6 +94,8 @@ parsers = [
     'stat',
     'stat-s',
     'sysctl',
+    'syslog-3164',
+    'syslog-5424',
     'systemctl',
     'systemctl-lj',
     'systemctl-ls',
diff --git a/jc/parsers/syslog_3164.py b/jc/parsers/syslog_3164.py
new file mode 100644
index 00000000..dd6834b2
--- /dev/null
+++ b/jc/parsers/syslog_3164.py
@@ -0,0 +1,126 @@
+"""jc - JSON Convert Syslog RFC 3164 string parser
+
+<>
+
+Usage (cli):
+
+    $ syslog-3164 | jc --syslog-3164
+
+    or
+
+    $ jc syslog-3164
+
+Usage (module):
+
+    import jc
+    result = jc.parse('syslog_3164', syslog_command_output)
+
+Schema:
+
+    [
+      {
+        "syslog-3164": string,
+        "bar": boolean,
+        "baz": integer
+      }
+    ]
+
+Examples:
+
+    $ syslog-3164 | jc --syslog-3164 -p
+    []
+
+    $ syslog-3164 | jc --syslog-3164 -p -r
+    []
+"""
+import re
+from typing import List, Dict
+import jc.utils
+
+
+class info():
+    """Provides parser metadata (version, author, etc.)"""
+    version = '1.0'
+    description = 'Syslog RFC 3164 string parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+
+__version__ = info.version
+
+
+def _process(proc_data: List[Dict]) -> List[Dict]:
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (List of Dictionaries) raw structured data to process
+
+    Returns:
+
+        List of Dictionaries. Structured to conform to the schema.
+    """
+
+    # process the data here
+    # rebuild output for added semantic information
+    # use helper functions in jc.utils for int, float, bool
+    # conversions and timestamps
+
+    return proc_data
+
+
+def parse(
+    data: str,
+    raw: bool = False,
+    quiet: bool = False
+) -> List[Dict]:
+    """
+    Main text parsing function
+
+    Parameters:
+
+        data:        (string)  text data to parse
+        raw:         (boolean) unprocessed output if True
+        quiet:       (boolean) suppress warning messages if True
+
+    Returns:
+
+        List of Dictionaries. Raw or processed structured data.
+    """
+    jc.utils.compatibility(__name__, info.compatible, quiet)
+    jc.utils.input_type_check(data)
+
+    raw_output: List = []
+
+    # inspired by https://gist.github.com/miticojo/b16bb13e78572c2d2fac82d9516d5c32
+    syslog = re.compile(r'''
+        (?P<priority><\d*>)?
+        (?P<date>[A-Z][a-z][a-z]\s{1,2}\d{1,2}\s\d{2}?:\d{2}:\d{2})\s
+        (?P<host>[\w][\w\d\.@-]*)\s
+        (?P<tag>[\w\d\[\]\.@-]+):?\s
+        (?P<message>.*)
+        ''', re.VERBOSE
+    )
+
+    if jc.utils.has_data(data):
+
+        for line in filter(None, data.splitlines()):
+            syslog_match = syslog.match(line)
+            if syslog_match:
+                priority = None
+                if syslog_match.group('priority'):
+                    priority = syslog_match.group('priority')[1:-1]
+
+                syslog_dict = {
+                    'priority': priority,
+                    'date': syslog_match.group('date'),
+                    'hostname': syslog_match.group('host'),
+                    'tag': syslog_match.group('tag'),
+                    'message': syslog_match.group('message')
+                }
+
+                if syslog_dict:
+                    raw_output.append(syslog_dict)
+
+    return raw_output if raw else _process(raw_output)
diff --git a/jc/parsers/syslog_5424.py b/jc/parsers/syslog_5424.py
new file mode 100644
index 00000000..9fa646a2
--- /dev/null
+++ b/jc/parsers/syslog_5424.py
@@ -0,0 +1,142 @@
+"""jc - JSON Convert Syslog RFC 5424 string parser
+
+<>
+
+Usage (cli):
+
+    $ syslog-5424 | jc --syslog-5424
+
+    or
+
+    $ jc syslog-5424
+
+Usage (module):
+
+    import jc
+    result = jc.parse('syslog_5424', syslog_command_output)
+
+Schema:
+
+    [
+      {
+        "syslog-5424": string,
+        "bar": boolean,
+        "baz": integer
+      }
+    ]
+
+Examples:
+
+    $ syslog-5424 | jc --syslog-5424 -p
+    []
+
+    $ syslog-5424 | jc --syslog-5424 -p -r
+    []
+"""
+import re
+from typing import List, Dict
+import jc.utils
+
+
+class info():
+    """Provides parser metadata (version, author, etc.)"""
+    version = '1.0'
+    description = 'Syslog RFC 5424 string parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+
+__version__ = info.version
+
+
+def _process(proc_data: List[Dict]) -> List[Dict]:
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (List of Dictionaries) raw structured data to process
+
+    Returns:
+
+        List of Dictionaries. Structured to conform to the schema.
+    """
+
+    # process the data here
+    # rebuild output for added semantic information
+    # use helper functions in jc.utils for int, float, bool
+    # conversions and timestamps
+
+    return proc_data
+
+
+def parse(
+    data: str,
+    raw: bool = False,
+    quiet: bool = False
+) -> List[Dict]:
+    """
+    Main text parsing function
+
+    Parameters:
+
+        data:        (string)  text data to parse
+        raw:         (boolean) unprocessed output if True
+        quiet:       (boolean) suppress warning messages if True
+
+    Returns:
+
+        List of Dictionaries. Raw or processed structured data.
+    """
+    jc.utils.compatibility(__name__, info.compatible, quiet)
+    jc.utils.input_type_check(data)
+
+    raw_output: List = []
+    syslog_dict = {}
+
+    # inspired by https://regex101.com/library/Wgbxn2
+    syslog = re.compile(r'''
+        (?P<priority><(\d|\d{2}|1[1-8]\d|19[01])>)?
+        (?P<version>\d{1,2})?\s*
+        (?P<timestamp>-|(?P<fullyear>[12]\d{3})-
+        (?P<month>0\d|[1][012])-
+        (?P<mday>[012]\d|3[01])T
+        (?P<hour>[01]\d|2[0-4]):
+        (?P<minute>[0-5]\d):
+        (?P<second>[0-5]\d|60)(?#60seconds can be used for leap year!)(?:\.
+        (?P<secfrac>\d{1,6}))?
+        (?P<numoffset>Z|[+-]\d{2}:\d{2})(?#=timezone))\s
+        (?P<hostname>[\S]{1,255})\s
+        (?P<appname>[\S]{1,48})\s
+        (?P<procid>[\S]{1,128})\s
+        (?P<msgid>[\S]{1,32})\s
+        (?P<structureddata>-|(?:\[.+?(?<!\\)\])+)
+        (?:\s(?P<msg>.+))?
+        ''', re.VERBOSE
+    )
+
+    if jc.utils.has_data(data):
+
+        for line in filter(None, data.splitlines()):
+            syslog_match = syslog.match(line)
+            if syslog_match:
+                priority = None
+                if syslog_match.group('priority'):
+                    priority = syslog_match.group('priority')[1:-1]
+
+                syslog_dict = {
+                    'priority': priority,
+                    'version': syslog_match.group('version'),
+                    'timestamp': syslog_match.group('timestamp'),
+                    'hostname': syslog_match.group('hostname'),
+                    'appname': syslog_match.group('appname'),
+                    'proc_id': syslog_match.group('procid'),
+                    'msg_id': syslog_match.group('msgid'),
+                    'struct': syslog_match.group('structureddata'),
+                    'message': syslog_match.group('msg')
+                }
+
+                if syslog_dict:
+                    raw_output.append(syslog_dict)
+
+    return raw_output if raw else _process(raw_output)