diff --git a/docs/parsers/cef.md b/docs/parsers/cef.md index a75b3db6..15f602d8 100644 --- a/docs/parsers/cef.md +++ b/docs/parsers/cef.md @@ -16,6 +16,9 @@ Extended fields, as defined in the CEF specification, are relabeled and the values are converted to their respective types. Extra naive and UTC epoch timestamps are added where appropriate per the CEF specification. +A warning message to `STDERR` will be printed if an unparsable line is found +unless `--quiet` or `quiet=True` is used. + To preserve escaping and original keynames and to prevent type conversions use the `--raw` CLI option or `raw=True` param in the `parse()` function. @@ -40,15 +43,17 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- "deviceProduct": string, "deviceVersion": string, "deviceEventClassId": string, + "deviceEventClassIdNum": integer/null, "name": string, "agentSeverity": string/integer, "agentSeverityString": string, - "agentSeverityNum": integer, + "agentSeverityNum": integer/null, "CEFVersion": integer, string/integer/float, # [0] - "_epoch": integer, # [1] - "_epoch_utc": integer, # [2] - string + "_epoch": integer/null, # [1] + "_epoch_utc": integer/null, # [2] + string, + "unparsable": string # [3] } ] @@ -59,6 +64,8 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- [2] Timezone-aware calculated epoch timestamp. (UTC only) This value will be null if a UTC timezone cannot be extracted from the original timestamp string value. + [3] This field exists only if the CEF line is not parsable. The value + is the original, unparsed input line. 
Examples: diff --git a/docs/parsers/syslog_bsd_s.md b/docs/parsers/syslog_bsd_s.md index 90217cbd..d7b141c3 100644 --- a/docs/parsers/syslog_bsd_s.md +++ b/docs/parsers/syslog_bsd_s.md @@ -49,10 +49,12 @@ Examples: $ cat syslog.txt | jc --syslog-bsd-s -p {"priority":34,"date":"Oct 11 22:14:15","hostname":"mymachine","t...} + {"priority":34,"date":"Oct 11 22:14:16","hostname":"mymachine","t...} ... $ cat syslog.txt | jc --syslog-bsd-s -p -r {"priority":"34","date":"Oct 11 22:14:15","hostname":"mymachine","...} + {"priority":"34","date":"Oct 11 22:14:16","hostname":"mymachine","...} ... diff --git a/docs/parsers/syslog_s.md b/docs/parsers/syslog_s.md index f2604845..d15b05f7 100644 --- a/docs/parsers/syslog_s.md +++ b/docs/parsers/syslog_s.md @@ -72,10 +72,12 @@ Examples: $ cat syslog.txt | jc --syslog-s -p {"priority":165,"version":1,"timestamp":"2003-08-24T05:14:15.000003-...} + {"priority":165,"version":1,"timestamp":"2003-08-24T05:14:16.000003-...} ... $ cat syslog.txt | jc --syslog-s -p -r {"priority":"165","version":"1","timestamp":"2003-08-24T05:14:15.000...} + {"priority":"165","version":"1","timestamp":"2003-08-24T05:14:16.000...} ... diff --git a/jc/parsers/cef.py b/jc/parsers/cef.py index de7c3f53..ceedf0ab 100644 --- a/jc/parsers/cef.py +++ b/jc/parsers/cef.py @@ -11,6 +11,9 @@ Extended fields, as defined in the CEF specification, are relabeled and the values are converted to their respective types. Extra naive and UTC epoch timestamps are added where appropriate per the CEF specification. +A warning message to `STDERR` will be printed if an unparsable line is found +unless `--quiet` or `quiet=True` is used. + To preserve escaping and original keynames and to prevent type conversions use the `--raw` CLI option or `raw=True` param in the `parse()` function. 
@@ -35,15 +38,17 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- "deviceProduct": string, "deviceVersion": string, "deviceEventClassId": string, + "deviceEventClassIdNum": integer/null, "name": string, "agentSeverity": string/integer, "agentSeverityString": string, - "agentSeverityNum": integer, + "agentSeverityNum": integer/null, "CEFVersion": integer, string/integer/float, # [0] - "_epoch": integer, # [1] - "_epoch_utc": integer, # [2] - string + "_epoch": integer/null, # [1] + "_epoch_utc": integer/null, # [2] + string, + "unparsable": string # [3] } ] @@ -54,6 +59,8 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- [2] Timezone-aware calculated epoch timestamp. (UTC only) This value will be null if a UTC timezone cannot be extracted from the original timestamp string value. + [3] This field exists only if the CEF line is not parsable. The value + is the original, unparsed input line. Examples: @@ -357,15 +364,21 @@ def _process(proc_data: List[Dict]) -> List[Dict]: if key in int_list: item[key] = jc.utils.convert_to_int(item[key]) - # set SeverityString and SeverityNum: + # set agentSeverityString and agentSeverityNum: if 'agentSeverity' in item: - if isinstance(item['agentSeverity'], str) and item['agentSeverity'].lower() in severity_set: + if item['agentSeverity'].lower() in severity_set: item['agentSeverityString'] = item['agentSeverity'] item['agentSeverityNum'] = None else: - item['agentSeverity'] = int(item['agentSeverity']) - item['agentSeverityString'] = severity_map[item['agentSeverity']] - item['agentSeverityNum'] = item['agentSeverity'] + try: + item['agentSeverityString'] = severity_map[int(item['agentSeverity'])] + item['agentSeverityNum'] = int(item['agentSeverity']) + except Exception: + pass + + # set deviceEventClassIdNum: + if 'deviceEventClassId' in item: + item['deviceEventClassIdNum'] = jc.utils.convert_to_int(item['deviceEventClassId']) return proc_data @@ -395,6 +408,13 @@ def parse( 
if jc.utils.has_data(data): for line in filter(None, data.splitlines()): - raw_output.append(_pycef_parse(line)) + try: + raw_output.append(_pycef_parse(line)) + except Exception: + if not quiet: + jc.utils.warning_message( + [f'Unparsable CEF line found: {line}'] + ) + raw_output.append({"unparsable": line}) return raw_output if raw else _process(raw_output) diff --git a/jc/parsers/syslog_bsd_s.py b/jc/parsers/syslog_bsd_s.py index c50e2f98..838953c0 100644 --- a/jc/parsers/syslog_bsd_s.py +++ b/jc/parsers/syslog_bsd_s.py @@ -44,10 +44,12 @@ Examples: $ cat syslog.txt | jc --syslog-bsd-s -p {"priority":34,"date":"Oct 11 22:14:15","hostname":"mymachine","t...} + {"priority":34,"date":"Oct 11 22:14:16","hostname":"mymachine","t...} ... $ cat syslog.txt | jc --syslog-bsd-s -p -r {"priority":"34","date":"Oct 11 22:14:15","hostname":"mymachine","...} + {"priority":"34","date":"Oct 11 22:14:16","hostname":"mymachine","...} ... """ from typing import Dict, Iterable, Union diff --git a/jc/parsers/syslog_s.py b/jc/parsers/syslog_s.py index 3524f7b9..aa790b03 100644 --- a/jc/parsers/syslog_s.py +++ b/jc/parsers/syslog_s.py @@ -67,10 +67,12 @@ Examples: $ cat syslog.txt | jc --syslog-s -p {"priority":165,"version":1,"timestamp":"2003-08-24T05:14:15.000003-...} + {"priority":165,"version":1,"timestamp":"2003-08-24T05:14:16.000003-...} ... $ cat syslog.txt | jc --syslog-s -p -r {"priority":"165","version":"1","timestamp":"2003-08-24T05:14:15.000...} + {"priority":"165","version":"1","timestamp":"2003-08-24T05:14:16.000...} ... """ from typing import List, Dict, Iterable, Union, Optional