diff --git a/README.md b/README.md index 38e65290..e4b0f32d 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ option. | ` --asciitable-m` | multi-line ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable_m) | | ` --blkid` | `blkid` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/blkid) | | ` --cef` | CEF string parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef) | +| ` --cef-s` | CEF string streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef_s) | | ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) | | ` --cksum` | `cksum` and `sum` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cksum) | | ` --crontab` | `crontab` command and file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab) | diff --git a/completions/jc_bash_completion.sh b/completions/jc_bash_completion.sh index 4431b3f8..8b2901d6 100644 --- a/completions/jc_bash_completion.sh +++ b/completions/jc_bash_completion.sh @@ -4,7 +4,7 @@ _jc() jc_about_options jc_about_mod_options jc_help_options jc_special_options jc_commands=(acpi airport arp blkid chage cksum crontab date df dig dmidecode dpkg du env file finger free git gpg hciconfig id ifconfig iostat iptables iw jobs last lastb ls lsblk lsmod lsof lsusb md5 md5sum mdadm mount mpstat netstat nmcli ntpq pidstat ping ping6 pip pip3 postconf printenv ps route rpm rsync sfdisk sha1sum sha224sum sha256sum sha384sum sha512sum shasum ss stat sum sysctl systemctl systeminfo timedatectl top tracepath tracepath6 traceroute traceroute6 ufw uname update-alternatives upower uptime vdir vmstat w wc who xrandr zipinfo) - jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo) + jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo) jc_options=(--force-color -C --debug -d --monochrome -m --pretty -p --quiet -q --raw -r --time-out -t --unbuffer -u --yaml-out -y) jc_about_options=(--about -a) jc_about_mod_options=(--pretty -p --yaml-out -y --monochrome -m --force-color -C) diff --git a/completions/jc_zsh_completion.sh b/completions/jc_zsh_completion.sh index 206e5afc..d89e4c7c 100644 --- a/completions/jc_zsh_completion.sh +++ b/completions/jc_zsh_completion.sh @@ -95,7 +95,7 @@ _jc() { 'xrandr:run "xrandr" command with magic syntax.' 'zipinfo:run "zipinfo" command with magic syntax.' ) - jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo) + jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo) jc_parsers_describe=( '--acpi:`acpi` command parser' '--airport:`airport -I` command parser' @@ -105,6 +105,7 @@ _jc() { '--asciitable-m:multi-line ASCII and Unicode table parser' '--blkid:`blkid` command parser' '--cef:CEF string parser' + '--cef-s:CEF string streaming parser' '--chage:`chage --list` command parser' '--cksum:`cksum` and `sum` command parser' '--crontab:`crontab` command and file parser' diff --git a/docs/parsers/cef.md b/docs/parsers/cef.md index 04e082ae..c264cc54 100644 --- a/docs/parsers/cef.md +++ b/docs/parsers/cef.md @@ -69,7 +69,7 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- Examples: - $ cef | jc --cef -p + $ cat cef.log | jc --cef -p [ { "deviceVendor": "Trend Micro", @@ -96,7 +96,7 @@ Examples: } ] - $ cef | jc --cef -p -r + $ cat cef.log | jc --cef -p -r [ { "deviceVendor": "Trend Micro", diff --git a/docs/parsers/cef_s.md b/docs/parsers/cef_s.md new file mode 100644 index 00000000..eb403ca6 --- /dev/null +++ b/docs/parsers/cef_s.md @@ -0,0 +1,124 @@ +[Home](https://kellyjonbrazil.github.io/jc/) + + +# jc.parsers.cef\_s + +jc - JSON Convert CEF string output streaming parser + +> This streaming parser outputs JSON Lines (cli) or returns an Iterable of +> Dictionaries (module) + +This parser conforms to the Microfocus Arcsight CEF specification. + +This parser will accept a single CEF string or multiple CEF string lines. +Any text before "CEF" will be ignored. Syslog and CEF escaped characters +(`\\`, `\\"`, `\\]`, `\\|`, `\\=`, `\\%`, `\\#`, `\\n`, and `\\r`) are +unescaped. + +Extended fields, as defined in the CEF specification, are relabeled +and the values are converted to their respective types. Extra naive and +UTC epoch timestamps are added where appropriate per the CEF specification. + +A warning message to `STDERR` will be printed if an unparsable line is found +unless `--quiet` or `quiet=True` is used. + +To preserve escaping and original keynames and to prevent type conversions +use the `--raw` CLI option or `raw=True` param in the `parse()` function. + +Usage (cli): + + $ echo 'CEF:0|Vendor|Product|3.2.0|1|SYSTEM|1|... | jc --cef-s + +Usage (module): + + import jc + + result = jc.parse('cef_s', cef_command_output.splitlines()) + for item in result: + # do something + +Schema: + +See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm + +> Note: Special characters in key names will be converted to underscores. + + { + "deviceVendor": string, + "deviceProduct": string, + "deviceVersion": string, + "deviceEventClassId": string, + "deviceEventClassIdNum": integer/null, + "name": string, + "agentSeverity": string/integer, + "agentSeverityString": string, + "agentSeverityNum": integer/null, + "CEFVersion": integer, + string/integer/float, # [0] + "_epoch": integer/null, # [1] + "_epoch_utc": integer/null, # [2] + string, + "unparsable": string # [3] + + # below object only exists if using -qq or ignore_exceptions=True + "_jc_meta": { + "success": boolean, # false if error parsing + "error": string, # exists if "success" is false + "line": string # exists if "success" is false + } + } + + [0] Will attempt to convert extended fields to the type specified in the + CEF specification. If conversion fails, then the field will remain + a string. + [1] Naive calculated epoch timestamp + [2] Timezone-aware calculated epoch timestamp. (UTC only) This value + will be null if a UTC timezone cannot be extracted from the original + timestamp string value. + [3] This field exists if the CEF line is not parsable. The value + is the original syslog line. + +Examples: + + $ cat cef.log | jc --cef-s + {"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...} + {"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...} + ... + + $ cat cef.log | jc --cef-s -r + {"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...} + {"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...} + ... + + + +### parse + +```python +@add_jc_meta +def parse(data: Iterable[str], + raw: bool = False, + quiet: bool = False, + ignore_exceptions: bool = False) -> Union[Iterable[Dict], tuple] +``` + +Main text parsing generator function. Returns an iterable object. + +Parameters: + + data: (iterable) line-based text data to parse + (e.g. sys.stdin or str.splitlines()) + + raw: (boolean) unprocessed output if True + quiet: (boolean) suppress warning messages if True + ignore_exceptions: (boolean) ignore parsing exceptions if True + + +Returns: + + Iterable of Dictionaries + +### Parser Information +Compatibility: linux, darwin, cygwin, win32, aix, freebsd + +Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com) diff --git a/jc/lib.py b/jc/lib.py index 92026dbf..2675a43f 100644 --- a/jc/lib.py +++ b/jc/lib.py @@ -17,6 +17,7 @@ parsers = [ 'asciitable-m', 'blkid', 'cef', + 'cef-s', 'chage', 'cksum', 'crontab', diff --git a/jc/parsers/cef.py b/jc/parsers/cef.py index 300bf0ac..44d4f1c5 100644 --- a/jc/parsers/cef.py +++ b/jc/parsers/cef.py @@ -64,7 +64,7 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors- Examples: - $ cef | jc --cef -p + $ cat cef.log | jc --cef -p [ { "deviceVendor": "Trend Micro", @@ -91,7 +91,7 @@ Examples: } ] - $ cef | jc --cef -p -r + $ cat cef.log | jc --cef -p -r [ { "deviceVendor": "Trend Micro", diff --git a/jc/parsers/cef_s.py b/jc/parsers/cef_s.py new file mode 100644 index 00000000..88826689 --- /dev/null +++ b/jc/parsers/cef_s.py @@ -0,0 +1,320 @@ +"""jc - JSON Convert CEF string output streaming parser + +> This streaming parser outputs JSON Lines (cli) or returns an Iterable of +> Dictionaries (module) + +This parser conforms to the Microfocus Arcsight CEF specification. + +This parser will accept a single CEF string or multiple CEF string lines. +Any text before "CEF" will be ignored. Syslog and CEF escaped characters +(`\\`, `\\"`, `\\]`, `\\|`, `\\=`, `\\%`, `\\#`, `\\n`, and `\\r`) are +unescaped. + +Extended fields, as defined in the CEF specification, are relabeled +and the values are converted to their respective types. Extra naive and +UTC epoch timestamps are added where appropriate per the CEF specification. + +A warning message to `STDERR` will be printed if an unparsable line is found +unless `--quiet` or `quiet=True` is used. + +To preserve escaping and original keynames and to prevent type conversions +use the `--raw` CLI option or `raw=True` param in the `parse()` function. + +Usage (cli): + + $ echo 'CEF:0|Vendor|Product|3.2.0|1|SYSTEM|1|... | jc --cef-s + +Usage (module): + + import jc + + result = jc.parse('cef_s', cef_command_output.splitlines()) + for item in result: + # do something + +Schema: + +See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm + +> Note: Special characters in key names will be converted to underscores. + + { + "deviceVendor": string, + "deviceProduct": string, + "deviceVersion": string, + "deviceEventClassId": string, + "deviceEventClassIdNum": integer/null, + "name": string, + "agentSeverity": string/integer, + "agentSeverityString": string, + "agentSeverityNum": integer/null, + "CEFVersion": integer, + string/integer/float, # [0] + "_epoch": integer/null, # [1] + "_epoch_utc": integer/null, # [2] + string, + "unparsable": string # [3] + + # below object only exists if using -qq or ignore_exceptions=True + "_jc_meta": { + "success": boolean, # false if error parsing + "error": string, # exists if "success" is false + "line": string # exists if "success" is false + } + } + + [0] Will attempt to convert extended fields to the type specified in the + CEF specification. If conversion fails, then the field will remain + a string. + [1] Naive calculated epoch timestamp + [2] Timezone-aware calculated epoch timestamp. (UTC only) This value + will be null if a UTC timezone cannot be extracted from the original + timestamp string value. + [3] This field exists if the CEF line is not parsable. The value + is the original syslog line. + +Examples: + + $ cat cef.log | jc --cef-s + {"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...} + {"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...} + ... + + $ cat cef.log | jc --cef-s -r + {"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...} + {"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...} + ... +""" +from typing import Dict, Iterable, Union +import re +import jc.utils +from jc.streaming import ( + add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield +) +from jc.exceptions import ParseError +from jc.parsers.cef import _pycef_parse + + +class info(): + """Provides parser metadata (version, author, etc.)""" + version = '1.0' + description = 'CEF string streaming parser' + author = 'Kelly Brazil' + author_email = 'kellyjonbrazil@gmail.com' + compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd'] + streaming = True + + +__version__ = info.version + + +def _process(proc_data: Dict) -> Dict: + """ + Final processing to conform to the schema. + + Parameters: + + proc_data: (Dictionary) raw structured data to process + + Returns: + + Dictionary. Structured data to conform to the schema. + """ + # fix escape chars specified in syslog RFC 5424 and CEF spec + # https://www.rfc-editor.org/rfc/rfc5424.html#section-6 + # https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm?tocpath=_____2#_Toc494359738 + escape_map = { + r'\\': '\\', + r'\"': '"', + r'\]': ']', + r'\|': '|', + r'\=': '=', + r'\%': '%', + r'\#': '#', + r'\n': '\n', + r'\r': '\r' + } + + int_list = {'CEFVersion'} + + severity_map = { + None: 'Unknown', + 0: 'Low', + 1: 'Low', + 2: 'Low', + 3: 'Low', + 4: 'Medium', + 5: 'Medium', + 6: 'Medium', + 7: 'High', + 8: 'High', + 9: 'Very-High', + 10: 'Very-High' + } + + severity_set = {'unknown', 'low', 'medium', 'high', 'very-high'} + + # set defined types for extended fields + # see https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/#CEF/Chapter%202%20ArcSight%20Extension.htm + extended_ints = { + 'spid', 'customerKey', 'deviceTranslatedZoneKey', 'oldFileSize', + 'destinationTranslatedPort', 'cn3', 'sourceTranslatedPort', 'in', 'fsize', 'slat', + 'dpid', 'cnt', 'agentZoneKey', 'out', 'type', 'eventId', 'dlong', 'cn2', + 'deviceDirection', 'spt', 'agentTranslatedZoneKey', 'sTranslatedZoneKey', 'cn1', + 'slong', 'dZoneKey', 'deviceZoneKey', 'dvcpid', 'dpt', 'dTranslatedZoneKey', 'dlat', + 'sZoneKey' + } + + extended_floats = { + 'cfp1', 'cfp2', 'cfp3', 'cfp4' + } + + extended_dt = { + 'deviceCustomDate1', 'deviceCustomDate2', 'end', 'fileCreateTime', + 'fileModificationTime', 'flexDate1', 'oldFileCreateTime', 'oldFileModificationTime', + 'rt', 'start', 'art' + } + + for key, value in proc_data.copy().items(): + if key in extended_ints: + try: + proc_data[key] = int(value) + except Exception: + pass + + if key in extended_floats: + try: + proc_data[key] = float(value) + except Exception: + pass + + if key in extended_dt: + if re.match(r'\d{10,13}', proc_data[key]): + proc_data[key + '_epoch'] = int(proc_data[key][:10]) + proc_data[key + '_epoch_utc'] = None + else: + formats = (1400, 1410, 1420, 1430) + dt = jc.utils.timestamp(proc_data[key], formats) + proc_data[key + '_epoch'] = dt.naive + proc_data[key + '_epoch_utc'] = dt.utc + + # Process custom field labels (adapted from pycef library) + cleanup_list = [] + custom_fields = list(proc_data.keys()) + for key in custom_fields: + if key.endswith('Label'): + customlabel = key[:-5] + for customfield in custom_fields: + new_name = proc_data[key] + # check for normal custom fields + if customfield == customlabel: + proc_data[new_name] = proc_data[customfield] + cleanup_list.append(customfield) + cleanup_list.append(key) + + # check for datetime objects + if customfield == customlabel + '_epoch': + proc_data[new_name + '_epoch'] = proc_data[customfield] + cleanup_list.append(customfield) + + if customfield == customlabel + '_epoch_utc': + proc_data[new_name + '_epoch_utc'] = proc_data[customfield] + cleanup_list.append(customfield) + + # cleanup extra custom fields + for key in cleanup_list: + del proc_data[key] + + # more normalization + for key, value in proc_data.copy().items(): + if isinstance(proc_data[key], str): + # remove any spaces around values + proc_data[key] = value.strip() + + # fixup escaped characters + for esc, esc_sub in escape_map.items(): + proc_data[key] = proc_data[key].replace(esc, esc_sub) + + # normalize keynames + new_key = key.strip() + new_key = re.sub(r'[^a-zA-Z0-9]', '_', new_key) + new_key = new_key.strip('_') + proc_data[new_key] = proc_data.pop(key) + + # integer conversions + if key in int_list: + proc_data[key] = jc.utils.convert_to_int(proc_data[key]) + + # set agentSeverityString and agentSeverityNum: + if 'agentSeverity' in proc_data: + if proc_data['agentSeverity'].lower() in severity_set: + proc_data['agentSeverityString'] = proc_data['agentSeverity'] + proc_data['agentSeverityNum'] = None + else: + try: + proc_data['agentSeverityString'] = severity_map[int(proc_data['agentSeverity'])] + proc_data['agentSeverityNum'] = int(proc_data['agentSeverity']) + except Exception: + pass + + # set deviceEventClassIdNum: + if 'deviceEventClassId' in proc_data: + proc_data['deviceEventClassIdNum'] = jc.utils.convert_to_int(proc_data['deviceEventClassId']) + + return proc_data + + +@add_jc_meta +def parse( + data: Iterable[str], + raw: bool = False, + quiet: bool = False, + ignore_exceptions: bool = False +) -> Union[Iterable[Dict], tuple]: + """ + Main text parsing generator function. Returns an iterable object. + + Parameters: + + data: (iterable) line-based text data to parse + (e.g. sys.stdin or str.splitlines()) + + raw: (boolean) unprocessed output if True + quiet: (boolean) suppress warning messages if True + ignore_exceptions: (boolean) ignore parsing exceptions if True + + + Returns: + + Iterable of Dictionaries + """ + jc.utils.compatibility(__name__, info.compatible, quiet) + streaming_input_type_check(data) + + for line in data: + try: + streaming_line_input_type_check(line) + output_line: Dict = {} + + #skip blank lines + if not line.strip(): + continue + + try: + output_line = _pycef_parse(line) + + except Exception: + output_line = { + 'unparsable': line.rstrip() + } + + if not quiet: + jc.utils.warning_message( + [f'Unparsable line found: {line.rstrip()}'] + ) + + if output_line: + yield output_line if raw else _process(output_line) + + except Exception as e: + yield raise_or_yield(ignore_exceptions, e, line) diff --git a/man/jc.1 b/man/jc.1 index f101dc07..002d11a0 100644 --- a/man/jc.1 +++ b/man/jc.1 @@ -57,6 +57,11 @@ multi-line ASCII and Unicode table parser \fB--cef\fP CEF string parser +.TP +.B +\fB--cef-s\fP +CEF string streaming parser + .TP .B \fB--chage\fP