mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
add cef streaming parser. doc updates
This commit is contained in:
@ -158,6 +158,7 @@ option.
|
||||
| ` --asciitable-m` | multi-line ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable_m) |
|
||||
| ` --blkid` | `blkid` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/blkid) |
|
||||
| ` --cef` | CEF string parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef) |
|
||||
| ` --cef-s` | CEF string streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef_s) |
|
||||
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
|
||||
| ` --cksum` | `cksum` and `sum` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cksum) |
|
||||
| ` --crontab` | `crontab` command and file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab) |
|
||||
|
@ -4,7 +4,7 @@ _jc()
|
||||
jc_about_options jc_about_mod_options jc_help_options jc_special_options
|
||||
|
||||
jc_commands=(acpi airport arp blkid chage cksum crontab date df dig dmidecode dpkg du env file finger free git gpg hciconfig id ifconfig iostat iptables iw jobs last lastb ls lsblk lsmod lsof lsusb md5 md5sum mdadm mount mpstat netstat nmcli ntpq pidstat ping ping6 pip pip3 postconf printenv ps route rpm rsync sfdisk sha1sum sha224sum sha256sum sha384sum sha512sum shasum ss stat sum sysctl systemctl systeminfo timedatectl top tracepath tracepath6 traceroute traceroute6 ufw uname update-alternatives upower uptime vdir vmstat w wc who xrandr zipinfo)
|
||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo)
|
||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo)
|
||||
jc_options=(--force-color -C --debug -d --monochrome -m --pretty -p --quiet -q --raw -r --time-out -t --unbuffer -u --yaml-out -y)
|
||||
jc_about_options=(--about -a)
|
||||
jc_about_mod_options=(--pretty -p --yaml-out -y --monochrome -m --force-color -C)
|
||||
|
@ -95,7 +95,7 @@ _jc() {
|
||||
'xrandr:run "xrandr" command with magic syntax.'
|
||||
'zipinfo:run "zipinfo" command with magic syntax.'
|
||||
)
|
||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo)
|
||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --crontab --crontab-u --csv --csv-s --date --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --finger --free --fstab --git-log --git-log-s --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iso-datetime --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --passwd --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --ps --route --rpm-qi --rsync --rsync-s --sfdisk --shadow --ss --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert --xml --xrandr --yaml --zipinfo)
|
||||
jc_parsers_describe=(
|
||||
'--acpi:`acpi` command parser'
|
||||
'--airport:`airport -I` command parser'
|
||||
@ -105,6 +105,7 @@ _jc() {
|
||||
'--asciitable-m:multi-line ASCII and Unicode table parser'
|
||||
'--blkid:`blkid` command parser'
|
||||
'--cef:CEF string parser'
|
||||
'--cef-s:CEF string streaming parser'
|
||||
'--chage:`chage --list` command parser'
|
||||
'--cksum:`cksum` and `sum` command parser'
|
||||
'--crontab:`crontab` command and file parser'
|
||||
|
@ -69,7 +69,7 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-
|
||||
|
||||
Examples:
|
||||
|
||||
$ cef | jc --cef -p
|
||||
$ cat cef.log | jc --cef -p
|
||||
[
|
||||
{
|
||||
"deviceVendor": "Trend Micro",
|
||||
@ -96,7 +96,7 @@ Examples:
|
||||
}
|
||||
]
|
||||
|
||||
$ cef | jc --cef -p -r
|
||||
$ cat cef.log | jc --cef -p -r
|
||||
[
|
||||
{
|
||||
"deviceVendor": "Trend Micro",
|
||||
|
124
docs/parsers/cef_s.md
Normal file
124
docs/parsers/cef_s.md
Normal file
@ -0,0 +1,124 @@
|
||||
[Home](https://kellyjonbrazil.github.io/jc/)
|
||||
<a id="jc.parsers.cef_s"></a>
|
||||
|
||||
# jc.parsers.cef\_s
|
||||
|
||||
jc - JSON Convert CEF string output streaming parser
|
||||
|
||||
> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
|
||||
> Dictionaries (module)
|
||||
|
||||
This parser conforms to the Microfocus Arcsight CEF specification.
|
||||
|
||||
This parser will accept a single CEF string or multiple CEF string lines.
|
||||
Any text before "CEF" will be ignored. Syslog and CEF escaped characters
|
||||
(`\\`, `\\"`, `\\]`, `\\|`, `\\=`, `\\%`, `\\#`, `\\n`, and `\\r`) are
|
||||
unescaped.
|
||||
|
||||
Extended fields, as defined in the CEF specification, are relabeled
|
||||
and the values are converted to their respective types. Extra naive and
|
||||
UTC epoch timestamps are added where appropriate per the CEF specification.
|
||||
|
||||
A warning message to `STDERR` will be printed if an unparsable line is found
|
||||
unless `--quiet` or `quiet=True` is used.
|
||||
|
||||
To preserve escaping and original keynames and to prevent type conversions
|
||||
use the `--raw` CLI option or `raw=True` param in the `parse()` function.
|
||||
|
||||
Usage (cli):
|
||||
|
||||
$ echo 'CEF:0|Vendor|Product|3.2.0|1|SYSTEM|1|... | jc --cef-s
|
||||
|
||||
Usage (module):
|
||||
|
||||
import jc
|
||||
|
||||
result = jc.parse('cef_s', cef_command_output.splitlines())
|
||||
for item in result:
|
||||
# do something
|
||||
|
||||
Schema:
|
||||
|
||||
See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm
|
||||
|
||||
> Note: Special characters in key names will be converted to underscores.
|
||||
|
||||
{
|
||||
"deviceVendor": string,
|
||||
"deviceProduct": string,
|
||||
"deviceVersion": string,
|
||||
"deviceEventClassId": string,
|
||||
"deviceEventClassIdNum": integer/null,
|
||||
"name": string,
|
||||
"agentSeverity": string/integer,
|
||||
"agentSeverityString": string,
|
||||
"agentSeverityNum": integer/null,
|
||||
"CEFVersion": integer,
|
||||
<extended fields> string/integer/float, # [0]
|
||||
<extended fields>"_epoch": integer/null, # [1]
|
||||
<extended fields>"_epoch_utc": integer/null, # [2]
|
||||
<custom fields> string,
|
||||
"unparsable": string # [3]
|
||||
|
||||
# below object only exists if using -qq or ignore_exceptions=True
|
||||
"_jc_meta": {
|
||||
"success": boolean, # false if error parsing
|
||||
"error": string, # exists if "success" is false
|
||||
"line": string # exists if "success" is false
|
||||
}
|
||||
}
|
||||
|
||||
[0] Will attempt to convert extended fields to the type specified in the
|
||||
CEF specification. If conversion fails, then the field will remain
|
||||
a string.
|
||||
[1] Naive calculated epoch timestamp
|
||||
[2] Timezone-aware calculated epoch timestamp. (UTC only) This value
|
||||
will be null if a UTC timezone cannot be extracted from the original
|
||||
timestamp string value.
|
||||
[3] This field exists if the CEF line is not parsable. The value
|
||||
is the original syslog line.
|
||||
|
||||
Examples:
|
||||
|
||||
$ cat cef.log | jc --cef-s
|
||||
{"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...}
|
||||
{"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...}
|
||||
...
|
||||
|
||||
$ cat cef.log | jc --cef-s -r
|
||||
{"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...}
|
||||
{"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...}
|
||||
...
|
||||
|
||||
<a id="jc.parsers.cef_s.parse"></a>
|
||||
|
||||
### parse
|
||||
|
||||
```python
|
||||
@add_jc_meta
|
||||
def parse(data: Iterable[str],
|
||||
raw: bool = False,
|
||||
quiet: bool = False,
|
||||
ignore_exceptions: bool = False) -> Union[Iterable[Dict], tuple]
|
||||
```
|
||||
|
||||
Main text parsing generator function. Returns an iterable object.
|
||||
|
||||
Parameters:
|
||||
|
||||
data: (iterable) line-based text data to parse
|
||||
(e.g. sys.stdin or str.splitlines())
|
||||
|
||||
raw: (boolean) unprocessed output if True
|
||||
quiet: (boolean) suppress warning messages if True
|
||||
ignore_exceptions: (boolean) ignore parsing exceptions if True
|
||||
|
||||
|
||||
Returns:
|
||||
|
||||
Iterable of Dictionaries
|
||||
|
||||
### Parser Information
|
||||
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
|
||||
|
||||
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)
|
@ -17,6 +17,7 @@ parsers = [
|
||||
'asciitable-m',
|
||||
'blkid',
|
||||
'cef',
|
||||
'cef-s',
|
||||
'chage',
|
||||
'cksum',
|
||||
'crontab',
|
||||
|
@ -64,7 +64,7 @@ See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-
|
||||
|
||||
Examples:
|
||||
|
||||
$ cef | jc --cef -p
|
||||
$ cat cef.log | jc --cef -p
|
||||
[
|
||||
{
|
||||
"deviceVendor": "Trend Micro",
|
||||
@ -91,7 +91,7 @@ Examples:
|
||||
}
|
||||
]
|
||||
|
||||
$ cef | jc --cef -p -r
|
||||
$ cat cef.log | jc --cef -p -r
|
||||
[
|
||||
{
|
||||
"deviceVendor": "Trend Micro",
|
||||
|
320
jc/parsers/cef_s.py
Normal file
320
jc/parsers/cef_s.py
Normal file
@ -0,0 +1,320 @@
|
||||
"""jc - JSON Convert CEF string output streaming parser
|
||||
|
||||
> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
|
||||
> Dictionaries (module)
|
||||
|
||||
This parser conforms to the Microfocus Arcsight CEF specification.
|
||||
|
||||
This parser will accept a single CEF string or multiple CEF string lines.
|
||||
Any text before "CEF" will be ignored. Syslog and CEF escaped characters
|
||||
(`\\`, `\\"`, `\\]`, `\\|`, `\\=`, `\\%`, `\\#`, `\\n`, and `\\r`) are
|
||||
unescaped.
|
||||
|
||||
Extended fields, as defined in the CEF specification, are relabeled
|
||||
and the values are converted to their respective types. Extra naive and
|
||||
UTC epoch timestamps are added where appropriate per the CEF specification.
|
||||
|
||||
A warning message to `STDERR` will be printed if an unparsable line is found
|
||||
unless `--quiet` or `quiet=True` is used.
|
||||
|
||||
To preserve escaping and original keynames and to prevent type conversions
|
||||
use the `--raw` CLI option or `raw=True` param in the `parse()` function.
|
||||
|
||||
Usage (cli):
|
||||
|
||||
$ echo 'CEF:0|Vendor|Product|3.2.0|1|SYSTEM|1|... | jc --cef-s
|
||||
|
||||
Usage (module):
|
||||
|
||||
import jc
|
||||
|
||||
result = jc.parse('cef_s', cef_command_output.splitlines())
|
||||
for item in result:
|
||||
# do something
|
||||
|
||||
Schema:
|
||||
|
||||
See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm
|
||||
|
||||
> Note: Special characters in key names will be converted to underscores.
|
||||
|
||||
{
|
||||
"deviceVendor": string,
|
||||
"deviceProduct": string,
|
||||
"deviceVersion": string,
|
||||
"deviceEventClassId": string,
|
||||
"deviceEventClassIdNum": integer/null,
|
||||
"name": string,
|
||||
"agentSeverity": string/integer,
|
||||
"agentSeverityString": string,
|
||||
"agentSeverityNum": integer/null,
|
||||
"CEFVersion": integer,
|
||||
<extended fields> string/integer/float, # [0]
|
||||
<extended fields>"_epoch": integer/null, # [1]
|
||||
<extended fields>"_epoch_utc": integer/null, # [2]
|
||||
<custom fields> string,
|
||||
"unparsable": string # [3]
|
||||
|
||||
# below object only exists if using -qq or ignore_exceptions=True
|
||||
"_jc_meta": {
|
||||
"success": boolean, # false if error parsing
|
||||
"error": string, # exists if "success" is false
|
||||
"line": string # exists if "success" is false
|
||||
}
|
||||
}
|
||||
|
||||
[0] Will attempt to convert extended fields to the type specified in the
|
||||
CEF specification. If conversion fails, then the field will remain
|
||||
a string.
|
||||
[1] Naive calculated epoch timestamp
|
||||
[2] Timezone-aware calculated epoch timestamp. (UTC only) This value
|
||||
will be null if a UTC timezone cannot be extracted from the original
|
||||
timestamp string value.
|
||||
[3] This field exists if the CEF line is not parsable. The value
|
||||
is the original syslog line.
|
||||
|
||||
Examples:
|
||||
|
||||
$ cat cef.log | jc --cef-s
|
||||
{"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...}
|
||||
{"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...}
|
||||
...
|
||||
|
||||
$ cat cef.log | jc --cef-s -r
|
||||
{"deviceVendor":"Fortinet","deviceProduct":"FortiDeceptor","deviceV...}
|
||||
{"deviceVendor":"Trend Micro","deviceProduct":"Deep Security Agent"...}
|
||||
...
|
||||
"""
|
||||
from typing import Dict, Iterable, Union
|
||||
import re
|
||||
import jc.utils
|
||||
from jc.streaming import (
|
||||
add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield
|
||||
)
|
||||
from jc.exceptions import ParseError
|
||||
from jc.parsers.cef import _pycef_parse
|
||||
|
||||
|
||||
class info():
|
||||
"""Provides parser metadata (version, author, etc.)"""
|
||||
version = '1.0'
|
||||
description = 'CEF string streaming parser'
|
||||
author = 'Kelly Brazil'
|
||||
author_email = 'kellyjonbrazil@gmail.com'
|
||||
compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
|
||||
streaming = True
|
||||
|
||||
|
||||
__version__ = info.version
|
||||
|
||||
|
||||
def _process(proc_data: Dict) -> Dict:
|
||||
"""
|
||||
Final processing to conform to the schema.
|
||||
|
||||
Parameters:
|
||||
|
||||
proc_data: (Dictionary) raw structured data to process
|
||||
|
||||
Returns:
|
||||
|
||||
Dictionary. Structured data to conform to the schema.
|
||||
"""
|
||||
# fix escape chars specified in syslog RFC 5424 and CEF spec
|
||||
# https://www.rfc-editor.org/rfc/rfc5424.html#section-6
|
||||
# https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm?tocpath=_____2#_Toc494359738
|
||||
escape_map = {
|
||||
r'\\': '\\',
|
||||
r'\"': '"',
|
||||
r'\]': ']',
|
||||
r'\|': '|',
|
||||
r'\=': '=',
|
||||
r'\%': '%',
|
||||
r'\#': '#',
|
||||
r'\n': '\n',
|
||||
r'\r': '\r'
|
||||
}
|
||||
|
||||
int_list = {'CEFVersion'}
|
||||
|
||||
severity_map = {
|
||||
None: 'Unknown',
|
||||
0: 'Low',
|
||||
1: 'Low',
|
||||
2: 'Low',
|
||||
3: 'Low',
|
||||
4: 'Medium',
|
||||
5: 'Medium',
|
||||
6: 'Medium',
|
||||
7: 'High',
|
||||
8: 'High',
|
||||
9: 'Very-High',
|
||||
10: 'Very-High'
|
||||
}
|
||||
|
||||
severity_set = {'unknown', 'low', 'medium', 'high', 'very-high'}
|
||||
|
||||
# set defined types for extended fields
|
||||
# see https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/#CEF/Chapter%202%20ArcSight%20Extension.htm
|
||||
extended_ints = {
|
||||
'spid', 'customerKey', 'deviceTranslatedZoneKey', 'oldFileSize',
|
||||
'destinationTranslatedPort', 'cn3', 'sourceTranslatedPort', 'in', 'fsize', 'slat',
|
||||
'dpid', 'cnt', 'agentZoneKey', 'out', 'type', 'eventId', 'dlong', 'cn2',
|
||||
'deviceDirection', 'spt', 'agentTranslatedZoneKey', 'sTranslatedZoneKey', 'cn1',
|
||||
'slong', 'dZoneKey', 'deviceZoneKey', 'dvcpid', 'dpt', 'dTranslatedZoneKey', 'dlat',
|
||||
'sZoneKey'
|
||||
}
|
||||
|
||||
extended_floats = {
|
||||
'cfp1', 'cfp2', 'cfp3', 'cfp4'
|
||||
}
|
||||
|
||||
extended_dt = {
|
||||
'deviceCustomDate1', 'deviceCustomDate2', 'end', 'fileCreateTime',
|
||||
'fileModificationTime', 'flexDate1', 'oldFileCreateTime', 'oldFileModificationTime',
|
||||
'rt', 'start', 'art'
|
||||
}
|
||||
|
||||
for key, value in proc_data.copy().items():
|
||||
if key in extended_ints:
|
||||
try:
|
||||
proc_data[key] = int(value)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if key in extended_floats:
|
||||
try:
|
||||
proc_data[key] = float(value)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if key in extended_dt:
|
||||
if re.match(r'\d{10,13}', proc_data[key]):
|
||||
proc_data[key + '_epoch'] = int(proc_data[key][:10])
|
||||
proc_data[key + '_epoch_utc'] = None
|
||||
else:
|
||||
formats = (1400, 1410, 1420, 1430)
|
||||
dt = jc.utils.timestamp(proc_data[key], formats)
|
||||
proc_data[key + '_epoch'] = dt.naive
|
||||
proc_data[key + '_epoch_utc'] = dt.utc
|
||||
|
||||
# Process custom field labels (adapted from pycef library)
|
||||
cleanup_list = []
|
||||
custom_fields = list(proc_data.keys())
|
||||
for key in custom_fields:
|
||||
if key.endswith('Label'):
|
||||
customlabel = key[:-5]
|
||||
for customfield in custom_fields:
|
||||
new_name = proc_data[key]
|
||||
# check for normal custom fields
|
||||
if customfield == customlabel:
|
||||
proc_data[new_name] = proc_data[customfield]
|
||||
cleanup_list.append(customfield)
|
||||
cleanup_list.append(key)
|
||||
|
||||
# check for datetime objects
|
||||
if customfield == customlabel + '_epoch':
|
||||
proc_data[new_name + '_epoch'] = proc_data[customfield]
|
||||
cleanup_list.append(customfield)
|
||||
|
||||
if customfield == customlabel + '_epoch_utc':
|
||||
proc_data[new_name + '_epoch_utc'] = proc_data[customfield]
|
||||
cleanup_list.append(customfield)
|
||||
|
||||
# cleanup extra custom fields
|
||||
for key in cleanup_list:
|
||||
del proc_data[key]
|
||||
|
||||
# more normalization
|
||||
for key, value in proc_data.copy().items():
|
||||
if isinstance(proc_data[key], str):
|
||||
# remove any spaces around values
|
||||
proc_data[key] = value.strip()
|
||||
|
||||
# fixup escaped characters
|
||||
for esc, esc_sub in escape_map.items():
|
||||
proc_data[key] = proc_data[key].replace(esc, esc_sub)
|
||||
|
||||
# normalize keynames
|
||||
new_key = key.strip()
|
||||
new_key = re.sub(r'[^a-zA-Z0-9]', '_', new_key)
|
||||
new_key = new_key.strip('_')
|
||||
proc_data[new_key] = proc_data.pop(key)
|
||||
|
||||
# integer conversions
|
||||
if key in int_list:
|
||||
proc_data[key] = jc.utils.convert_to_int(proc_data[key])
|
||||
|
||||
# set agentSeverityString and agentSeverityNum:
|
||||
if 'agentSeverity' in proc_data:
|
||||
if proc_data['agentSeverity'].lower() in severity_set:
|
||||
proc_data['agentSeverityString'] = proc_data['agentSeverity']
|
||||
proc_data['agentSeverityNum'] = None
|
||||
else:
|
||||
try:
|
||||
proc_data['agentSeverityString'] = severity_map[int(proc_data['agentSeverity'])]
|
||||
proc_data['agentSeverityNum'] = int(proc_data['agentSeverity'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# set deviceEventClassIdNum:
|
||||
if 'deviceEventClassId' in proc_data:
|
||||
proc_data['deviceEventClassIdNum'] = jc.utils.convert_to_int(proc_data['deviceEventClassId'])
|
||||
|
||||
return proc_data
|
||||
|
||||
|
||||
@add_jc_meta
|
||||
def parse(
|
||||
data: Iterable[str],
|
||||
raw: bool = False,
|
||||
quiet: bool = False,
|
||||
ignore_exceptions: bool = False
|
||||
) -> Union[Iterable[Dict], tuple]:
|
||||
"""
|
||||
Main text parsing generator function. Returns an iterable object.
|
||||
|
||||
Parameters:
|
||||
|
||||
data: (iterable) line-based text data to parse
|
||||
(e.g. sys.stdin or str.splitlines())
|
||||
|
||||
raw: (boolean) unprocessed output if True
|
||||
quiet: (boolean) suppress warning messages if True
|
||||
ignore_exceptions: (boolean) ignore parsing exceptions if True
|
||||
|
||||
|
||||
Returns:
|
||||
|
||||
Iterable of Dictionaries
|
||||
"""
|
||||
jc.utils.compatibility(__name__, info.compatible, quiet)
|
||||
streaming_input_type_check(data)
|
||||
|
||||
for line in data:
|
||||
try:
|
||||
streaming_line_input_type_check(line)
|
||||
output_line: Dict = {}
|
||||
|
||||
#skip blank lines
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
try:
|
||||
output_line = _pycef_parse(line)
|
||||
|
||||
except Exception:
|
||||
output_line = {
|
||||
'unparsable': line.rstrip()
|
||||
}
|
||||
|
||||
if not quiet:
|
||||
jc.utils.warning_message(
|
||||
[f'Unparsable line found: {line.rstrip()}']
|
||||
)
|
||||
|
||||
if output_line:
|
||||
yield output_line if raw else _process(output_line)
|
||||
|
||||
except Exception as e:
|
||||
yield raise_or_yield(ignore_exceptions, e, line)
|
Reference in New Issue
Block a user