From 07c65351d555fc2bebe515bb20a91ba58b25d0ad Mon Sep 17 00:00:00 2001
From: Kelly Brazil <kellyjonbrazil@gmail.com>
Date: Wed, 17 Aug 2022 15:20:18 -0700
Subject: [PATCH] rename keys to match spec. attempt type conversions for
 extended fields.

---
 jc/parsers/cef.py | 106 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 91 insertions(+), 15 deletions(-)
diff --git a/jc/parsers/cef.py b/jc/parsers/cef.py
index c63306a7..497f67ca 100644
--- a/jc/parsers/cef.py
+++ b/jc/parsers/cef.py
@@ -21,14 +21,28 @@ Usage (module):
 
 Schema:
 
+See: https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm
+
     [
       {
-        "cef":     string,
-        "bar":     boolean,
-        "baz":     integer
+        "deviceVendor":                   string,
+        "deviceProduct":                  string,
+        "deviceVersion":                  string,
+        "deviceEventClassId":             string,
+        "name":                           string,
+        "agentSeverity":                  string/integer,
+        "agentSeverityString":            string,
+        "agentSeverityNum":               integer,
+        "CEF_Version":                    integer,
+        <extended fields>                 string/integer/float,  # [0]
+        <custom fields>                   string
       }
     ]
 
+    [0] Will attempt to convert extended fields to the type specified in the
+        CEF specification. If conversion fails, then the field will remain
+        a string.
+
 Examples:
 
     $ cef | jc --cef -p
@@ -114,13 +128,13 @@ def _pycef_parse(str_input):
 
         # Since these values are set by their position in the header, it's
         # easy to know which is which.
-        values["DeviceVendor"] = spl[1]
-        values["DeviceProduct"] = spl[2]
-        values["DeviceVersion"] = spl[3]
-        values["DeviceEventClassID"] = spl[4]
-        values["Name"] = spl[5]
+        values["deviceVendor"] = spl[1]
+        values["deviceProduct"] = spl[2]
+        values["deviceVersion"] = spl[3]
+        values["deviceEventClassId"] = spl[4]
+        values["name"] = spl[5]
         if len(spl) > 6:
-            values["Severity"] = spl[6]
+            values["agentSeverity"] = spl[6]
 
         # The first value is actually the CEF version, formatted like
         # "CEF:#".  Ignore anything before that (like a date from a syslog message).
@@ -130,7 +144,7 @@ def _pycef_parse(str_input):
         if cef_start == -1:
             raise ParseError('Invalid CEF string.')
         (cef, version) = spl[0][cef_start:].split(':')
-        values["CEFVersion"] = version
+        values["CEF_Version"] = version
 
         # The ugly, gnarly regex here finds a single key=value pair,
         # taking into account multiple whitespaces, escaped '=' and '|'
@@ -141,6 +155,34 @@ def _pycef_parse(str_input):
             # Split the tuples and put them into the dictionary
             values[i[0]] = i[1]
 
+        # set defined types for extended fields
+        # see https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/#CEF/Chapter%202%20ArcSight%20Extension.htm
+        extended_ints = {
+            'spid', 'customerKey', 'deviceTranslatedZoneKey', 'oldFileSize',
+            'destination TranslatedPort', 'cn3', 'source TranslatedPort', 'in', 'fsize', 'slat',
+            'dpid', 'cnt', 'agentZoneKey', 'out', 'type', 'eventId', 'dlong', 'cn2',
+            'deviceDirection', 'spt', 'agentTranslatedZoneKey', 'sTranslatedZoneKey', 'cn1',
+            'slong', 'dZoneKey', 'deviceZoneKey', 'dvcpid', 'dpt', 'dTranslatedZoneKey', 'dlat',
+            'sZoneKey'
+        }
+
+        extended_floats = {
+            'cfp1', 'cfp2', 'cfp3', 'cfp4'
+        }
+
+        for k, v in values.items():
+            if k in extended_ints:
+                try:
+                    values[k] = int(v)
+                except Exception:
+                    pass
+
+            if k in extended_floats:
+                try:
+                    values[k] = float(v)
+                except Exception:
+                    pass
+
         # Process custom field labels
         for key in list(values.keys()):
             # If the key string ends with Label, replace it in the appropriate
@@ -188,14 +230,34 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
         r'\r': '\r'
     }
 
+    int_list = {'CEF_Version'}
+
+    severity_map = {
+        None: 'Unknown',
+        0: 'Low',
+        1: 'Low',
+        2: 'Low',
+        3: 'Low',
+        4: 'Medium',
+        5: 'Medium',
+        6: 'Medium',
+        7: 'High',
+        8: 'High',
+        9: 'Very-High',
+        10: 'Very-High'
+    }
+
+    severity_set = {'unknown', 'low', 'medium', 'high', 'very-high'}
+
     for item in proc_data:
         for key, value in item.copy().items():
-            # remove any spaces around values
-            item[key] = value.strip()
+            if isinstance(item[key], str):
+                # remove any spaces around values
+                item[key] = value.strip()
 
-            # fixup escaped characters
-            for esc, esc_sub in escape_map.items():
-                item[key] = item[key].replace(esc, esc_sub)
+                # fixup escaped characters
+                for esc, esc_sub in escape_map.items():
+                    item[key] = item[key].replace(esc, esc_sub)
 
             # normalize keynames
             new_key = key.strip()
@@ -203,6 +265,20 @@ def _process(proc_data: List[Dict]) -> List[Dict]:
             new_key = new_key.strip('_')
             item[new_key] = item.pop(key)
 
+            # integer conversions
+            if key in int_list:
+                item[key] = jc.utils.convert_to_int(item[key])
+
+        # set agentSeverityString and agentSeverityNum:
+        if 'agentSeverity' in item:
+            if isinstance(item['agentSeverity'], str) and item['agentSeverity'].lower() in severity_set:
+                item['agentSeverityString'] = item['agentSeverity']
+                item['agentSeverityNum'] = None
+            else:
+                item['agentSeverity'] = int(item['agentSeverity'])
+                item['agentSeverityString'] = severity_map[item['agentSeverity']]
+                item['agentSeverityNum'] = item['agentSeverity']
+
     return proc_data