From dafbf9fdcf53a8abbd006baef0be6697f860d2a1 Mon Sep 17 00:00:00 2001
From: Kelly Brazil <kellyjonbrazil@gmail.com>
Date: Wed, 6 Nov 2019 19:17:01 -0800
Subject: [PATCH] process lsmod data

---
 jc/parsers/lsmod.py    |  85 +++++++++++++++++++++++++++++--
 jc/parsers/lsof.py     | 112 +++++++++++++++++++++++++++++------------
 jc/parsers/mount.py    |  31 ++++++++++--
 jc/parsers/netstat2.py |  80 +++++++++++++++++++++++------
 4 files changed, 251 insertions(+), 57 deletions(-)

diff --git a/jc/parsers/lsmod.py b/jc/parsers/lsmod.py
index d269e35a..c34befcb 100644
--- a/jc/parsers/lsmod.py
+++ b/jc/parsers/lsmod.py
@@ -3,9 +3,54 @@
 Usage:
     specify --lsmod as the first argument if the piped input is coming from lsmod
 
-Example:
+Examples:
 
 $ lsmod | jc --lsmod -p
+[
+  ...
+  {
+    "module": "nf_nat",
+    "size": 26583,
+    "used": 3,
+    "by": [
+      "nf_nat_ipv4",
+      "nf_nat_ipv6",
+      "nf_nat_masquerade_ipv4"
+    ]
+  },
+  {
+    "module": "iptable_mangle",
+    "size": 12695,
+    "used": 1
+  },
+  {
+    "module": "iptable_security",
+    "size": 12705,
+    "used": 1
+  },
+  {
+    "module": "iptable_raw",
+    "size": 12678,
+    "used": 1
+  },
+  {
+    "module": "nf_conntrack",
+    "size": 139224,
+    "used": 7,
+    "by": [
+      "nf_nat",
+      "nf_nat_ipv4",
+      "nf_nat_ipv6",
+      "xt_conntrack",
+      "nf_nat_masquerade_ipv4",
+      "nf_conntrack_ipv4",
+      "nf_conntrack_ipv6"
+    ]
+  },
+  ...
+]
+
+$ lsmod | jc --lsmod -p -r
 [
   ...
   {
@@ -55,7 +100,34 @@ $ lsmod | jc --lsmod -p
 import jc
 
 
-def parse(data):
+def process(proc_data):
+    '''Convert the "size" and "used" fields of each lsmod entry to integers.
+
+    Values that cannot be converted become None. Entries are modified in
+    place and the same list is returned. Schema:
+    [
+      {
+        "module": string,
+        "size":   integer,
+        "used":   integer,
+        "by":     [string]
+      }
+    ]
+    '''
+    int_list = ['size', 'used']
+    for entry in proc_data:
+        for key in int_list:
+            if key in entry:
+                try:
+                    entry[key] = int(entry[key])
+                # int(None) raises TypeError rather than ValueError
+                except (ValueError, TypeError):
+                    entry[key] = None
+
+    return proc_data
+
+
+def parse(data, raw=False):
     # compatible options: linux, darwin, cygwin, win32, aix, freebsd
     jc.jc.compatibility(__name__,
                         ['linux'])
@@ -67,10 +139,13 @@ def parse(data):
     headers = [h for h in ' '.join(cleandata[0].lower().strip().split()).split() if h]
 
     raw_data = map(lambda s: s.strip().split(None, len(headers) - 1), cleandata[1:])
-    output = [dict(zip(headers, r)) for r in raw_data]
+    raw_output = [dict(zip(headers, r)) for r in raw_data]
 
-    for mod in output:
+    for mod in raw_output:
         if 'by' in mod:
             mod['by'] = mod['by'].split(',')
 
-    return output
+    if raw:
+        return raw_output
+    else:
+        return process(raw_output)
diff --git a/jc/parsers/lsof.py b/jc/parsers/lsof.py
index e2a5f1ec..7d3dcdf5 100644
--- a/jc/parsers/lsof.py
+++ b/jc/parsers/lsof.py
@@ -3,9 +3,50 @@
 Usage:
     specify --lsof as the first argument if the piped input is coming from lsof
 
-Example:
+Examples:
 
-$ sudo lsof | jc --lsof -p | more
+$ sudo lsof | jc --lsof -p
+[
+  {
+    "command": "systemd",
+    "pid": 1,
+    "tid": null,
+    "user": "root",
+    "fd": "cwd",
+    "type": "DIR",
+    "device": "253,0",
+    "size_off": 224,
+    "node": 64,
+    "name": "/"
+  },
+  {
+    "command": "systemd",
+    "pid": 1,
+    "tid": null,
+    "user": "root",
+    "fd": "rtd",
+    "type": "DIR",
+    "device": "253,0",
+    "size_off": 224,
+    "node": 64,
+    "name": "/"
+  },
+  {
+    "command": "systemd",
+    "pid": 1,
+    "tid": null,
+    "user": "root",
+    "fd": "txt",
+    "type": "REG",
+    "device": "253,0",
+    "size_off": 1624520,
+    "node": 50360451,
+    "name": "/usr/lib/systemd/systemd"
+  },
+  ...
+]
+
+$ sudo lsof | jc --lsof -p -r
 [
   {
     "command": "systemd",
@@ -43,30 +84,6 @@ $ sudo lsof | jc --lsof -p | more
     "node": "668802",
     "name": "/lib/systemd/systemd"
   },
-  {
-    "command": "systemd",
-    "pid": "1",
-    "tid": null,
-    "user": "root",
-    "fd": "mem",
-    "type": "REG",
-    "device": "8,2",
-    "size_off": "1700792",
-    "node": "656167",
-    "name": "/lib/x86_64-linux-gnu/libm-2.27.so"
-  },
-  {
-    "command": "systemd",
-    "pid": "1",
-    "tid": null,
-    "user": "root",
-    "fd": "mem",
-    "type": "REG",
-    "device": "8,2",
-    "size_off": "121016",
-    "node": "655394",
-    "name": "/lib/x86_64-linux-gnu/libudev.so.1.6.9"
-  },
   ...
 ]
 """
@@ -74,12 +91,42 @@ import string
 import jc
 
 
-def parse(data):
+def process(proc_data):
+    '''Cast the numeric lsof columns to integers, in place. Schema:
+    [
+      {
+        "command":    string,
+        "pid":        integer,
+        "tid":        integer,
+        "user":       string,
+        "fd":         string,
+        "type":       string,
+        "device":     string,
+        "size_off":   integer,
+        "node":       integer,
+        "name":       string
+      }
+    ]
+    '''
+    numeric_keys = ('pid', 'tid', 'size_off', 'node')
+    for row in proc_data:
+        # only touch the numeric columns this row actually has
+        for field in (k for k in numeric_keys if k in row):
+            try:
+                converted = int(row[field])
+            except (ValueError, TypeError):
+                # e.g. tid is None, or the value is a non-numeric string
+                converted = None
+            row[field] = converted
+    return proc_data
+
+
+def parse(data, raw=False):
     # compatible options: linux, darwin, cygwin, win32, aix, freebsd
     jc.jc.compatibility(__name__,
                         ['linux'])
 
-    output = []
+    raw_output = []
 
     linedata = data.splitlines()
 
@@ -93,7 +140,7 @@ def parse(data):
 
         # clean up 'size/off' header
         # even though forward slash in a key is valid json, it can make things difficult
-        header_row = header_text.replace('size/off', 'size_off')
+        header_row = header_text.replace('/', '_')
 
         headers = header_row.split()
 
@@ -125,6 +172,9 @@ def parse(data):
             fixed_line.append(name)
 
             output_line = dict(zip(headers, fixed_line))
-            output.append(output_line)
+            raw_output.append(output_line)
 
-    return output
+    if raw:
+        return raw_output
+    else:
+        return process(raw_output)
diff --git a/jc/parsers/mount.py b/jc/parsers/mount.py
index c96e02d6..177af886 100644
--- a/jc/parsers/mount.py
+++ b/jc/parsers/mount.py
@@ -50,12 +50,30 @@ $ mount | jc --mount -p
 import jc
 
 
-def parse(data):
+def process(proc_data):
+    '''Return proc_data unchanged; there is nothing to process.
+
+    schema:
+    [
+      {
+        "filesystem":   string,
+        "mount_point":  string,
+        "type":         string,
+        "options": [
+                        string
+        ]
+      }
+    ]
+    '''
+    return proc_data
+
+
+def parse(data, raw=False):
     # compatible options: linux, darwin, cygwin, win32, aix, freebsd
     jc.jc.compatibility(__name__,
                         ['linux'])
 
-    output = []
+    raw_output = []
 
     linedata = data.splitlines()
 
@@ -73,8 +91,11 @@ def parse(data):
 
             access = parsed_line[5].lstrip('(').rstrip(')').split(',')
 
-            output_line['access'] = access
+            output_line['options'] = access
 
-            output.append(output_line)
+            raw_output.append(output_line)
 
-    return output
+    if raw:
+        return raw_output
+    else:
+        return process(raw_output)
diff --git a/jc/parsers/netstat2.py b/jc/parsers/netstat2.py
index 9c129843..a08439fe 100644
--- a/jc/parsers/netstat2.py
+++ b/jc/parsers/netstat2.py
@@ -3,53 +3,91 @@
 Usage:
     Specify --netstat as the first argument if the piped input is coming from netstat
 """
+import string
 import jc
 
 
+def process(proc_data):
+    '''No processing yet; proc_data is returned as-is. Schema (example values):
+    [
+      {
+        "proto": "tcp",
+        "recv-q": "0",
+        "send-q": "0",
+        "local_address": "0.0.0.0:22",
+        "foreign_address": "0.0.0.0:*",
+        "state": "LISTEN",
+        "program_name": "1219/sshd",
+        "security_context": "system_u:system_r:sshd_t:s0-s0:c0.c1023           ",
+        "refcnt": "2",
+        "flags": "ACC",
+        "type": "STREAM",
+        "i-node": "20782",
+        "path": "/var/run/NetworkManager/private-dhcp",
+        "kind": "network"
+      }
+    ]
+    '''
+    return proc_data
+
+
 def normalize_headers(header):
     header = header.lower()
     header = header.replace('local address', 'local_address')
     header = header.replace('foreign address', 'foreign_address')
     header = header.replace('pid/program name', 'program_name')
     header = header.replace('security context', 'security_context')
-    return header.split()
+    # returned unsplit: parse_socket() needs column offsets from the header text
+    return header
 
 
 def parse_network(headers, entry):
     # Count words in header
     # if len of line is one less than len of header, then insert None in field 5
-    output_line = {}
+    fields = entry.split(maxsplit=len(headers) - 1)
+
+    # a row one field short gets None at index 5 so zip() stays aligned
+    if len(fields) + 1 == len(headers):
+        fields.insert(5, None)
+
+    output_line = dict(zip(headers, fields), kind='network')
+
     return output_line
 
 
-def parse_socket(headers, entry):
+def parse_socket(header_text, headers, entry):
     # get the column # of first letter of "state"
     # for each line check column # to see if state column is populated
     # remove [ and ] from each line
     output_line = {}
+    state_col = header_text.find('state')
+
+    entry = entry.replace('[ ]', '---')
+    entry = entry.replace('[', ' ').replace(']', ' ')
+    entry_list = entry.split(maxsplit=len(headers) - 1)
+    # slice, not index: a row shorter than the state column yields '' (blank)
+    if state_col >= 0 and entry[state_col:state_col + 1] in string.whitespace:
+        entry_list.insert(4, None)
+
+    output_line = dict(zip(headers, entry_list))
+    output_line['kind'] = 'socket'
     return output_line
 
 
-def post_process(network_list, socket_list):
-    output = {}
-
-    if network_list:
-        output['network'] = network_list
-
-    if socket_list:
-        output['socket'] = socket_list
+def parse_post(raw_data):
 
     # post process to split pid and program name and ip addresses and ports
 
-    return output
+    return raw_data  # TODO: splitting described above is not implemented; pass-through
 
 
-def parse(data):
+def parse(data, raw=False):
     # compatible options: linux, darwin, cygwin, win32, aix, freebsd
     jc.jc.compatibility(__name__,
                         ['linux'])
 
     cleandata = data.splitlines()
+    raw_output = []
 
     network = False
     socket = False
@@ -70,7 +108,8 @@ def parse(data):
             continue
 
         if line.find('Proto') == 0:
-            headers = normalize_headers(line)
+            header_text = normalize_headers(line)
+            headers = header_text.split()
             continue
 
         if network:
@@ -78,10 +117,19 @@ def parse(data):
             continue
 
         if socket:
-            socket_list.append(parse_socket(headers, line))
+            socket_list.append(parse_socket(header_text, headers, line))
             continue
 
-    return post_process(network_list, socket_list)
+    for item in [network_list, socket_list]:
+        for entry in item:
+            raw_output.append(entry)
+
+    raw_output = parse_post(raw_output)
+
+    if raw:
+        return raw_output
+    else:
+        return process(raw_output)