1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00
Files
jc/jc/parsers/lsof.py

210 lines
4.9 KiB
Python
Raw Normal View History

2019-10-23 17:22:25 -07:00
"""jc - JSON CLI output utility lsof Parser
Usage:
    specify --lsof as the first argument if the piped input is coming from lsof

Compatibility:
    'linux'
2019-11-06 19:17:01 -08:00
Examples:
2019-10-23 17:22:25 -07:00
2019-11-11 18:30:46 -08:00
$ sudo lsof | jc --lsof -p
[
{
"command": "systemd",
"pid": 1,
"tid": null,
"user": "root",
"fd": "cwd",
"type": "DIR",
"device": "253,0",
"size_off": 224,
"node": 64,
"name": "/"
},
{
"command": "systemd",
"pid": 1,
"tid": null,
"user": "root",
"fd": "rtd",
"type": "DIR",
"device": "253,0",
"size_off": 224,
"node": 64,
"name": "/"
},
{
"command": "systemd",
"pid": 1,
"tid": null,
"user": "root",
"fd": "txt",
"type": "REG",
"device": "253,0",
"size_off": 1624520,
"node": 50360451,
"name": "/usr/lib/systemd/systemd"
},
...
]
$ sudo lsof | jc --lsof -p -r
[
{
"command": "systemd",
"pid": "1",
"tid": null,
"user": "root",
"fd": "cwd",
"type": "DIR",
"device": "8,2",
"size_off": "4096",
"node": "2",
"name": "/"
},
{
"command": "systemd",
"pid": "1",
"tid": null,
"user": "root",
"fd": "rtd",
"type": "DIR",
"device": "8,2",
"size_off": "4096",
"node": "2",
"name": "/"
},
{
"command": "systemd",
"pid": "1",
"tid": null,
"user": "root",
"fd": "txt",
"type": "REG",
"device": "8,2",
"size_off": "1595792",
"node": "668802",
"name": "/lib/systemd/systemd"
},
...
]
2019-10-23 17:22:25 -07:00
"""
2019-10-25 10:04:29 -07:00
import string
2019-11-07 08:07:43 -08:00
import jc.utils
2019-10-23 17:22:25 -07:00
2019-11-06 19:17:01 -08:00
def process(proc_data):
    """
    Final processing to conform to the schema.

    Converts the numeric fields of every entry to integers, in place.
    A value that cannot be converted (for example None or a non-numeric
    string) is replaced with None.

    Parameters:

        proc_data:   (list of dictionaries) raw structured data to process

    Returns:

        list of dictionaries (the same list object that was passed in)
        with the following schema:

        [
          {
            "command":    string,
            "pid":        integer,
            "tid":        integer,
            "user":       string,
            "fd":         string,
            "type":       string,
            "device":     string,
            "size_off":   integer,
            "node":       integer,
            "name":       string
          }
        ]
    """
    # keys whose values are converted to integers; built once, not per entry
    int_keys = ('pid', 'tid', 'size_off', 'node')

    for entry in proc_data:
        for key in int_keys:
            # only touch keys that are present so no new keys are introduced
            if key in entry:
                try:
                    entry[key] = int(entry[key])
                except (ValueError, TypeError):
                    # None placeholders and non-numeric text become null
                    entry[key] = None

    return proc_data
2019-11-07 08:07:43 -08:00
def parse(data, raw=False, quiet=False):
    """
    Main text parsing function

    The first non-blank line is treated as the header row. Each following
    line is split on whitespace, and a None placeholder is inserted for
    every column (other than 'command' and 'name') whose position under
    the last character of its header is blank or past the end of the line.

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) output preprocessed JSON if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        list of dictionaries, raw or processed structured data
    """
    # compatible options: linux, darwin, cygwin, win32, aix, freebsd
    compatible = ['linux']

    if not quiet:
        jc.utils.compatibility(__name__, compatible)

    raw_output = []

    # Clear any blank lines
    cleandata = [line for line in data.splitlines() if line]

    if cleandata:
        header_text = cleandata.pop(0).lower()

        # clean up 'size/off' header
        # even though forward slash in a key is valid json, it can make things difficult
        header_row = header_text.replace('/', '_')
        headers = header_row.split()

        # find column value of last character of each header
        # header tuple is (index, header_name, end_col)
        header_spec = []
        search_from = 0
        for index, header_name in enumerate(headers):
            # search from the end of the previous header so a header that is a
            # substring of an earlier one cannot match at the wrong position
            end_col = header_row.find(header_name, search_from) + len(header_name)
            header_spec.append((index, header_name, end_col))
            search_from = end_col

        # the last header takes the remainder of the line; derive its index
        # from the header row instead of assuming a fixed ten-column layout
        name_index = max(len(headers) - 1, 0)

        # parse lines
        for entry in cleandata:
            temp_line = entry.split(maxsplit=len(headers) - 1)

            # normalize data by inserting Null for missing data
            for index, header_name, end_col in header_spec:
                # these two headers are never checked for a blank column
                if header_name in ('command', 'name'):
                    continue

                col = end_col - 1  # subtract one since column starts at 0 instead of 1

                # a line that ends before this column has no value for it either
                if col >= len(entry) or entry[col] in string.whitespace:
                    temp_line.insert(index, None)

            # everything from name_index onward belongs to the last column;
            # skip None placeholders so a malformed line cannot break the join
            name = ' '.join(part for part in temp_line[name_index:] if part is not None)
            fixed_line = temp_line[:name_index]
            fixed_line.append(name)

            raw_output.append(dict(zip(headers, fixed_line)))

    if raw:
        return raw_output

    return process(raw_output)