jc/jc/parsers/lsof.py

"""jc - JSON CLI output utility lsof Parser

Usage:
    specify --lsof as the first argument if the piped input is coming from lsof

Compatibility:
    'linux'

Examples:

    $ sudo lsof | jc --lsof -p
    [
      {
        "command": "systemd",
        "pid": 1,
        "tid": null,
        "user": "root",
        "fd": "cwd",
        "type": "DIR",
        "device": "253,0",
        "size_off": 224,
        "node": 64,
        "name": "/"
      },
      {
        "command": "systemd",
        "pid": 1,
        "tid": null,
        "user": "root",
        "fd": "rtd",
        "type": "DIR",
        "device": "253,0",
        "size_off": 224,
        "node": 64,
        "name": "/"
      },
      {
        "command": "systemd",
        "pid": 1,
        "tid": null,
        "user": "root",
        "fd": "txt",
        "type": "REG",
        "device": "253,0",
        "size_off": 1624520,
        "node": 50360451,
        "name": "/usr/lib/systemd/systemd"
      },
      ...
    ]

    $ sudo lsof | jc --lsof -p -r
    [
      {
        "command": "systemd",
        "pid": "1",
        "tid": null,
        "user": "root",
        "fd": "cwd",
        "type": "DIR",
        "device": "8,2",
        "size_off": "4096",
        "node": "2",
        "name": "/"
      },
      {
        "command": "systemd",
        "pid": "1",
        "tid": null,
        "user": "root",
        "fd": "rtd",
        "type": "DIR",
        "device": "8,2",
        "size_off": "4096",
        "node": "2",
        "name": "/"
      },
      {
        "command": "systemd",
        "pid": "1",
        "tid": null,
        "user": "root",
        "fd": "txt",
        "type": "REG",
        "device": "8,2",
        "size_off": "1595792",
        "node": "668802",
        "name": "/lib/systemd/systemd"
      },
      ...
    ]
"""
import string
import jc.utils


def process(proc_data):
    """
    Final processing to conform to the schema.

    Converts the numeric fields of every entry from strings to integers,
    modifying the entries in place. A field whose value cannot be converted
    (for example None or a non-numeric string) is set to None. Fields that
    are absent from an entry are left absent.

    Parameters:

        proc_data:   (list of dictionaries) raw structured data to process

    Returns:

        list of dictionaries   the same list object, with the following schema:

        [
          {
            "command":    string,
            "pid":        integer,
            "tid":        integer,
            "user":       string,
            "fd":         string,
            "type":       string,
            "device":     string,
            "size_off":   integer,
            "node":       integer,
            "name":       string
          }
        ]
    """
    # fields that are converted to integers
    int_keys = ('pid', 'tid', 'size_off', 'node')

    for record in proc_data:
        for field in int_keys:
            if field not in record:
                continue

            try:
                record[field] = int(record[field])
            except (ValueError, TypeError):
                # missing (None) or non-numeric value
                record[field] = None

    return proc_data


def parse(data, raw=False, quiet=False):
    """
    Main text parsing function

    The first non-blank line is treated as the header row. Header names are
    lowercased and '/' is replaced with '_' (so 'SIZE/OFF' becomes
    'size_off'). Each following non-blank line becomes one dictionary keyed
    by the header names. A field that is blank in a row is set to None. The
    last column is treated as free text and may contain spaces.

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) output preprocessed JSON if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        list of dictionaries   raw or processed structured data
    """

    # compatible options: linux, darwin, cygwin, win32, aix, freebsd
    compatible = ['linux']

    if not quiet:
        jc.utils.compatibility(__name__, compatible)

    raw_output = []

    # Clear any blank lines
    cleandata = [line for line in data.splitlines() if line]

    if cleandata:

        # clean up 'size/off' header
        # even though forward slash in a key is valid json, it can make things difficult
        header_row = cleandata.pop(0).lower().replace('/', '_')

        headers = header_row.split()

        # index of the last (free text) column; derived from the header row
        # so that output with a different number of columns is handled too
        name_index = len(headers) - 1

        # find column value of last character of each header
        # header tuple is (index, header_name, col)
        header_spec = []
        search_from = 0
        for i, h in enumerate(headers):
            # search from the end of the previous header so that a header
            # whose text also occurs inside an earlier header is not
            # matched at the wrong position
            end = header_row.find(h, search_from) + len(h)
            header_spec.append((i, h, end))
            search_from = end

        # parse lines
        for entry in cleandata:

            # normalize data by inserting Null for missing data
            temp_line = entry.split(maxsplit=name_index)

            for index, header_name, end in header_spec:

                if header_name in ('command', 'name'):
                    continue

                col = end - 1     # subtract one since column starts at 0 instead of 1

                # a field is missing when the character under the last
                # character of its header is whitespace, or when the row
                # ends before that column
                if col >= len(entry) or entry[col] in string.whitespace:
                    temp_line.insert(index, None)

            # inserting None values can push part of the last column past
            # name_index when it contains spaces; join the pieces back
            name = ' '.join(temp_line[name_index:])
            fixed_line = temp_line[:name_index]
            fixed_line.append(name)

            raw_output.append(dict(zip(headers, fixed_line)))

    if raw:
        return raw_output
    else:
        return process(raw_output)
add lsof parser 2019-10-23 17:22:25 -07:00			`"""jc - JSON CLI output utility lsof Parser`

			`Usage:`
			`specify --lsof as the first argument if the piped input is coming from lsof`

add compatibility to docs 2019-12-12 09:21:20 -08:00			`Compatibility:`
			`'linux'`

process lsmod data 2019-11-06 19:17:01 -08:00			`Examples:`
add lsof parser 2019-10-23 17:22:25 -07:00
update docs 2019-11-11 18:30:46 -08:00			`$ sudo lsof \| jc --lsof -p`
			`[`
			`{`
			`"command": "systemd",`
			`"pid": 1,`
			`"tid": null,`
			`"user": "root",`
			`"fd": "cwd",`
			`"type": "DIR",`
			`"device": "253,0",`
			`"size_off": 224,`
			`"node": 64,`
			`"name": "/"`
			`},`
			`{`
			`"command": "systemd",`
			`"pid": 1,`
			`"tid": null,`
			`"user": "root",`
			`"fd": "rtd",`
			`"type": "DIR",`
			`"device": "253,0",`
			`"size_off": 224,`
			`"node": 64,`
			`"name": "/"`
			`},`
			`{`
			`"command": "systemd",`
			`"pid": 1,`
			`"tid": null,`
			`"user": "root",`
			`"fd": "txt",`
			`"type": "REG",`
			`"device": "253,0",`
			`"size_off": 1624520,`
			`"node": 50360451,`
			`"name": "/usr/lib/systemd/systemd"`
			`},`
			`...`
			`]`

			`$ sudo lsof \| jc --lsof -p -r`
			`[`
			`{`
			`"command": "systemd",`
			`"pid": "1",`
			`"tid": null,`
			`"user": "root",`
			`"fd": "cwd",`
			`"type": "DIR",`
			`"device": "8,2",`
			`"size_off": "4096",`
			`"node": "2",`
			`"name": "/"`
			`},`
			`{`
			`"command": "systemd",`
			`"pid": "1",`
			`"tid": null,`
			`"user": "root",`
			`"fd": "rtd",`
			`"type": "DIR",`
			`"device": "8,2",`
			`"size_off": "4096",`
			`"node": "2",`
			`"name": "/"`
			`},`
			`{`
			`"command": "systemd",`
			`"pid": "1",`
			`"tid": null,`
			`"user": "root",`
			`"fd": "txt",`
			`"type": "REG",`
			`"device": "8,2",`
			`"size_off": "1595792",`
			`"node": "668802",`
			`"name": "/lib/systemd/systemd"`
			`},`
			`...`
			`]`
add lsof parser 2019-10-23 17:22:25 -07:00			`"""`
change SIZE/OFF key to SIZE_OFF 2019-10-25 10:04:29 -07:00			`import string`
fix compatibility code 2019-11-07 08:07:43 -08:00			`import jc.utils`
add lsof parser 2019-10-23 17:22:25 -07:00

process lsmod data 2019-11-06 19:17:01 -08:00			`def process(proc_data):`
update docs 2019-11-11 18:30:46 -08:00			`"""`
update process() doc 2019-11-12 11:28:10 -08:00			`Final processing to conform to the schema.`

			`Parameters:`
pep8 fixes 2019-11-14 16:36:00 -08:00
add colon to parameter in docs 2019-11-13 08:04:40 -08:00			`proc_data: (dictionary) raw structured data to process`
update process() doc 2019-11-12 11:28:10 -08:00
			`Returns:`

			`dictionary structured data with the following schema:`
pep8 fixes 2019-11-14 16:36:00 -08:00
update docs 2019-11-11 18:30:46 -08:00			`[`
			`{`
			`"command": string,`
			`"pid": integer,`
			`"tid": integer,`
			`"user": string,`
			`"fd": string,`
			`"type": string,`
			`"device": string,`
			`"size_off": integer,`
			`"node": integer,`
			`"name": string`
			`}`
			`]`
			`"""`
process lsmod data 2019-11-06 19:17:01 -08:00			`for entry in proc_data:`
			`# integer changes`
			`int_list = ['pid', 'tid', 'size_off', 'node']`
			`for key in int_list:`
			`if key in entry:`
			`try:`
			`key_int = int(entry[key])`
			`entry[key] = key_int`
			`except (ValueError, TypeError):`
			`entry[key] = None`
			`return proc_data`


fix compatibility code 2019-11-07 08:07:43 -08:00			`def parse(data, raw=False, quiet=False):`
update docs 2019-11-11 18:30:46 -08:00			`"""`
doc update 2019-11-12 11:17:33 -08:00			`Main text parsing function`
update docs 2019-11-11 18:30:46 -08:00
doc update 2019-11-12 11:17:33 -08:00			`Parameters:`
pep8 fixes 2019-11-14 16:36:00 -08:00
doc update 2019-11-12 11:17:33 -08:00			`data: (string) text data to parse`
			`raw: (boolean) output preprocessed JSON if True`
			`quiet: (boolean) suppress warning messages if True`
update docs 2019-11-11 18:30:46 -08:00
doc update 2019-11-12 11:17:33 -08:00			`Returns:`

			`dictionary raw or processed structured data`
update docs 2019-11-11 18:30:46 -08:00			`"""`
pep8 fixes 2019-11-14 16:36:00 -08:00
add compatibility function 2019-11-05 22:42:48 -06:00			`# compatible options: linux, darwin, cygwin, win32, aix, freebsd`
fix compatibility code 2019-11-07 08:07:43 -08:00			`compatible = ['linux']`

			`if not quiet:`
			`jc.utils.compatibility(__name__, compatible)`
add compatibility function 2019-11-05 22:42:48 -06:00
process lsmod data 2019-11-06 19:17:01 -08:00			`raw_output = []`
add lsof parser 2019-10-23 17:22:25 -07:00
			`linedata = data.splitlines()`

			`# Clear any blank lines`
			`cleandata = list(filter(None, linedata))`

			`if cleandata:`

			`# find column value of last character of each header`
lower() headers 2019-10-25 14:58:15 -07:00			`header_text = cleandata.pop(0).lower()`
formatting 2019-10-25 10:22:23 -07:00
convert headers to lowercase 2019-10-25 10:55:09 -07:00			`# clean up 'size/off' header`
change SIZE/OFF key to SIZE_OFF 2019-10-25 10:04:29 -07:00			`# even though forward slash in a key is valid json, it can make things difficult`
process lsmod data 2019-11-06 19:17:01 -08:00			`header_row = header_text.replace('/', '_')`
lower() headers 2019-10-25 14:58:15 -07:00
			`headers = header_row.split()`
formatting 2019-10-25 10:22:23 -07:00
add lsof parser 2019-10-23 17:22:25 -07:00			`header_spec = []`
			`for i, h in enumerate(headers):`
			`# header tuple is (index, header_name, col)`
			`header_spec.append((i, h, header_row.find(h) + len(h)))`

			`# parse lines`
			`for entry in cleandata:`
			`output_line = {}`

use None instead of -- 2019-10-23 17:27:23 -07:00			`# normalize data by inserting Null for missing data`
add lsof parser 2019-10-23 17:22:25 -07:00			`temp_line = entry.split(maxsplit=len(headers) - 1)`

			`for spec in header_spec:`
change SIZE/OFF key to SIZE_OFF 2019-10-25 10:04:29 -07:00
			`index = spec[0]`
			`header_name = spec[1]`
lower() headers 2019-10-25 14:58:15 -07:00			`col = spec[2] - 1 # subtract one since column starts at 0 instead of 1`
change SIZE/OFF key to SIZE_OFF 2019-10-25 10:04:29 -07:00
convert headers to lowercase 2019-10-25 10:55:09 -07:00			`if header_name == 'command' or header_name == 'name':`
add lsof parser 2019-10-23 17:22:25 -07:00			`continue`
lower() headers 2019-10-25 14:58:15 -07:00			`if entry[col] in string.whitespace:`
change SIZE/OFF key to SIZE_OFF 2019-10-25 10:04:29 -07:00			`temp_line.insert(index, None)`
add lsof parser 2019-10-23 17:22:25 -07:00
			`name = ' '.join(temp_line[9:])`
			`fixed_line = temp_line[0:9]`
			`fixed_line.append(name)`

			`output_line = dict(zip(headers, fixed_line))`
process lsmod data 2019-11-06 19:17:01 -08:00			`raw_output.append(output_line)`
add lsof parser 2019-10-23 17:22:25 -07:00
process lsmod data 2019-11-06 19:17:01 -08:00			`if raw:`
			`return raw_output`
			`else:`
			`return process(raw_output)`