diff --git a/CHANGELOG b/CHANGELOG
index 754a1683..00d70b39 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,8 @@
 jc changelog
 
+20220928 v1.22.1
+- fix proc-pid-stat parser for command names with spaces and newlines
+
 20220926 v1.22.0
 - Add /proc file parsers for linux. Support for the following files:
   `/proc/buddyinfo`
diff --git a/jc/parsers/proc.py b/jc/parsers/proc.py
index 8cada8bc..937ba3a3 100644
--- a/jc/parsers/proc.py
+++ b/jc/parsers/proc.py
@@ -201,7 +201,7 @@ def parse(
         pid_mountinfo_p = re.compile(r'^\d+ \d+ \d+:\d+ /.+\n')
         pid_numa_maps_p = re.compile(r'^[a-f0-9]{12} default [^\n]+\n')
         pid_smaps_p = re.compile(r'^[0-9a-f]{12}-[0-9a-f]{12} [rwxsp\-]{4} [0-9a-f]{8} [0-9a-f]{2}:[0-9a-f]{2} \d+ [^\n]+\nSize:\s+\d+ \S\S\n')
-        pid_stat_p = re.compile(r'^\d+ \(.{1,16}\) \w \d+ \d+ \d+ \d+ -?\d+ (?:\d+ ){43}\d+$')
+        pid_stat_p = re.compile(r'^\d+ \(.{1,15}\) \S \d+ \d+ \d+ \d+ -?\d+ (?:\d+ ){43}\d+$', re.DOTALL)
         pid_statm_p = re.compile(r'^\d+ \d+ \d+\s\d+\s\d+\s\d+\s\d+$')
         pid_status_p = re.compile(r'^Name:\t.+\nUmask:\t\d+\nState:\t.+\nTgid:\t\d+\n')
 
diff --git a/jc/parsers/proc_pid_stat.py b/jc/parsers/proc_pid_stat.py
index a263c16f..8968f0ed 100644
--- a/jc/parsers/proc_pid_stat.py
+++ b/jc/parsers/proc_pid_stat.py
@@ -195,6 +195,7 @@ Examples:
       "exit_code": 0
     }
 """
+import re
 from typing import Dict
 import jc.utils
 
@@ -242,6 +243,12 @@ def _process(proc_data: Dict) -> Dict:
     if 'state' in proc_data:
         proc_data['state_pretty'] = state_map[proc_data['state']]
 
+    for key, val in proc_data.items():
+        try:
+            proc_data[key] = int(val)
+        except Exception:
+            pass
+
     return proc_data
 
 
@@ -270,74 +277,65 @@ def parse(
 
     if jc.utils.has_data(data):
 
-        split_line = data.split()
-        raw_output = {
-            'pid': int(split_line[0]),
-            'comm': split_line[1].strip('()'),
-            'state': split_line[2],
-            'ppid': int(split_line[3]),
-            'pgrp': int(split_line[4]),
-            'session': int(split_line[5]),
-            'tty_nr': int(split_line[6]),
-            'tpg_id': int(split_line[7]),
-            'flags': int(split_line[8]),
-            'minflt': int(split_line[9]),
-            'cminflt': int(split_line[10]),
-            'majflt': int(split_line[11]),
-            'cmajflt': int(split_line[12]),
-            'utime': int(split_line[13]),
-            'stime': int(split_line[14]),
-            'cutime': int(split_line[15]),
-            'cstime': int(split_line[16]),
-            'priority': int(split_line[17]),
-            'nice': int(split_line[18]),
-            'num_threads': int(split_line[19]),
-            'itrealvalue': int(split_line[20]),
-            'starttime': int(split_line[21]),
-            'vsize': int(split_line[22]),
-            'rss': int(split_line[23]),
-            'rsslim': int(split_line[24]),
-            'startcode': int(split_line[25]),
-            'endcode': int(split_line[26]),
-            'startstack': int(split_line[27]),
-            'kstkeep': int(split_line[28]),
-            'kstkeip': int(split_line[29]),
-            'signal': int(split_line[30]),
-            'blocked': int(split_line[31]),
-            'sigignore': int(split_line[32]),
-            'sigcatch': int(split_line[33]),
-            'wchan': int(split_line[34]),
-            'nswap': int(split_line[35]),
-            'cnswap': int(split_line[36])
-        }
+        line_re = re.compile(r'''
+            ^(?P<pid>\d+)\s
+            \((?P<comm>.+)\)\s
+            (?P<state>\S)\s
+            (?P<ppid>\d+)\s
+            (?P<pgrp>\d+)\s
+            (?P<session>\d+)\s
+            (?P<tty_nr>\d+)\s
+            (?P<tpg_id>-?\d+)\s
+            (?P<flags>\d+)\s
+            (?P<minflt>\d+)\s
+            (?P<cminflt>\d+)\s
+            (?P<majflt>\d+)\s
+            (?P<cmajflt>\d+)\s
+            (?P<utime>\d+)\s
+            (?P<stime>\d+)\s
+            (?P<cutime>\d+)\s
+            (?P<cstime>\d+)\s
+            (?P<priority>\d+)\s
+            (?P<nice>\d+)\s
+            (?P<num_threads>\d+)\s
+            (?P<itrealvalue>\d+)\s
+            (?P<starttime>\d+)\s
+            (?P<vsize>\d+)\s
+            (?P<rss>\d+)\s
+            (?P<rsslim>\d+)\s
+            (?P<startcode>\d+)\s
+            (?P<endcode>\d+)\s
+            (?P<startstack>\d+)\s
+            (?P<kstkeep>\d+)\s
+            (?P<kstkeip>\d+)\s
+            (?P<signal>\d+)\s
+            (?P<blocked>\d+)\s
+            (?P<sigignore>\d+)\s
+            (?P<sigcatch>\d+)\s
+            (?P<wchan>\d+)\s
+            (?P<nswap>\d+)\s
+            (?P<cnswap>\d+)\s
+            (?P<exit_signal>\d+)\s
+            (?P<processor>\d+)\s
+            (?P<rt_priority>\d+)\s
+            (?P<policy>\d+)\s
+            (?P<delayacct_blkio_ticks>\d+)\s
+            (?P<guest_time>\d+)\s
+            (?P<cguest_time>\d+)\s
+            (?P<start_data>\d+)\s
+            (?P<end_data>\d+)\s
+            (?P<start_brk>\d+)\s
+            (?P<arg_start>\d+)\s
+            (?P<arg_end>\d+)\s
+            (?P<env_start>\d+)\s
+            (?P<env_end>\d+)\s
+            (?P<exit_code>\d+)
+            ''', re.VERBOSE | re.DOTALL
+        )
 
-        if len(split_line) > 37:
-            raw_output['exit_signal'] = int(split_line[37])
+        line_match = line_re.search(data)
 
-        if len(split_line) > 38:
-            raw_output['processor'] = int(split_line[38])
-
-        if len(split_line) > 39:
-            raw_output['rt_priority'] = int(split_line[39])
-            raw_output['policy'] = int(split_line[40])
-
-        if len(split_line) > 41:
-            raw_output['delayacct_blkio_ticks'] = int(split_line[41])
-
-        if len(split_line) > 42:
-            raw_output['guest_time'] = int(split_line[42])
-            raw_output['cguest_time'] = int(split_line[43])
-
-        if len(split_line) > 44:
-            raw_output['start_data'] = int(split_line[44])
-            raw_output['end_data'] = int(split_line[45])
-            raw_output['start_brk'] = int(split_line[46])
-
-        if len(split_line) > 47:
-            raw_output['arg_start'] = int(split_line[47])
-            raw_output['arg_end'] = int(split_line[48])
-            raw_output['env_start'] = int(split_line[49])
-            raw_output['env_end'] = int(split_line[50])
-            raw_output['exit_code'] = int(split_line[51])
+        if line_match:
+            raw_output = line_match.groupdict()
 
     return raw_output if raw else _process(raw_output)
diff --git a/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm b/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm
new file mode 100644
index 00000000..431e7f04
--- /dev/null
+++ b/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm
@@ -0,0 +1,2 @@
+2001 (my file with
+sp) S 1888 2001 1888 34816 2001 4202496 428 0 0 0 0 0 0 0 20 0 1 0 75513 115900416 297 18446744073709551615 4194304 5100612 140737020052256 140737020050904 140096699233308 0 65536 4 65538 18446744072034584486 0 0 17 0 0 0 0 0 0 7200240 7236240 35389440 140737020057179 140737020057223 140737020057223 140737020059606 0
diff --git a/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json b/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json
new file mode 100644
index 00000000..e338542d
--- /dev/null
+++ b/tests/fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json
@@ -0,0 +1 @@
+{"pid":2001,"comm":"my file with\nsp","state":"S","ppid":1888,"pgrp":2001,"session":1888,"tty_nr":34816,"tpg_id":2001,"flags":4202496,"minflt":428,"cminflt":0,"majflt":0,"cmajflt":0,"utime":0,"stime":0,"cutime":0,"cstime":0,"priority":20,"nice":0,"num_threads":1,"itrealvalue":0,"starttime":75513,"vsize":115900416,"rss":297,"rsslim":18446744073709551615,"startcode":4194304,"endcode":5100612,"startstack":140737020052256,"kstkeep":140737020050904,"kstkeip":140096699233308,"signal":0,"blocked":65536,"sigignore":4,"sigcatch":65538,"wchan":18446744072034584486,"nswap":0,"cnswap":0,"exit_signal":17,"processor":0,"rt_priority":0,"policy":0,"delayacct_blkio_ticks":0,"guest_time":0,"cguest_time":0,"start_data":7200240,"end_data":7236240,"start_brk":35389440,"arg_start":140737020057179,"arg_end":140737020057223,"env_start":140737020057223,"env_end":140737020059606,"exit_code":0,"state_pretty":"Sleeping in an interruptible wait"}
diff --git a/tests/test_proc.py b/tests/test_proc.py
index b96e289b..e5989ef7 100644
--- a/tests/test_proc.py
+++ b/tests/test_proc.py
@@ -203,6 +203,9 @@ class MyTests(unittest.TestCase):
             'proc_pid_stat': (
                 'fixtures/linux-proc/pid_stat',
                 'fixtures/linux-proc/pid_stat.json'),
+            'pid_stat_w_space_and_nl_in_comm': (
+                'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm',
+                'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json'),
             'proc_pid_statm': (
                 'fixtures/linux-proc/pid_statm',
                 'fixtures/linux-proc/pid_statm.json'),
diff --git a/tests/test_proc_pid_stat.py b/tests/test_proc_pid_stat.py
index 876ce032..4201abf8 100644
--- a/tests/test_proc_pid_stat.py
+++ b/tests/test_proc_pid_stat.py
@@ -16,7 +16,10 @@ class MyTests(unittest.TestCase):
         fixtures = {
             'proc_pid_stat': (
                 'fixtures/linux-proc/pid_stat',
-                'fixtures/linux-proc/pid_stat.json')
+                'fixtures/linux-proc/pid_stat.json'),
+            'pid_stat_w_space_and_nl_in_comm': (
+                'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm',
+                'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json')
         }
 
         for file, filepaths in fixtures.items():
@@ -39,6 +42,13 @@ class MyTests(unittest.TestCase):
         self.assertEqual(jc.parsers.proc_pid_stat.parse(self.f_in['proc_pid_stat'], quiet=True),
                          self.f_json['proc_pid_stat'])
 
+    def test_proc_pid_stat_w_space_and_nl(self):
+        """
+        Test '/proc/<pid>/stat' with command with spaces and newline
+        """
+        self.assertEqual(jc.parsers.proc_pid_stat.parse(self.f_in['pid_stat_w_space_and_nl_in_comm'], quiet=True),
+                         self.f_json['pid_stat_w_space_and_nl_in_comm'])
+
 
 if __name__ == '__main__':
     unittest.main()