1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00

fix proc-pid-stat parser for command names with spaces and newlines

This commit is contained in:
Kelly Brazil
2022-09-28 14:55:25 -07:00
parent 32fddce8fb
commit 690603bfda
7 changed files with 86 additions and 69 deletions

View File

@ -1,5 +1,8 @@
jc changelog
20220928 v1.22.1
- Fix proc-pid-stat parser for command names with spaces and newlines
20220926 v1.22.0
- Add /proc file parsers for linux. Support for the following files:
`/proc/buddyinfo`

View File

@ -201,7 +201,7 @@ def parse(
pid_mountinfo_p = re.compile(r'^\d+ \d+ \d+:\d+ /.+\n')
pid_numa_maps_p = re.compile(r'^[a-f0-9]{12} default [^\n]+\n')
pid_smaps_p = re.compile(r'^[0-9a-f]{12}-[0-9a-f]{12} [rwxsp\-]{4} [0-9a-f]{8} [0-9a-f]{2}:[0-9a-f]{2} \d+ [^\n]+\nSize:\s+\d+ \S\S\n')
pid_stat_p = re.compile(r'^\d+ \(.{1,16}\) \w \d+ \d+ \d+ \d+ -?\d+ (?:\d+ ){43}\d+$')
# Signature for /proc/<pid>/stat. The comm field may contain spaces and
# newlines (hence re.DOTALL). The numeric fields after tpg_id include
# priority and nice, which can be negative, so every trailing field
# accepts an optional leading minus sign.
pid_stat_p = re.compile(r'^\d+ \(.{1,15}\) \S \d+ \d+ \d+ \d+ -?\d+ (?:-?\d+ ){43}-?\d+$', re.DOTALL)
pid_statm_p = re.compile(r'^\d+ \d+ \d+\s\d+\s\d+\s\d+\s\d+$')
pid_status_p = re.compile(r'^Name:\t.+\nUmask:\t\d+\nState:\t.+\nTgid:\t\d+\n')

View File

@ -195,6 +195,7 @@ Examples:
"exit_code": 0
}
"""
import re
from typing import Dict
import jc.utils
@ -242,6 +243,12 @@ def _process(proc_data: Dict) -> Dict:
if 'state' in proc_data:
proc_data['state_pretty'] = state_map[proc_data['state']]
for key, val in proc_data.items():
try:
proc_data[key] = int(val)
except Exception:
pass
return proc_data
@ -270,74 +277,65 @@ def parse(
if jc.utils.has_data(data):
split_line = data.split()
raw_output = {
'pid': int(split_line[0]),
'comm': split_line[1].strip('()'),
'state': split_line[2],
'ppid': int(split_line[3]),
'pgrp': int(split_line[4]),
'session': int(split_line[5]),
'tty_nr': int(split_line[6]),
'tpg_id': int(split_line[7]),
'flags': int(split_line[8]),
'minflt': int(split_line[9]),
'cminflt': int(split_line[10]),
'majflt': int(split_line[11]),
'cmajflt': int(split_line[12]),
'utime': int(split_line[13]),
'stime': int(split_line[14]),
'cutime': int(split_line[15]),
'cstime': int(split_line[16]),
'priority': int(split_line[17]),
'nice': int(split_line[18]),
'num_threads': int(split_line[19]),
'itrealvalue': int(split_line[20]),
'starttime': int(split_line[21]),
'vsize': int(split_line[22]),
'rss': int(split_line[23]),
'rsslim': int(split_line[24]),
'startcode': int(split_line[25]),
'endcode': int(split_line[26]),
'startstack': int(split_line[27]),
'kstkeep': int(split_line[28]),
'kstkeip': int(split_line[29]),
'signal': int(split_line[30]),
'blocked': int(split_line[31]),
'sigignore': int(split_line[32]),
'sigcatch': int(split_line[33]),
'wchan': int(split_line[34]),
'nswap': int(split_line[35]),
'cnswap': int(split_line[36])
}
# Pattern for the single record found in /proc/<pid>/stat.
#
# - comm is captured greedily between the first "(" and the last ")" that
#   is followed by the remaining fields, so command names containing
#   spaces, parentheses or newlines are kept intact (re.DOTALL lets "."
#   match the newline).
# - tpg_id, priority and nice are signed: tpg_id is -1 when there is no
#   controlling terminal, nice ranges from 19 down to -20, and priority is
#   negative for real-time scheduled processes.
# - All groups are captured as strings; numeric conversion is left to the
#   caller.
line_re = re.compile(r'''
    ^(?P<pid>\d+)\s
    \((?P<comm>.+)\)\s
    (?P<state>\S)\s
    (?P<ppid>\d+)\s
    (?P<pgrp>\d+)\s
    (?P<session>\d+)\s
    (?P<tty_nr>\d+)\s
    (?P<tpg_id>-?\d+)\s
    (?P<flags>\d+)\s
    (?P<minflt>\d+)\s
    (?P<cminflt>\d+)\s
    (?P<majflt>\d+)\s
    (?P<cmajflt>\d+)\s
    (?P<utime>\d+)\s
    (?P<stime>\d+)\s
    (?P<cutime>\d+)\s
    (?P<cstime>\d+)\s
    (?P<priority>-?\d+)\s
    (?P<nice>-?\d+)\s
    (?P<num_threads>\d+)\s
    (?P<itrealvalue>\d+)\s
    (?P<starttime>\d+)\s
    (?P<vsize>\d+)\s
    (?P<rss>\d+)\s
    (?P<rsslim>\d+)\s
    (?P<startcode>\d+)\s
    (?P<endcode>\d+)\s
    (?P<startstack>\d+)\s
    (?P<kstkeep>\d+)\s
    (?P<kstkeip>\d+)\s
    (?P<signal>\d+)\s
    (?P<blocked>\d+)\s
    (?P<sigignore>\d+)\s
    (?P<sigcatch>\d+)\s
    (?P<wchan>\d+)\s
    (?P<nswap>\d+)\s
    (?P<cnswap>\d+)\s
    (?P<exit_signal>\d+)\s
    (?P<processor>\d+)\s
    (?P<rt_priority>\d+)\s
    (?P<policy>\d+)\s
    (?P<delayacct_blkio_ticks>\d+)\s
    (?P<guest_time>\d+)\s
    (?P<cguest_time>\d+)\s
    (?P<start_data>\d+)\s
    (?P<end_data>\d+)\s
    (?P<start_brk>\d+)\s
    (?P<arg_start>\d+)\s
    (?P<arg_end>\d+)\s
    (?P<env_start>\d+)\s
    (?P<env_end>\d+)\s
    (?P<exit_code>\d+)
    ''', re.VERBOSE | re.DOTALL
)
if len(split_line) > 37:
raw_output['exit_signal'] = int(split_line[37])
line_match = line_re.search(data)
if len(split_line) > 38:
raw_output['processor'] = int(split_line[38])
if len(split_line) > 39:
raw_output['rt_priority'] = int(split_line[39])
raw_output['policy'] = int(split_line[40])
if len(split_line) > 41:
raw_output['delayacct_blkio_ticks'] = int(split_line[41])
if len(split_line) > 42:
raw_output['guest_time'] = int(split_line[42])
raw_output['cguest_time'] = int(split_line[43])
if len(split_line) > 44:
raw_output['start_data'] = int(split_line[44])
raw_output['end_data'] = int(split_line[45])
raw_output['start_brk'] = int(split_line[46])
if len(split_line) > 47:
raw_output['arg_start'] = int(split_line[47])
raw_output['arg_end'] = int(split_line[48])
raw_output['env_start'] = int(split_line[49])
raw_output['env_end'] = int(split_line[50])
raw_output['exit_code'] = int(split_line[51])
if line_match:
raw_output = line_match.groupdict()
return raw_output if raw else _process(raw_output)

View File

@ -0,0 +1,2 @@
2001 (my file with
sp) S 1888 2001 1888 34816 2001 4202496 428 0 0 0 0 0 0 0 20 0 1 0 75513 115900416 297 18446744073709551615 4194304 5100612 140737020052256 140737020050904 140096699233308 0 65536 4 65538 18446744072034584486 0 0 17 0 0 0 0 0 0 7200240 7236240 35389440 140737020057179 140737020057223 140737020057223 140737020059606 0

View File

@ -0,0 +1 @@
{"pid":2001,"comm":"my file with\nsp","state":"S","ppid":1888,"pgrp":2001,"session":1888,"tty_nr":34816,"tpg_id":2001,"flags":4202496,"minflt":428,"cminflt":0,"majflt":0,"cmajflt":0,"utime":0,"stime":0,"cutime":0,"cstime":0,"priority":20,"nice":0,"num_threads":1,"itrealvalue":0,"starttime":75513,"vsize":115900416,"rss":297,"rsslim":18446744073709551615,"startcode":4194304,"endcode":5100612,"startstack":140737020052256,"kstkeep":140737020050904,"kstkeip":140096699233308,"signal":0,"blocked":65536,"sigignore":4,"sigcatch":65538,"wchan":18446744072034584486,"nswap":0,"cnswap":0,"exit_signal":17,"processor":0,"rt_priority":0,"policy":0,"delayacct_blkio_ticks":0,"guest_time":0,"cguest_time":0,"start_data":7200240,"end_data":7236240,"start_brk":35389440,"arg_start":140737020057179,"arg_end":140737020057223,"env_start":140737020057223,"env_end":140737020059606,"exit_code":0,"state_pretty":"Sleeping in an interruptible wait"}

View File

@ -203,6 +203,9 @@ class MyTests(unittest.TestCase):
'proc_pid_stat': (
'fixtures/linux-proc/pid_stat',
'fixtures/linux-proc/pid_stat.json'),
'pid_stat_w_space_and_nl_in_comm': (
'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm',
'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json'),
'proc_pid_statm': (
'fixtures/linux-proc/pid_statm',
'fixtures/linux-proc/pid_statm.json'),

View File

@ -16,7 +16,10 @@ class MyTests(unittest.TestCase):
fixtures = {
'proc_pid_stat': (
'fixtures/linux-proc/pid_stat',
'fixtures/linux-proc/pid_stat.json')
'fixtures/linux-proc/pid_stat.json'),
'pid_stat_w_space_and_nl_in_comm': (
'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm',
'fixtures/linux-proc/pid_stat_w_space_and_nl_in_comm.json')
}
for file, filepaths in fixtures.items():
@ -39,6 +42,13 @@ class MyTests(unittest.TestCase):
self.assertEqual(jc.parsers.proc_pid_stat.parse(self.f_in['proc_pid_stat'], quiet=True),
self.f_json['proc_pid_stat'])
def test_proc_pid_stat_w_space_and_nl(self):
    """
    Test '/proc/<pid>/stat' where the command name contains spaces and
    a newline
    """
    fixture = 'pid_stat_w_space_and_nl_in_comm'
    parsed = jc.parsers.proc_pid_stat.parse(self.f_in[fixture], quiet=True)
    self.assertEqual(parsed, self.f_json[fixture])
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()