formatting updates; fold size-parsing helpers into _parse_size (returns None on failure) and bump version to 1.0

2025-06-27 00:41:20 +02:00 · 2023-12-09 11:41:22 -08:00
parent 347097a294
commit 32bd7ffbf6
1 changed files with 230 additions and 161 deletions
--- a/jc/parsers/iftop.py
+++ b/jc/parsers/iftop.py
@ -1,7 +1,5 @@
 """jc - JSON Convert `iftop` command output parser

-Some of `iftop` options are supported.
-
 Usage (cli):

    $ iftop -i <device> -t -B -s1 | jc --iftop
@ -59,11 +57,71 @@ Schema:
          "last_10s":               string,
          "last_40s":               string
        }
-
+      }
+    ]

 Examples:

-    $ iftop -i eno0 -t -P -s 1 | jc --iftop -p -r
+    $ iftop -i enp0s3 -t -P -s1 | jc --iftop -p
+    [
+      {
+        "device": "enp0s3",
+        "ip_address": "10.10.15.129",
+        "mac_address": "08:00:27:c0:4a:4f",
+        "clients": [
+          {
+            "index": 1,
+            "connections": [
+              {
+                "host_name": "ubuntu-2004-clean-01",
+                "host_port": "ssh",
+                "last_2s": 448,
+                "last_10s": 448,
+                "last_40s": 448,
+                "cumulative": 112,
+                "direction": "send"
+              },
+              {
+                "host_name": "10.10.15.72",
+                "host_port": "40876",
+                "last_2s": 208,
+                "last_10s": 208,
+                "last_40s": 208,
+                "cumulative": 52,
+                "direction": "receive"
+              }
+            ]
+          }
+        ],
+        "total_send_rate": {
+          "last_2s": 448,
+          "last_10s": 448,
+          "last_40s": 448
+        },
+        "total_receive_rate": {
+          "last_2s": 208,
+          "last_10s": 208,
+          "last_40s": 208
+        },
+        "total_send_and_receive_rate": {
+          "last_2s": 656,
+          "last_10s": 656,
+          "last_40s": 656
+        },
+        "peak_rate": {
+          "last_2s": 448,
+          "last_10s": 208,
+          "last_40s": 656
+        },
+        "cumulative_rate": {
+          "last_2s": 112,
+          "last_10s": 52,
+          "last_40s": 164
+        }
+      }
+    ]
+
+    $ iftop -i enp0s3 -t -P -s1 | jc --iftop -p -r
    [
      {
        "device": "enp0s3",
@ -121,7 +179,6 @@ Examples:
        }
      }
    ]
-
 """
 import re
 from typing import List, Dict
@ -133,7 +190,7 @@ from numbers import Number

 class info:
    """Provides parser metadata (version, author, etc.)"""
-    version = "0.1"
+    version = "1.0"
    description = "`iftop` command parser"
    author = "Ron Green"
    author_email = "11993626+georgettica@users.noreply.github.com"
@ -164,6 +221,7 @@ def _process(proc_data: List[JSONDictType], quiet: bool = False) -> List[JSONDic
        "peak_rate",
        "cumulative_rate",
    ]
+
    if not proc_data:
        return proc_data
    for entry in proc_data:
@ -175,7 +233,7 @@ def _process(proc_data: List[JSONDictType], quiet: bool = False) -> List[JSONDic
                for one_nesting_item_key in entry[entry_key]:
                    # print(f"{one_nesting_item_key=}")
                    if one_nesting_item_key in string_to_bytes_fields:
-                        entry[entry_key][one_nesting_item_key] = humanfriendly_parse_size(entry[entry_key][one_nesting_item_key])
+                        entry[entry_key][one_nesting_item_key] = _parse_size(entry[entry_key][one_nesting_item_key])
            elif entry_key == "clients":
                for client in entry[entry_key]:
                    # print(f"{client=}")
@ -186,47 +244,35 @@ def _process(proc_data: List[JSONDictType], quiet: bool = False) -> List[JSONDic
                        for connection_key in connection:
                            # print(f"{connection_key=}")
                            if connection_key in string_to_bytes_fields:
-                                connection[connection_key] = humanfriendly_parse_size(connection[connection_key])
+                                connection[connection_key] = _parse_size(connection[connection_key])
    return proc_data

+# _parse_size from https://github.com/xolox/python-humanfriendly

-# Named tuples to define units of size.
-SizeUnit = namedtuple('SizeUnit', 'divider, symbol, name')
-CombinedUnit = namedtuple('CombinedUnit', 'decimal, binary')
+# Copyright (c) 2021 Peter Odding

-# Differences between Python 2 and 3.
-try:
-    # Python 2.
-    basestring = basestring
-except (ImportError, NameError):
-    # Python 3.
-    basestring = str
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:

-def humanfriendly_is_string(value):
-    """
-    Check if a value is a :func:`python2:basestring` (in Python 2) or :class:`python3:str` (in Python 3) object.
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.

-    :param value: The value to check.
-    :returns: :data:`True` if the value is a string, :data:`False` otherwise.
-    """
-    return isinstance(value, basestring)
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-# Common disk size units in binary (base-2) and decimal (base-10) multiples.
-disk_size_units = (
-    CombinedUnit(SizeUnit(1000**1, 'KB', 'kilobyte'), SizeUnit(1024**1, 'KiB', 'kibibyte')),
-    CombinedUnit(SizeUnit(1000**2, 'MB', 'megabyte'), SizeUnit(1024**2, 'MiB', 'mebibyte')),
-    CombinedUnit(SizeUnit(1000**3, 'GB', 'gigabyte'), SizeUnit(1024**3, 'GiB', 'gibibyte')),
-    CombinedUnit(SizeUnit(1000**4, 'TB', 'terabyte'), SizeUnit(1024**4, 'TiB', 'tebibyte')),
-    CombinedUnit(SizeUnit(1000**5, 'PB', 'petabyte'), SizeUnit(1024**5, 'PiB', 'pebibyte')),
-    CombinedUnit(SizeUnit(1000**6, 'EB', 'exabyte'), SizeUnit(1024**6, 'EiB', 'exbibyte')),
-    CombinedUnit(SizeUnit(1000**7, 'ZB', 'zettabyte'), SizeUnit(1024**7, 'ZiB', 'zebibyte')),
-    CombinedUnit(SizeUnit(1000**8, 'YB', 'yottabyte'), SizeUnit(1024**8, 'YiB', 'yobibyte')),
-)
-
-class HumanfriendlyInvalidSize(Exception):
-    pass
-
-def humanfriendly_parse_size(size, binary=False):
+# Note: this function can be replaced with jc.utils.convert_size_to_int
+#       in the future.
+def _parse_size(size, binary=False):
    """
    Parse a human readable data size and return the number of bytes.

@ -260,10 +306,34 @@ def humanfriendly_parse_size(size, binary=False):
    >>> parse_size('1.5 GB', binary=True)
    1610612736
    """
-    tokens = humanfriendly_tokenize(size)
+    def tokenize(text):
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+
+    SizeUnit = namedtuple('SizeUnit', 'divider, symbol, name')
+    CombinedUnit = namedtuple('CombinedUnit', 'decimal, binary')
+    disk_size_units = (
+        CombinedUnit(SizeUnit(1000**1, 'KB', 'kilobyte'), SizeUnit(1024**1, 'KiB', 'kibibyte')),
+        CombinedUnit(SizeUnit(1000**2, 'MB', 'megabyte'), SizeUnit(1024**2, 'MiB', 'mebibyte')),
+        CombinedUnit(SizeUnit(1000**3, 'GB', 'gigabyte'), SizeUnit(1024**3, 'GiB', 'gibibyte')),
+        CombinedUnit(SizeUnit(1000**4, 'TB', 'terabyte'), SizeUnit(1024**4, 'TiB', 'tebibyte')),
+        CombinedUnit(SizeUnit(1000**5, 'PB', 'petabyte'), SizeUnit(1024**5, 'PiB', 'pebibyte')),
+        CombinedUnit(SizeUnit(1000**6, 'EB', 'exabyte'), SizeUnit(1024**6, 'EiB', 'exbibyte')),
+        CombinedUnit(SizeUnit(1000**7, 'ZB', 'zettabyte'), SizeUnit(1024**7, 'ZiB', 'zebibyte')),
+        CombinedUnit(SizeUnit(1000**8, 'YB', 'yottabyte'), SizeUnit(1024**8, 'YiB', 'yobibyte')),
+    )
+    tokens = tokenize(size)
    if tokens and isinstance(tokens[0], Number):
        # Get the normalized unit (if any) from the tokenized input.
-        normalized_unit = tokens[1].lower() if len(tokens) == 2 and humanfriendly_is_string(tokens[1]) else ''
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
        # If the input contains only a number, it's assumed to be the number of
        # bytes. The second token can also explicitly reference the unit bytes.
        if len(tokens) == 1 or normalized_unit.startswith('b'):
@ -286,23 +356,7 @@ def humanfriendly_parse_size(size, binary=False):
                        normalized_unit.startswith(unit.decimal.symbol[0].lower())):
                    return int(tokens[0] * (unit.binary.divider if binary else unit.decimal.divider))
    # We failed to parse the size specification.
-    msg = "Failed to parse size! (input %r was tokenized as %r)"
-    raise HumanfriendlyInvalidSize(format(msg, size, tokens))
-
-
-# taken from https://github.com/xolox/python-humanfriendly/blob/master/humanfriendly/text.py#L402
-# so there are no dependencies on the humanfriendly package
-def humanfriendly_tokenize(text):
-    tokenized_input = []
-    for token in re.split(r'(\d+(?:\.\d+)?)', text):
-        token = token.strip()
-        if re.match(r'\d+\.\d+', token):
-            tokenized_input.append(float(token))
-        elif token.isdigit():
-            tokenized_input.append(int(token))
-        elif token:
-            tokenized_input.append(token)
-    return tokenized_input
+    return None


 def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictType]:
@ -323,10 +377,11 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
    jc.utils.input_type_check(data)

    raw_output: List[Dict] = []
-
    interface_item: Dict = {}
-
+    current_client: Dict = {}
    clients: List = []
+    is_previous_line_interface = False
+    saw_already_host_line = False

    before_arrow = r"\s+(?P<index>\d+)\s+(?P<host_name>[^\s]+):(?P<host_port>[^\s]+)\s+"
    before_arrow_no_port = r"\s+(?P<index>\d+)\s+(?P<host_name>[^\s]+)\s+"
@ -367,39 +422,35 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

-    raw_output: List[Dict] = []
-    current_client: Dict = {}
-
    if not jc.utils.has_data(data):
        return raw_output if raw else _process(raw_output, quiet=quiet)

-    is_previous_line_interface = False
-    saw_already_host_line = False
    for line in filter(None, data.splitlines()):
        if line.startswith("interface:"):
            # Example:
            # interface: enp0s3
-
            interface_item["device"] = line.split(":")[1].strip()
+
        elif line.startswith("IP address is:"):
            # Example:
            # IP address is: 10.10.15.129
-
            interface_item["ip_address"] = line.split(":")[1].strip()
+
        elif line.startswith("MAC address is:"):
            # Example:
            # MAC address is: 08:00:27:c0:4a:4f
-
            # strip off the "MAC address is: " part
-            data_without_front = line.split(":")[1:]
+            data_without_front_list = line.split(":")[1:]
+
            # join the remaining parts back together
-            data_without_front = ":".join(data_without_front)
+            data_without_front = ":".join(data_without_front_list)
            interface_item["mac_address"] = data_without_front.strip()

        elif line.startswith("Listening on"):
            # Example:
            # Listening on enp0s3
            pass
+
        elif (
            line.startswith("# Host name (port/service if enabled)")
            and not saw_already_host_line
@ -408,6 +459,7 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
            # Example:
            #    # Host name (port/service if enabled)            last 2s   last 10s   last 40s cumulative
            pass
+
        elif (
            line.startswith("# Host name (port/service if enabled)")
            and saw_already_host_line
@ -424,16 +476,17 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
        elif "=>" in line and is_previous_line_interface and ":" in line:
            # should not happen
            pass
+
        elif "=>" in line and not is_previous_line_interface and ":" in line:
            # Example:
            #    1 ubuntu-2004-clean-01:ssh                 =>       448b       448b       448b       112B
-
            is_previous_line_interface = True
            match_raw = re_linux_clients_before_newline.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            current_client = {}
            current_client["index"] = int(match_dict["index"])
@ -449,16 +502,19 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
            }
            current_client["connections"].append(current_client_send)
            # not adding yet as the receive part is not yet parsed
+
        elif "=>" in line and not is_previous_line_interface and ":" not in line:
            # should not happen
            pass
+
        elif "=>" in line and is_previous_line_interface and ":" not in line:
            is_previous_line_interface = True
            match_raw = re_linux_clients_before_newline_no_port.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            current_client = {}
            current_client["index"] = int(match_dict["index"])
@ -473,20 +529,21 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
            }
            current_client["connections"].append(current_client_send)
            # not adding yet as the receive part is not yet parsed
+
        elif "<=" in line and not is_previous_line_interface and ":" in line:
            # should not happen
            pass
+
        elif "<=" in line and is_previous_line_interface and ":" in line:
            # Example:
            #      10.10.15.72:40876                        <=       208b       208b       208b        52B
-
            is_previous_line_interface = False
-
            match_raw = re_linux_clients_after_newline.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            current_client_receive = {
                "host_name": match_dict["receive_ip"],
@ -500,20 +557,21 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp

            current_client["connections"].append(current_client_receive)
            clients.append(current_client)
+
        elif "<=" in line and not is_previous_line_interface and ":" not in line:
            # should not happen
            pass
+
        elif "<=" in line and is_previous_line_interface and ":" not in line:
            # Example:
            #      10.10.15.72:40876                        <=       208b       208b       208b        52B
-
            is_previous_line_interface = False
-
            match_raw = re_linux_clients_after_newline_no_port.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            current_client_receive = {
                "host_name": match_dict["receive_ip"],
@ -526,17 +584,20 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp

            current_client["connections"].append(current_client_receive)
            clients.append(current_client)
+
        # check if all of the characters are dashes or equal signs
        elif all(c == "-" for c in line):
            pass
+
        elif line.startswith("Total send rate"):
            # Example:
            # Total send rate:                                       448b       448b       448b
            match_raw = re_total_send_rate.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            interface_item["total_send_rate"] = {}
            interface_item["total_send_rate"].update(
@ -546,14 +607,16 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
                    "last_40s": match_dict["total_send_rate_last_40s"],
                }
            )
+
        elif line.startswith("Total receive rate"):
            # Example:
            # Total receive rate:                                    208b       208b       208b
            match_raw = re_total_receive_rate.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            interface_item["total_receive_rate"] = {}
            interface_item["total_receive_rate"].update(
@ -563,14 +626,16 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
                    "last_40s": match_dict["total_receive_rate_last_40s"],
                }
            )
+
        elif line.startswith("Total send and receive rate"):
            # Example:
            # Total send and receive rate:                           656b       656b       656b
            match_raw = re_total_send_and_receive_rate.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            interface_item["total_send_and_receive_rate"] = {}
            interface_item["total_send_and_receive_rate"].update(
@ -580,12 +645,14 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
                    "last_40s": match_dict["total_send_and_receive_rate_last_40s"],
                }
            )
+
        elif line.startswith("Peak rate"):
            match_raw = re_peak_rate.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            interface_item["peak_rate"] = {}
            interface_item["peak_rate"].update(
@ -595,12 +662,14 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
                    "last_40s": match_dict["peak_rate_total"],
                }
            )
+
        elif line.startswith("Cumulative"):
            match_raw = re_cumulative_rate.match(line)
+
            if not match_raw:
                # this is a bug in iftop
-                #
                continue
+
            match_dict = match_raw.groupdict()
            interface_item["cumulative_rate"] = {}
            interface_item["cumulative_rate"].update(
@ -610,11 +679,11 @@ def parse(data: str, raw: bool = False, quiet: bool = False) -> List[JSONDictTyp
                    "last_40s": match_dict["cumulative_rate_total"],
                }
            )
+
        elif all(c == "=" for c in line):
            interface_item["clients"] = clients
            clients = []
-            raw_output.append(interface_item.copy()) # keep the copy here as without it keeps the objects linked 
-        else:
-            pass
+            # append a copy so later updates to interface_item do not alter this entry
+            raw_output.append(interface_item.copy())

    return raw_output if raw else _process(raw_output, quiet=quiet)