add epoch timestamps

2025-07-13 01:20:24 +02:00 · 2022-12-13 13:29:54 -08:00
parent b063f1bfb4
commit ac1f690c54
1 changed file with 16 additions and 6 deletions
--- a/jc/parsers/cbt.py
+++ b/jc/parsers/cbt.py
@@ -5,11 +5,17 @@ Google's BigTable).
 No effort is made to convert the data types of the values in the cells.
-The timestamps of the cells are converted to Python's isoformat.
+The `timestamp_epoch` calculated timestamp field is naive. (i.e. based on
+the local time of the system the parser is run on)
-Raw output contains all cells for each column (including timestamps in
+The `timestamp_epoch_utc` calculated timestamp field is timezone-aware and
-converted to Python's isoformat), while the normal output contains only the
+is only available if the timestamp has a UTC timezone.
-latest value for each column.
+
+The `timestamp_iso` calculated timestamp field will only include UTC
+timezone information if the timestamp has a UTC timezone.
+Raw output contains all cells for each column (including timestamps), while
+the normal output contains only the latest value for each column.
 Usage (cli):
@@ -120,7 +126,7 @@ def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
        key_func = lambda cell: (cell["column_family"], cell["column"])
        all_cells = sorted(row["cells"], key=key_func)
        for (column_family, column), group in groupby(all_cells, key=key_func):
-            group_list = sorted(group, key=lambda cell: cell["timestamp"], reverse=True)
+            group_list = sorted(group, key=lambda cell: cell["timestamp_iso"], reverse=True)
            if column_family not in cells:
                cells[column_family] = {}
            cells[column_family][column] = group_list[0]["value"]
@@ -165,10 +171,14 @@ def parse(
                if field.startswith(" " * 4):
                    value = field.strip(' "')
                    if value_next:
                        dt = jc.utils.timestamp(timestamp)
                        cells.append({
                            "column_family": column_name.split(":", 1)[0],
                            "column": column_name.split(":", 1)[1],
-                            "timestamp": datetime.datetime.strptime(timestamp, "%Y/%m/%d-%H:%M:%S.%f").isoformat(),
+                            # "timestamp": datetime.datetime.strptime(timestamp, "%Y/%m/%d-%H:%M:%S.%f").isoformat(),
                            "timestamp_iso": dt.iso,
                            "timestamp_epoch": dt.naive,
                            "timestamp_epoch_utc": dt.utc,
                            "value": value
                        })
                elif field.startswith(" " * 2):