1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-07-13 01:20:24 +02:00

add epoch timestamps

This commit is contained in:
Kelly Brazil
2022-12-13 13:29:54 -08:00
parent b063f1bfb4
commit ac1f690c54

View File

@ -5,11 +5,17 @@ Google's BigTable).
No effort is made to convert the data types of the values in the cells. No effort is made to convert the data types of the values in the cells.
The timestamps of the cells are converted to Python's isoformat. The `timestamp_epoch` calculated timestamp field is naive (i.e. based on
the local time of the system the parser is run on).
Raw output contains all cells for each column (including timestamps in The `timestamp_epoch_utc` calculated timestamp field is timezone-aware and
converted to Python's isoformat), while the normal output contains only the is only available if the timestamp has a UTC timezone.
latest value for each column.
The `timestamp_iso` calculated timestamp field will only include UTC
timezone information if the timestamp has a UTC timezone.
Raw output contains all cells for each column (including timestamps), while
the normal output contains only the latest value for each column.
Usage (cli): Usage (cli):
@ -120,7 +126,7 @@ def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
key_func = lambda cell: (cell["column_family"], cell["column"]) key_func = lambda cell: (cell["column_family"], cell["column"])
all_cells = sorted(row["cells"], key=key_func) all_cells = sorted(row["cells"], key=key_func)
for (column_family, column), group in groupby(all_cells, key=key_func): for (column_family, column), group in groupby(all_cells, key=key_func):
group_list = sorted(group, key=lambda cell: cell["timestamp"], reverse=True) group_list = sorted(group, key=lambda cell: cell["timestamp_iso"], reverse=True)
if column_family not in cells: if column_family not in cells:
cells[column_family] = {} cells[column_family] = {}
cells[column_family][column] = group_list[0]["value"] cells[column_family][column] = group_list[0]["value"]
@ -165,10 +171,14 @@ def parse(
if field.startswith(" " * 4): if field.startswith(" " * 4):
value = field.strip(' "') value = field.strip(' "')
if value_next: if value_next:
dt = jc.utils.timestamp(timestamp)
cells.append({ cells.append({
"column_family": column_name.split(":", 1)[0], "column_family": column_name.split(":", 1)[0],
"column": column_name.split(":", 1)[1], "column": column_name.split(":", 1)[1],
"timestamp": datetime.datetime.strptime(timestamp, "%Y/%m/%d-%H:%M:%S.%f").isoformat(), # "timestamp": datetime.datetime.strptime(timestamp, "%Y/%m/%d-%H:%M:%S.%f").isoformat(),
"timestamp_iso": dt.iso,
"timestamp_epoch": dt.naive,
"timestamp_epoch_utc": dt.utc,
"value": value "value": value
}) })
elif field.startswith(" " * 2): elif field.startswith(" " * 2):