mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-23 00:29:59 +02:00
make parse_datetime_to_timestamp function a class called timestamp for easier use
This commit is contained in:
155
jc/utils.py
155
jc/utils.py
@ -212,3 +212,158 @@ def parse_datetime_to_timestamp(data):
|
|||||||
timestamp_obj['timestamp_utc'] = timestamp_utc
|
timestamp_obj['timestamp_utc'] = timestamp_utc
|
||||||
|
|
||||||
return timestamp_obj
|
return timestamp_obj
|
||||||
|
|
||||||
|
|
||||||
|
class timestamp:
|
||||||
|
"""
|
||||||
|
Input a date-time text string of several formats and convert to a naive or timezone-aware epoch timestamp in UTC
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
datetime_string: (str) a string representation of a date-time in several supported formats
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
string (str) the input datetime string
|
||||||
|
format (int) the format rule that was used to decode the datetime string
|
||||||
|
naive (int) timestamp based on locally configured timezone. None if conversion fails
|
||||||
|
utc (int) aware timestamp only if UTC timezone detected in datetime string. None if conversion fails
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, datetime_string):
|
||||||
|
self.string = datetime_string
|
||||||
|
dt = self._parse()
|
||||||
|
self.format = dt['format']
|
||||||
|
self.naive = dt['timestamp_naive']
|
||||||
|
self.utc = dt['timestamp_utc']
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f'timestamp(string="{self.string}", format={self.format}, naive={self.naive}, utc={self.utc})'
|
||||||
|
|
||||||
|
def _parse(self):
|
||||||
|
"""
|
||||||
|
Input a date-time text string of several formats and convert to a naive or timezone-aware epoch timestamp in UTC
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
|
||||||
|
data: (string) a string representation of a date-time in several supported formats
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
Dictionary A Dictionary of the following format:
|
||||||
|
|
||||||
|
{
|
||||||
|
"format": integer, # for debugging purposes. None if conversion fails
|
||||||
|
"timestamp_naive": integer, # timestamp based on locally configured timezone. None if conversion fails
|
||||||
|
"timestamp_utc": integer # aware timestamp only if UTC timezone detected. None if conversion fails
|
||||||
|
}
|
||||||
|
|
||||||
|
The format integer denotes which date_time format conversion succeeded.
|
||||||
|
The timestamp_naive integer is the converted date-time string to a naive epoch timestamp.
|
||||||
|
The timestamp_utc integer is the converted date-time string to an aware epoch timestamp
|
||||||
|
in the UTC timezone. If an aware conversion cannot be performed (e.g. the UTC timezone
|
||||||
|
is not found in the date-time string), then this field will be None.
|
||||||
|
|
||||||
|
If the conversion completely fails, all fields will be None.
|
||||||
|
"""
|
||||||
|
data = self.string or ''
|
||||||
|
normalized_datetime = ''
|
||||||
|
utc_tz = False
|
||||||
|
dt = None
|
||||||
|
dt_utc = None
|
||||||
|
timestamp_naive = None
|
||||||
|
timestamp_utc = None
|
||||||
|
timestamp_obj = {
|
||||||
|
'format': None,
|
||||||
|
'timestamp_naive': None,
|
||||||
|
'timestamp_utc': None
|
||||||
|
}
|
||||||
|
utc_tz = False
|
||||||
|
|
||||||
|
if 'UTC' in data:
|
||||||
|
utc_tz = True
|
||||||
|
if 'UTC+' in data or 'UTC-' in data:
|
||||||
|
if 'UTC+0000' in data or 'UTC-0000' in data:
|
||||||
|
utc_tz = True
|
||||||
|
else:
|
||||||
|
utc_tz = False
|
||||||
|
elif '+0000' in data or '-0000' in data:
|
||||||
|
utc_tz = True
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{'id': 1000, 'format': '%a %b %d %H:%M:%S %Y', 'locale': None}, # manual C locale format conversion: Tue Mar 23 16:12:11 2021 or Tue Mar 23 16:12:11 IST 2021
|
||||||
|
{'id': 1500, 'format': '%Y-%m-%d %H:%M', 'locale': None}, # en_US.UTF-8 local format (found in who cli output): 2021-03-23 00:14
|
||||||
|
{'id': 2000, 'format': '%a %d %b %Y %I:%M:%S %p %Z', 'locale': None}, # en_US.UTF-8 local format (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM UTC
|
||||||
|
{'id': 3000, 'format': '%a %d %b %Y %I:%M:%S %p', 'locale': None}, # en_US.UTF-8 local format with non-UTC tz (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM IST
|
||||||
|
{'id': 4000, 'format': '%A %d %B %Y %I:%M:%S %p %Z', 'locale': None}, # European-style local format (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM UTC
|
||||||
|
{'id': 5000, 'format': '%A %d %B %Y %I:%M:%S %p', 'locale': None}, # European-style local format with non-UTC tz (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM IST
|
||||||
|
{'id': 6000, 'format': '%a %b %d %I:%M:%S %p %Z %Y', 'locale': None}, # en_US.UTF-8 format (found in date cli): Wed Mar 24 06:16:19 PM UTC 2021
|
||||||
|
{'id': 7000, 'format': '%a %b %d %H:%M:%S %Z %Y', 'locale': None}, # C locale format (found in date cli): Wed Mar 24 11:11:30 UTC 2021
|
||||||
|
{'id': 7100, 'format': '%b %d %H:%M:%S %Y', 'locale': None}, # C locale format (found in stat cli output - osx): # Mar 29 11:49:05 2021
|
||||||
|
{'id': 7200, 'format': '%Y-%m-%d %H:%M:%S.%f %z', 'locale': None}, # C locale format (found in stat cli output - linux): 2019-08-13 18:13:43.555604315 -0400
|
||||||
|
{'id': 7300, 'format': '%a %Y-%m-%d %H:%M:%S %Z', 'locale': None}, # C locale format (found in timedatectl cli output): # Wed 2020-03-11 00:53:21 UTC
|
||||||
|
# attempt locale changes last
|
||||||
|
{'id': 8000, 'format': '%a %d %b %Y %H:%M:%S %Z', 'locale': ''}, # current locale format (found in upower cli output): # mar. 23 mars 2021 23:12:11 UTC
|
||||||
|
{'id': 8100, 'format': '%a %d %b %Y %H:%M:%S', 'locale': ''}, # current locale format with non-UTC tz (found in upower cli output): # mar. 23 mars 2021 19:12:11 EDT
|
||||||
|
{'id': 8200, 'format': '%A %d %B %Y, %H:%M:%S UTC%z', 'locale': ''}, # fr_FR.utf8 locale format (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC+0000)
|
||||||
|
{'id': 8300, 'format': '%A %d %B %Y, %H:%M:%S', 'locale': ''}, # fr_FR.utf8 locale format with non-UTC tz (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC-0400)
|
||||||
|
{'id': 9000, 'format': '%c', 'locale': ''} # locally configured locale format conversion: Could be anything :) this is a last-gasp attempt
|
||||||
|
]
|
||||||
|
|
||||||
|
# from https://www.timeanddate.com/time/zones/
|
||||||
|
# only removed UTC timezone and added known non-UTC offsets
|
||||||
|
tz_abbr = ['A', 'ACDT', 'ACST', 'ACT', 'ACWST', 'ADT', 'AEDT', 'AEST', 'AET', 'AFT', 'AKDT', 'AKST', 'ALMT',
|
||||||
|
'AMST', 'AMT', 'ANAST', 'ANAT', 'AQTT', 'ART', 'AST', 'AT', 'AWDT', 'AWST', 'AZOST', 'AZOT',
|
||||||
|
'AZST', 'AZT', 'AoE', 'B', 'BNT', 'BOT', 'BRST', 'BRT', 'BST', 'BTT', 'C', 'CAST', 'CAT', 'CCT',
|
||||||
|
'CDT', 'CEST', 'CET', 'CHADT', 'CHAST', 'CHOST', 'CHOT', 'CHUT', 'CIDST', 'CIST', 'CKT', 'CLST',
|
||||||
|
'CLT', 'COT', 'CST', 'CT', 'CVT', 'CXT', 'ChST', 'D', 'DAVT', 'DDUT', 'E', 'EASST', 'EAST',
|
||||||
|
'EAT', 'ECT', 'EDT', 'EEST', 'EET', 'EGST', 'EGT', 'EST', 'ET', 'F', 'FET', 'FJST', 'FJT', 'FKST',
|
||||||
|
'FKT', 'FNT', 'G', 'GALT', 'GAMT', 'GET', 'GFT', 'GILT', 'GMT', 'GST', 'GYT', 'H', 'HDT', 'HKT',
|
||||||
|
'HOVST', 'HOVT', 'HST', 'I', 'ICT', 'IDT', 'IOT', 'IRDT', 'IRKST', 'IRKT', 'IRST', 'IST', 'JST',
|
||||||
|
'K', 'KGT', 'KOST', 'KRAST', 'KRAT', 'KST', 'KUYT', 'L', 'LHDT', 'LHST', 'LINT', 'M', 'MAGST',
|
||||||
|
'MAGT', 'MART', 'MAWT', 'MDT', 'MHT', 'MMT', 'MSD', 'MSK', 'MST', 'MT', 'MUT', 'MVT', 'MYT', 'N',
|
||||||
|
'NCT', 'NDT', 'NFDT', 'NFT', 'NOVST', 'NOVT', 'NPT', 'NRT', 'NST', 'NUT', 'NZDT', 'NZST', 'O',
|
||||||
|
'OMSST', 'OMST', 'ORAT', 'P', 'PDT', 'PET', 'PETST', 'PETT', 'PGT', 'PHOT', 'PHT', 'PKT', 'PMDT',
|
||||||
|
'PMST', 'PONT', 'PST', 'PT', 'PWT', 'PYST', 'PYT', 'Q', 'QYZT', 'R', 'RET', 'ROTT', 'S', 'SAKT',
|
||||||
|
'SAMT', 'SAST', 'SBT', 'SCT', 'SGT', 'SRET', 'SRT', 'SST', 'SYOT', 'T', 'TAHT', 'TFT', 'TJT', 'TKT',
|
||||||
|
'TLT', 'TMT', 'TOST', 'TOT', 'TRT', 'TVT', 'U', 'ULAST', 'ULAT', 'UYST', 'UYT', 'UZT', 'V', 'VET',
|
||||||
|
'VLAST', 'VLAT', 'VOST', 'VUT', 'W', 'WAKT', 'WARST', 'WAST', 'WAT', 'WEST', 'WET', 'WFT', 'WGST',
|
||||||
|
'WGT', 'WIB', 'WIT', 'WITA', 'WST', 'WT', 'X', 'Y', 'YAKST', 'YAKT', 'YAPT', 'YEKST', 'YEKT', 'Z',
|
||||||
|
'UTC-1200', 'UTC-1100', 'UTC-1000', 'UTC-0930', 'UTC-0900', 'UTC-0800', 'UTC-0700', 'UTC-0600',
|
||||||
|
'UTC-0500', 'UTC-0400', 'UTC-0300', 'UTC-0230', 'UTC-0200', 'UTC-0100', 'UTC+0100', 'UTC+0200',
|
||||||
|
'UTC+0300', 'UTC+0400', 'UTC+0430', 'UTC+0500', 'UTC+0530', 'UTC+0545', 'UTC+0600', 'UTC+0630',
|
||||||
|
'UTC+0700', 'UTC+0800', 'UTC+0845', 'UTC+0900', 'UTC+1000', 'UTC+1030', 'UTC+1100', 'UTC+1200',
|
||||||
|
'UTC+1300', 'UTC+1345', 'UTC+1400']
|
||||||
|
|
||||||
|
# normalize the timezone by taking out any timezone reference, except UTC
|
||||||
|
cleandata = data.replace('(', '').replace(')', '')
|
||||||
|
normalized_datetime_list = []
|
||||||
|
for term in cleandata.split():
|
||||||
|
if term not in tz_abbr:
|
||||||
|
normalized_datetime_list.append(term)
|
||||||
|
|
||||||
|
normalized_datetime = ' '.join(normalized_datetime_list)
|
||||||
|
|
||||||
|
# normalize further by converting any greater-than 6-digit subsecond to 6-digits
|
||||||
|
p = re.compile(r'(\W\d\d:\d\d:\d\d\.\d{6})\d+\W')
|
||||||
|
normalized_datetime = p.sub(r'\g<1> ', normalized_datetime)
|
||||||
|
|
||||||
|
for fmt in formats:
|
||||||
|
try:
|
||||||
|
locale.setlocale(locale.LC_TIME, fmt['locale'])
|
||||||
|
dt = datetime.strptime(normalized_datetime, fmt['format'])
|
||||||
|
timestamp_naive = int(dt.replace(tzinfo=None).timestamp())
|
||||||
|
timestamp_obj['format'] = fmt['id']
|
||||||
|
locale.setlocale(locale.LC_TIME, None)
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
locale.setlocale(locale.LC_TIME, None)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if dt and utc_tz:
|
||||||
|
dt_utc = dt.replace(tzinfo=timezone.utc)
|
||||||
|
timestamp_utc = int(dt_utc.timestamp())
|
||||||
|
|
||||||
|
if timestamp_naive:
|
||||||
|
timestamp_obj['timestamp_naive'] = timestamp_naive
|
||||||
|
timestamp_obj['timestamp_utc'] = timestamp_utc
|
||||||
|
|
||||||
|
return timestamp_obj
|
||||||
|
@ -3,36 +3,38 @@ import jc.utils
|
|||||||
|
|
||||||
|
|
||||||
class MyTests(unittest.TestCase):
|
class MyTests(unittest.TestCase):
|
||||||
def test_utils_parse_datetime_to_timestamp(self):
|
def test_utils_timestamp(self):
|
||||||
|
|
||||||
# naive timestamps created in PDT
|
# naive timestamps created in PDT
|
||||||
datetime_map = {
|
datetime_map = {
|
||||||
# C locale format conversion, or date cli command in C locale with non-UTC tz
|
# C locale format conversion, or date cli command in C locale with non-UTC tz
|
||||||
'Tue Mar 23 16:12:11 2021': {'format': 1000, 'timestamp_naive': 1616541131, 'timestamp_utc': None},
|
'Tue Mar 23 16:12:11 2021': {'string': 'Tue Mar 23 16:12:11 2021', 'format': 1000, 'naive': 1616541131, 'utc': None},
|
||||||
'Tue Mar 23 16:12:11 IST 2021': {'format': 1000, 'timestamp_naive': 1616541131, 'timestamp_utc': None},
|
'Tue Mar 23 16:12:11 IST 2021': {'string': 'Tue Mar 23 16:12:11 IST 2021', 'format': 1000, 'naive': 1616541131, 'utc': None},
|
||||||
# en_US.UTF-8 local format (found in who cli output)
|
# en_US.UTF-8 local format (found in who cli output)
|
||||||
'2021-03-23 00:14': {'format': 1500, 'timestamp_naive': 1616483640, 'timestamp_utc': None},
|
'2021-03-23 00:14': {'string': '2021-03-23 00:14', 'format': 1500, 'naive': 1616483640, 'utc': None},
|
||||||
# en_US.UTF-8 local format (found in upower cli output)
|
# en_US.UTF-8 local format (found in upower cli output)
|
||||||
'Tue 23 Mar 2021 04:12:11 PM UTC': {'format': 2000, 'timestamp_naive': 1616541131, 'timestamp_utc': 1616515931},
|
'Tue 23 Mar 2021 04:12:11 PM UTC': {'string': 'Tue 23 Mar 2021 04:12:11 PM UTC', 'format': 2000, 'naive': 1616541131, 'utc': 1616515931},
|
||||||
# en_US.UTF-8 local format with non-UTC tz (found in upower cli output)
|
# en_US.UTF-8 local format with non-UTC tz (found in upower cli output)
|
||||||
'Tue 23 Mar 2021 04:12:11 PM IST': {'format': 3000, 'timestamp_naive': 1616541131, 'timestamp_utc': None},
|
'Tue 23 Mar 2021 04:12:11 PM IST': {'string': 'Tue 23 Mar 2021 04:12:11 PM IST', 'format': 3000, 'naive': 1616541131, 'utc': None},
|
||||||
# European local format (found in upower cli output)
|
# European local format (found in upower cli output)
|
||||||
'Tuesday 01 October 2019 12:50:41 PM UTC': {'format': 4000, 'timestamp_naive': 1569959441, 'timestamp_utc': 1569934241},
|
'Tuesday 01 October 2019 12:50:41 PM UTC': {'string': 'Tuesday 01 October 2019 12:50:41 PM UTC', 'format': 4000, 'naive': 1569959441, 'utc': 1569934241},
|
||||||
# European local format with non-UTC tz (found in upower cli output)
|
# European local format with non-UTC tz (found in upower cli output)
|
||||||
'Tuesday 01 October 2019 12:50:41 PM IST': {'format': 5000, 'timestamp_naive': 1569959441, 'timestamp_utc': None},
|
'Tuesday 01 October 2019 12:50:41 PM IST': {'string': 'Tuesday 01 October 2019 12:50:41 PM IST', 'format': 5000, 'naive': 1569959441, 'utc': None},
|
||||||
# date cli command in en_US.UTF-8 format
|
# date cli command in en_US.UTF-8 format
|
||||||
'Wed Mar 24 06:16:19 PM UTC 2021': {'format': 6000, 'timestamp_naive': 1616634979, 'timestamp_utc': 1616609779},
|
'Wed Mar 24 06:16:19 PM UTC 2021': {'string': 'Wed Mar 24 06:16:19 PM UTC 2021', 'format': 6000, 'naive': 1616634979, 'utc': 1616609779},
|
||||||
# date cli command in C locale format
|
# date cli command in C locale format
|
||||||
'Wed Mar 24 11:11:30 UTC 2021': {'format': 7000, 'timestamp_naive': 1616609490, 'timestamp_utc': 1616584290},
|
'Wed Mar 24 11:11:30 UTC 2021': {'string': 'Wed Mar 24 11:11:30 UTC 2021', 'format': 7000, 'naive': 1616609490, 'utc': 1616584290},
|
||||||
# C locale format (found in stat cli output - OSX)
|
# C locale format (found in stat cli output - OSX)
|
||||||
'Mar 29 11:49:05 2021': {'format': 7100, 'timestamp_naive': 1617043745, 'timestamp_utc': None},
|
'Mar 29 11:49:05 2021': {'string': 'Mar 29 11:49:05 2021', 'format': 7100, 'naive': 1617043745, 'utc': None},
|
||||||
# C local format (found in stat cli output - linux) non-UTC tz
|
# C local format (found in stat cli output - linux) non-UTC tz
|
||||||
'2019-08-13 18:13:43.555604315 -0400': {'format': 7200, 'timestamp_naive': 1565745223, 'timestamp_utc': None},
|
'2019-08-13 18:13:43.555604315 -0400': {'string': '2019-08-13 18:13:43.555604315 -0400', 'format': 7200, 'naive': 1565745223, 'utc': None},
|
||||||
# C local format (found in stat cli output - linux) UTC
|
# C local format (found in stat cli output - linux) UTC
|
||||||
'2019-08-13 18:13:43.555604315 -0000': {'format': 7200, 'timestamp_naive': 1565745223, 'timestamp_utc': 1565720023},
|
'2019-08-13 18:13:43.555604315 -0000': {'string': '2019-08-13 18:13:43.555604315 -0000', 'format': 7200, 'naive': 1565745223, 'utc': 1565720023},
|
||||||
# C locale format (found in timedatectl cli output)
|
# C locale format (found in timedatectl cli output)
|
||||||
'Wed 2020-03-11 00:53:21 UTC': {'format': 7300, 'timestamp_naive': 1583913201, 'timestamp_utc': 1583888001}
|
'Wed 2020-03-11 00:53:21 UTC': {'string': 'Wed 2020-03-11 00:53:21 UTC', 'format': 7300, 'naive': 1583913201, 'utc': 1583888001},
|
||||||
|
# test with None input
|
||||||
|
None: {'string': None, 'format': None, 'naive': None, 'utc': None}
|
||||||
}
|
}
|
||||||
|
|
||||||
for input_string, expected_output in datetime_map.items():
|
for input_string, expected_output in datetime_map.items():
|
||||||
self.assertEqual(jc.utils.parse_datetime_to_timestamp(input_string), expected_output)
|
self.assertEqual(jc.utils.timestamp(input_string).__dict__, expected_output)
|
||||||
|
Reference in New Issue
Block a user