mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-23 00:29:59 +02:00
better normalization of datetime string. better UTC detection. More formats supported with detected locale.
This commit is contained in:
34
jc/utils.py
34
jc/utils.py
@ -119,21 +119,33 @@ def parse_datetime_to_timestamp(data):
|
|||||||
timestamp_naive = None
|
timestamp_naive = None
|
||||||
timestamp_utc = None
|
timestamp_utc = None
|
||||||
timestamp_obj = {}
|
timestamp_obj = {}
|
||||||
utc_tz = True if 'UTC' in data else False
|
utc_tz = False
|
||||||
|
|
||||||
|
if 'UTC' in data:
|
||||||
|
utc_tz = True
|
||||||
|
if 'UTC+' in data or 'UTC-' in data:
|
||||||
|
if 'UTC+0000' in data or 'UTC-0000' in data:
|
||||||
|
utc_tz = True
|
||||||
|
else:
|
||||||
|
utc_tz = False
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{'id': 1000, 'format': '%c', 'locale': None}, # C locale format conversion, or date cli command in C locale with non-UTC tz: Tue Mar 23 16:12:11 2021 or Tue Mar 23 16:12:11 IST 2021
|
{'id': 1000, 'format': '%c', 'locale': None}, # C locale format conversion, or date cli command in C locale with non-UTC tz: Tue Mar 23 16:12:11 2021 or Tue Mar 23 16:12:11 IST 2021
|
||||||
{'id': 2000, 'format': '%a %d %b %Y %I:%M:%S %p %Z', 'locale': None}, # en_US.UTF-8 local format (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM UTC
|
{'id': 2000, 'format': '%a %d %b %Y %I:%M:%S %p %Z', 'locale': None}, # en_US.UTF-8 local format (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM UTC
|
||||||
|
{'id': 2500, 'format': '%a %d %b %Y %H:%M:%S %Z', 'locale': ''}, # current locale format (found in upower cli output): # mar. 23 mars 2021 23:12:11 UTC
|
||||||
{'id': 3000, 'format': '%a %d %b %Y %I:%M:%S %p', 'locale': None}, # en_US.UTF-8 local format with non-UTC tz (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM IST
|
{'id': 3000, 'format': '%a %d %b %Y %I:%M:%S %p', 'locale': None}, # en_US.UTF-8 local format with non-UTC tz (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM IST
|
||||||
{'id': 4000, 'format': '%A %d %B %Y %I:%M:%S %p %Z', 'locale': None}, # European local format (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM UTC
|
{'id': 3500, 'format': '%a %d %b %Y %H:%M:%S', 'locale': ''}, # current locale format with non-UTC tz (found in upower cli output): # mar. 23 mars 2021 19:12:11 EDT
|
||||||
{'id': 5000, 'format': '%A %d %B %Y %I:%M:%S %p', 'locale': None}, # European local format with non-UTC tz (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM IST
|
{'id': 4000, 'format': '%A %d %B %Y %I:%M:%S %p %Z', 'locale': None}, # European-style local format (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM UTC
|
||||||
{'id': 6000, 'format': '%a %b %d %I:%M:%S %p %Z %Y', 'locale': None}, # date cli command in en_US.UTF-8 format: Wed Mar 24 06:16:19 PM UTC 2021
|
{'id': 5000, 'format': '%A %d %B %Y %I:%M:%S %p', 'locale': None}, # European-style local format with non-UTC tz (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM IST
|
||||||
{'id': 7000, 'format': '%a %b %d %H:%M:%S %Z %Y', 'locale': None}, # date cli command in C locale format: Wed Mar 24 11:11:30 UTC 2021
|
{'id': 6000, 'format': '%a %b %d %I:%M:%S %p %Z %Y', 'locale': None}, # en_US.UTF-8 format (found in date cli): Wed Mar 24 06:16:19 PM UTC 2021
|
||||||
|
{'id': 7000, 'format': '%a %b %d %H:%M:%S %Z %Y', 'locale': None}, # C locale format (found in date cli): Wed Mar 24 11:11:30 UTC 2021
|
||||||
|
{'id': 7500, 'format': '%A %d %B %Y, %H:%M:%S UTC%z', 'locale': ''}, # fr_FR.utf8 locale format (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC+0000)
|
||||||
|
{'id': 7550, 'format': '%A %d %B %Y, %H:%M:%S', 'locale': ''}, # fr_FR.utf8 locale format with non-UTC tz (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC-0400)
|
||||||
{'id': 9000, 'format': '%c', 'locale': ''} # locally configured locale format conversion: Could be anything :) this is a last-gasp attempt
|
{'id': 9000, 'format': '%c', 'locale': ''} # locally configured locale format conversion: Could be anything :) this is a last-gasp attempt
|
||||||
]
|
]
|
||||||
|
|
||||||
# from https://www.timeanddate.com/time/zones/
|
# from https://www.timeanddate.com/time/zones/
|
||||||
# only removed UTC timezone
|
# only removed UTC timezone and added known non-UTC offsets
|
||||||
tz_abbr = ['A', 'ACDT', 'ACST', 'ACT', 'ACWST', 'ADT', 'AEDT', 'AEST', 'AET', 'AFT', 'AKDT', 'AKST', 'ALMT',
|
tz_abbr = ['A', 'ACDT', 'ACST', 'ACT', 'ACWST', 'ADT', 'AEDT', 'AEST', 'AET', 'AFT', 'AKDT', 'AKST', 'ALMT',
|
||||||
'AMST', 'AMT', 'ANAST', 'ANAT', 'AQTT', 'ART', 'AST', 'AT', 'AWDT', 'AWST', 'AZOST', 'AZOT',
|
'AMST', 'AMT', 'ANAST', 'ANAT', 'AQTT', 'ART', 'AST', 'AT', 'AWDT', 'AWST', 'AZOST', 'AZOT',
|
||||||
'AZST', 'AZT', 'AoE', 'B', 'BNT', 'BOT', 'BRST', 'BRT', 'BST', 'BTT', 'C', 'CAST', 'CAT', 'CCT',
|
'AZST', 'AZT', 'AoE', 'B', 'BNT', 'BOT', 'BRST', 'BRT', 'BST', 'BTT', 'C', 'CAST', 'CAT', 'CCT',
|
||||||
@ -150,11 +162,17 @@ def parse_datetime_to_timestamp(data):
|
|||||||
'SAMT', 'SAST', 'SBT', 'SCT', 'SGT', 'SRET', 'SRT', 'SST', 'SYOT', 'T', 'TAHT', 'TFT', 'TJT', 'TKT',
|
'SAMT', 'SAST', 'SBT', 'SCT', 'SGT', 'SRET', 'SRT', 'SST', 'SYOT', 'T', 'TAHT', 'TFT', 'TJT', 'TKT',
|
||||||
'TLT', 'TMT', 'TOST', 'TOT', 'TRT', 'TVT', 'U', 'ULAST', 'ULAT', 'UYST', 'UYT', 'UZT', 'V', 'VET',
|
'TLT', 'TMT', 'TOST', 'TOT', 'TRT', 'TVT', 'U', 'ULAST', 'ULAT', 'UYST', 'UYT', 'UZT', 'V', 'VET',
|
||||||
'VLAST', 'VLAT', 'VOST', 'VUT', 'W', 'WAKT', 'WARST', 'WAST', 'WAT', 'WEST', 'WET', 'WFT', 'WGST',
|
'VLAST', 'VLAT', 'VOST', 'VUT', 'W', 'WAKT', 'WARST', 'WAST', 'WAT', 'WEST', 'WET', 'WFT', 'WGST',
|
||||||
'WGT', 'WIB', 'WIT', 'WITA', 'WST', 'WT', 'X', 'Y', 'YAKST', 'YAKT', 'YAPT', 'YEKST', 'YEKT', 'Z']
|
'WGT', 'WIB', 'WIT', 'WITA', 'WST', 'WT', 'X', 'Y', 'YAKST', 'YAKT', 'YAPT', 'YEKST', 'YEKT', 'Z',
|
||||||
|
'UTC-1200', 'UTC-1100', 'UTC-1000', 'UTC-0930', 'UTC-0900', 'UTC-0800', 'UTC-0700', 'UTC-0600',
|
||||||
|
'UTC-0500', 'UTC-0400', 'UTC-0300', 'UTC-0230', 'UTC-0200', 'UTC-0100', 'UTC+0100', 'UTC+0200',
|
||||||
|
'UTC+0300', 'UTC+0400', 'UTC+0430', 'UTC+0500', 'UTC+0530', 'UTC+0545', 'UTC+0600', 'UTC+0630',
|
||||||
|
'UTC+0700', 'UTC+0800', 'UTC+0845', 'UTC+0900', 'UTC+1000', 'UTC+1030', 'UTC+1100', 'UTC+1200',
|
||||||
|
'UTC+1300', 'UTC+1345', 'UTC+1400']
|
||||||
|
|
||||||
# normalize the timezone by taking out any timezone reference, except UTC
|
# normalize the timezone by taking out any timezone reference, except UTC
|
||||||
|
cleandata = data.replace('(', '').replace(')', '')
|
||||||
normalized_datetime_list = []
|
normalized_datetime_list = []
|
||||||
for term in data.split():
|
for term in cleandata.split():
|
||||||
if term not in tz_abbr:
|
if term not in tz_abbr:
|
||||||
normalized_datetime_list.append(term)
|
normalized_datetime_list.append(term)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user