2022-03-04 13:27:39 -08:00
""" jc - JSON Convert utils """
2019-11-06 21:07:25 -08:00
import sys
2021-03-29 14:45:13 -07:00
import re
2021-03-24 12:36:54 -07:00
import locale
2021-09-23 20:53:31 -07:00
import shutil
2021-03-24 12:36:54 -07:00
from datetime import datetime , timezone
2021-09-23 20:53:31 -07:00
from textwrap import TextWrapper
2022-02-07 08:30:11 -08:00
from functools import lru_cache
2022-04-20 09:44:42 -04:00
from typing import List , Iterable , Union , Optional
2019-11-06 21:07:25 -08:00
2022-04-27 07:37:31 -07:00
def _asciify(string: str) -> str:
    """
    Return a string downgraded from Unicode to ASCII with some simple
    conversions.

    The copyright sign is rewritten as '(c)'. Every other non-ASCII
    character is replaced with its Python backslash escape (for example
    'é' becomes '\\xe9'). Characters that are already ASCII - including
    quotes, backslashes, tabs and newlines - pass through unchanged.

    Parameters:

        string:       (string) text to downgrade

    Returns:

        String containing only ASCII characters
    """
    string = string.replace('©', '(c)')

    # 'backslashreplace' escapes only the characters ASCII cannot
    # represent, so existing ASCII text is never re-escaped or re-quoted
    # the way ascii()/repr() output would be
    return string.encode('ascii', errors='backslashreplace').decode('ascii')
2022-04-27 07:37:31 -07:00
def _safe_print(string: str, sep=' ', end='\n', file=None, flush=False) -> None:
    """
    Output for both UTF-8 and ASCII encoding systems.

    Prints the string as-is. If the target stream cannot encode it, the
    string is downgraded to ASCII with `_asciify` and printed again.

    Parameters:

        string:       (string) text to print

        sep:          (string) passed through to print()

        end:          (string) passed through to print()

        file:         (stream) destination stream. None (the default)
                      means whatever sys.stdout is at call time, so a
                      redirected sys.stdout is honored.

        flush:        (bool) passed through to print()

    Returns:

        None - just prints output
    """
    try:
        print(string, sep=sep, end=end, file=file, flush=flush)

    except UnicodeEncodeError:
        # the stream's encoding cannot represent the text: fall back to ASCII
        print(_asciify(string), sep=sep, end=end, file=file, flush=flush)
2022-04-26 14:25:34 -07:00
2022-01-26 15:54:36 -08:00
def warning_message(message_lines: List[str]) -> None:
    """
    Prints warning message for non-fatal issues. The first line is
    prepended with 'jc:  Warning - ' and subsequent lines are indented.
    Wraps text as needed based on the terminal width.

    Parameters:

        message_lines:   (list) list of string lines. A single string is
                         also accepted and treated as a one-line
                         message. The list passed in is not modified.
                         Nothing is printed for an empty list.

    Returns:

        None - just prints output to STDERR
    """
    # this is for backwards compatibility with existing custom parsers
    if isinstance(message_lines, str):
        message_lines = [message_lines]

    # work on a copy so the caller's list is left intact
    lines = list(message_lines)
    if not lines:
        return

    columns = shutil.get_terminal_size().columns

    # the prefix below is 15 characters wide, so a 15-space indent lines
    # wrapped text up under the start of the message
    first_wrapper = TextWrapper(width=columns, subsequent_indent=' ' * 15)
    next_wrapper = TextWrapper(width=columns, initial_indent=' ' * 15,
                               subsequent_indent=' ' * 19)

    first_line, *other_lines = lines
    first_str = first_wrapper.fill(f'jc:  Warning - {first_line}')
    _safe_print(first_str, file=sys.stderr)

    for line in other_lines:
        # empty lines are skipped rather than printed as blank rows
        if line == '':
            continue

        message = next_wrapper.fill(line)
        _safe_print(message, file=sys.stderr)
2021-09-23 20:53:31 -07:00
2022-01-26 15:54:36 -08:00
def error_message(message_lines: List[str]) -> None:
    """
    Prints an error message for fatal issues. The first line is
    prepended with 'jc:  Error - ' and subsequent lines are indented.
    Wraps text as needed based on the terminal width.

    Parameters:

        message_lines:   (list) list of string lines. A single string is
                         also accepted and treated as a one-line
                         message. The list passed in is not modified.
                         Nothing is printed for an empty list.

    Returns:

        None - just prints output to STDERR
    """
    # accept a bare string as a one-line message
    if isinstance(message_lines, str):
        message_lines = [message_lines]

    # work on a copy so the caller's list is left intact
    lines = list(message_lines)
    if not lines:
        return

    columns = shutil.get_terminal_size().columns

    # the prefix below is 13 characters wide, so a 13-space indent lines
    # wrapped text up under the start of the message
    first_wrapper = TextWrapper(width=columns, subsequent_indent=' ' * 13)
    next_wrapper = TextWrapper(width=columns, initial_indent=' ' * 13,
                               subsequent_indent=' ' * 17)

    first_line, *other_lines = lines
    first_str = first_wrapper.fill(f'jc:  Error - {first_line}')
    _safe_print(first_str, file=sys.stderr)

    for line in other_lines:
        # empty lines are skipped rather than printed as blank rows
        if line == '':
            continue

        message = next_wrapper.fill(line)
        _safe_print(message, file=sys.stderr)
2019-11-06 21:07:25 -08:00
2022-05-23 13:52:08 -07:00
def is_compatible(compatible: List) -> bool:
    """
    Report whether the running OS platform is one the parser supports.

    Parameters:

        compatible:   (list) sys.platform name prefixes the parser
                      supports

    Returns:

        Boolean       True if sys.platform starts with any of the listed
                      names, otherwise False
    """
    return any(sys.platform.startswith(name) for name in compatible)
2022-02-01 17:18:55 -08:00
def compatibility(mod_name: str, compatible: List, quiet: bool = False) -> None:
    """
    Warn on STDERR when the parser does not support the running OS
    platform.

    Parameters:

        mod_name:     (string) __name__ of the calling module

        compatible:   (list) sys.platform name(s) compatible with
                      the parser. compatible options:
                      linux, darwin, cygwin, win32, aix, freebsd

        quiet:        (bool) suppress compatibility message if True

    Returns:

        None - just prints output to STDERR
    """
    # quiet mode skips the platform check entirely
    if quiet:
        return

    if is_compatible(compatible):
        return

    # only the last dotted component of the module name is reported
    parser_name = mod_name.rsplit('.', 1)[-1]
    supported = ', '.join(compatible)
    warning_message([
        f'{parser_name} parser is not compatible with your OS ({sys.platform}).',
        f'Compatible platforms: {supported}'
    ])
2020-06-14 17:17:40 -07:00
2022-01-26 15:54:36 -08:00
def has_data(data: str) -> bool:
    """
    Tell whether the input holds anything other than whitespace.

    Parameters:

        data:        (string) input to check whether it contains data

    Returns:

        Boolean      True if input string (data) contains non-whitespace
                     characters, otherwise False
    """
    # empty (or otherwise falsy) input has no data by definition
    if not data:
        return False

    return not data.isspace()
2021-03-24 12:36:54 -07:00
2022-02-01 17:54:22 -08:00
def convert_to_int(value: Union[str, float]) -> Optional[int]:
    """
    Converts string and float input to int. Strips all non-numeric
    characters from strings.

    Parameters:

        value:         (string/float) Input value

    Returns:

        integer/None   Integer if successful conversion, otherwise None.
                       None is also returned for values with no integer
                       representation (NaN, infinity, or numeric text
                       too large to convert).
    """
    if isinstance(value, str):
        # keep only digits, minus signs and decimal points
        str_val = re.sub(r'[^0-9\-\.]', '', value)
        try:
            return int(str_val)
        except (ValueError, TypeError):
            # not a plain integer: try it as a float and truncate
            try:
                return int(float(str_val))
            except (ValueError, TypeError, OverflowError):
                # OverflowError: the text parsed to an infinite float
                return None

    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # int() raises ValueError for NaN and OverflowError for infinity
            return None

    return None
2021-04-18 11:46:42 -07:00
2022-02-01 17:54:22 -08:00
def convert_to_float(value: Union[str, int]) -> Optional[float]:
    """
    Converts string and int input to float. Strips all non-numeric
    characters from strings.

    Parameters:

        value:         (string/integer) Input value

    Returns:

        float/None     Float if successful conversion, otherwise None.
                       None is also returned for integers too large to
                       be represented as a float.
    """
    if isinstance(value, str):
        try:
            # keep only digits, minus signs and decimal points
            return float(re.sub(r'[^0-9\-\.]', '', value))
        except (ValueError, TypeError):
            return None

    if isinstance(value, (int, float)):
        try:
            return float(value)
        except OverflowError:
            # raised by float() for integers beyond the float range
            return None

    return None
2021-04-17 17:22:59 -07:00
2022-01-26 15:54:36 -08:00
def convert_to_bool(value: Union[str, int, float]) -> bool:
    """
    Interpret a string, integer, or float as a boolean by looking for
    'truthy' values.

    Parameters:

        value:          (string/integer/float) Input value

    Returns:

        True/False      False unless a 'truthy' number or string is found
                        ('y', 'yes', 'true', '1', 1, -1, etc.)
    """
    # numbers: any non-zero value is True
    if isinstance(value, (int, float)):
        return bool(value)

    # anything that is neither a number nor a string is False
    if not isinstance(value, str):
        return False

    # strings that convert to a number follow the number's truthiness
    try:
        as_number = convert_to_float(value)
    except Exception:
        as_number = None

    if as_number is not None:
        return bool(as_number)

    # otherwise only the known truthy words (case-insensitive) count
    return value.lower() in ('y', 'yes', 'true', '*')
2022-01-26 15:54:36 -08:00
def input_type_check(data: str) -> None:
    """Verify that the input data is a `str`; raise `TypeError` otherwise."""
    if isinstance(data, str):
        return

    raise TypeError("Input data must be a 'str' object.")
2021-04-01 11:07:37 -07:00
class timestamp:
    """
    Convert a datetime text string in one of several known formats to
    naive and UTC epoch timestamps. See `__init__` for details.
    """

    # Known datetime layouts. They are tried in this order unless a format
    # hint moves some to the front. `locale` None means the current
    # LC_TIME setting is left alone; '' means the user's default locale is
    # activated for that attempt.
    _FORMATS = (
        # manual C locale format conversion: Tue Mar 23 16:12:11 2021 or Tue Mar 23 16:12:11 IST 2021
        {'id': 1000, 'format': '%a %b %d %H:%M:%S %Y', 'locale': None},
        # git date output: Thu Mar 5 09:17:40 2020 -0800
        {'id': 1100, 'format': '%a %b %d %H:%M:%S %Y %z', 'locale': None},
        # en_US.UTF-8 local format (found in who cli output): 2021-03-23 00:14
        {'id': 1500, 'format': '%Y-%m-%d %H:%M', 'locale': None},
        # Windows english format (found in dir cli output): 12/07/2019 02:09 AM
        {'id': 1600, 'format': '%m/%d/%Y %I:%M %p', 'locale': None},
        # Windows english format with non-UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC-0600)
        {'id': 1700, 'format': '%m/%d/%Y, %I:%M:%S %p', 'locale': None},
        # Windows english format with UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC)
        {'id': 1705, 'format': '%m/%d/%Y, %I:%M:%S %p %Z', 'locale': None},
        # Windows english format with UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC+0000)
        {'id': 1710, 'format': '%m/%d/%Y, %I:%M:%S %p UTC%z', 'locale': None},
        # en_US.UTF-8 local format (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM UTC
        {'id': 2000, 'format': '%a %d %b %Y %I:%M:%S %p %Z', 'locale': None},
        # en_US.UTF-8 local format with non-UTC tz (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM IST
        {'id': 3000, 'format': '%a %d %b %Y %I:%M:%S %p', 'locale': None},
        # European-style local format (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM UTC
        {'id': 4000, 'format': '%A %d %B %Y %I:%M:%S %p %Z', 'locale': None},
        # European-style local format with non-UTC tz (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM IST
        {'id': 5000, 'format': '%A %d %B %Y %I:%M:%S %p', 'locale': None},
        # en_US.UTF-8 format (found in date cli): Wed Mar 24 06:16:19 PM UTC 2021
        {'id': 6000, 'format': '%a %b %d %I:%M:%S %p %Z %Y', 'locale': None},
        # C locale format (found in date cli): Wed Mar 24 11:11:30 UTC 2021
        {'id': 7000, 'format': '%a %b %d %H:%M:%S %Z %Y', 'locale': None},
        # C locale format (found in stat cli output - osx): Mar 29 11:49:05 2021
        {'id': 7100, 'format': '%b %d %H:%M:%S %Y', 'locale': None},
        # C locale format (found in stat cli output - linux): 2019-08-13 18:13:43.555604315 -0400
        {'id': 7200, 'format': '%Y-%m-%d %H:%M:%S.%f %z', 'locale': None},
        # C locale format with non-UTC tz (found in modified vmstat cli output): 2021-09-16 20:32:28 PDT
        {'id': 7250, 'format': '%Y-%m-%d %H:%M:%S', 'locale': None},
        # C locale format (found in modified vmstat cli output): 2021-09-16 20:32:28 UTC
        {'id': 7255, 'format': '%Y-%m-%d %H:%M:%S %Z', 'locale': None},
        # C locale format (found in timedatectl cli output): Wed 2020-03-11 00:53:21 UTC
        {'id': 7300, 'format': '%a %Y-%m-%d %H:%M:%S %Z', 'locale': None},
        # attempt locale changes last
        # current locale format (found in upower cli output): mar. 23 mars 2021 23:12:11 UTC
        {'id': 8000, 'format': '%a %d %b %Y %H:%M:%S %Z', 'locale': ''},
        # current locale format with non-UTC tz (found in upower cli output): mar. 23 mars 2021 19:12:11 EDT
        {'id': 8100, 'format': '%a %d %b %Y %H:%M:%S', 'locale': ''},
        # fr_FR.utf8 locale format (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC+0000)
        {'id': 8200, 'format': '%A %d %B %Y, %H:%M:%S UTC%z', 'locale': ''},
        # fr_FR.utf8 locale format with non-UTC tz (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC-0400)
        {'id': 8300, 'format': '%A %d %B %Y, %H:%M:%S', 'locale': ''},
        # locally configured locale format conversion: Could be anything :) this is a last-gasp attempt
        {'id': 9000, 'format': '%c', 'locale': ''},
    )

    # Timezone references that are stripped from the input before parsing.
    # from https://www.timeanddate.com/time/zones/
    # only removed UTC timezone and added known non-UTC offsets
    _TZ_ABBR = frozenset((
        'A', 'ACDT', 'ACST', 'ACT', 'ACWST', 'ADT', 'AEDT', 'AEST', 'AET', 'AFT', 'AKDT',
        'AKST', 'ALMT', 'AMST', 'AMT', 'ANAST', 'ANAT', 'AQTT', 'ART', 'AST', 'AT', 'AWDT',
        'AWST', 'AZOST', 'AZOT', 'AZST', 'AZT', 'AoE', 'B', 'BNT', 'BOT', 'BRST', 'BRT', 'BST',
        'BTT', 'C', 'CAST', 'CAT', 'CCT', 'CDT', 'CEST', 'CET', 'CHADT', 'CHAST', 'CHOST',
        'CHOT', 'CHUT', 'CIDST', 'CIST', 'CKT', 'CLST', 'CLT', 'COT', 'CST', 'CT', 'CVT', 'CXT',
        'ChST', 'D', 'DAVT', 'DDUT', 'E', 'EASST', 'EAST', 'EAT', 'ECT', 'EDT', 'EEST', 'EET',
        'EGST', 'EGT', 'EST', 'ET', 'F', 'FET', 'FJST', 'FJT', 'FKST', 'FKT', 'FNT', 'G',
        'GALT', 'GAMT', 'GET', 'GFT', 'GILT', 'GMT', 'GST', 'GYT', 'H', 'HDT', 'HKT', 'HOVST',
        'HOVT', 'HST', 'I', 'ICT', 'IDT', 'IOT', 'IRDT', 'IRKST', 'IRKT', 'IRST', 'IST', 'JST',
        'K', 'KGT', 'KOST', 'KRAST', 'KRAT', 'KST', 'KUYT', 'L', 'LHDT', 'LHST', 'LINT', 'M',
        'MAGST', 'MAGT', 'MART', 'MAWT', 'MDT', 'MHT', 'MMT', 'MSD', 'MSK', 'MST', 'MT', 'MUT',
        'MVT', 'MYT', 'N', 'NCT', 'NDT', 'NFDT', 'NFT', 'NOVST', 'NOVT', 'NPT', 'NRT', 'NST',
        'NUT', 'NZDT', 'NZST', 'O', 'OMSST', 'OMST', 'ORAT', 'P', 'PDT', 'PET', 'PETST', 'PETT',
        'PGT', 'PHOT', 'PHT', 'PKT', 'PMDT', 'PMST', 'PONT', 'PST', 'PT', 'PWT', 'PYST', 'PYT',
        'Q', 'QYZT', 'R', 'RET', 'ROTT', 'S', 'SAKT', 'SAMT', 'SAST', 'SBT', 'SCT', 'SGT',
        'SRET', 'SRT', 'SST', 'SYOT', 'T', 'TAHT', 'TFT', 'TJT', 'TKT', 'TLT', 'TMT', 'TOST',
        'TOT', 'TRT', 'TVT', 'U', 'ULAST', 'ULAT', 'UYST', 'UYT', 'UZT', 'V', 'VET', 'VLAST',
        'VLAT', 'VOST', 'VUT', 'W', 'WAKT', 'WARST', 'WAST', 'WAT', 'WEST', 'WET', 'WFT',
        'WGST', 'WGT', 'WIB', 'WIT', 'WITA', 'WST', 'WT', 'X', 'Y', 'YAKST', 'YAKT', 'YAPT',
        'YEKST', 'YEKT', 'Z', 'UTC-1200', 'UTC-1100', 'UTC-1000', 'UTC-0930', 'UTC-0900',
        'UTC-0800', 'UTC-0700', 'UTC-0600', 'UTC-0500', 'UTC-0400', 'UTC-0300', 'UTC-0230',
        'UTC-0200', 'UTC-0100', 'UTC+0100', 'UTC+0200', 'UTC+0300', 'UTC+0400', 'UTC+0430',
        'UTC+0500', 'UTC+0530', 'UTC+0545', 'UTC+0600', 'UTC+0630', 'UTC+0700', 'UTC+0800',
        'UTC+0845', 'UTC+0900', 'UTC+1000', 'UTC+1030', 'UTC+1100', 'UTC+1200', 'UTC+1300',
        'UTC+1345', 'UTC+1400',
    ))

    # Matches a time with more than 6 sub-second digits so the extra
    # digits can be dropped (%f accepts at most 6).
    _SUBSECOND_RE = re.compile(r'(\W\d\d:\d\d:\d\d\.\d{6})\d+\W')

    def __init__(self,
                 datetime_string: str,
                 format_hint: Optional[Iterable] = None
                 ) -> None:
        """
        Input a datetime text string of several formats and convert to a
        naive or timezone-aware epoch timestamp in UTC.

        Parameters:

            datetime_string  (str):  a string representation of a
                datetime in several supported formats

            format_hint  (iterable):  an optional iterable of format ID
                integers to instruct the timestamp object to try those
                formats first in the order given. Other formats will be
                tried after the format hint list is exhausted. This can
                speed up timestamp conversion so several different formats
                don't have to be tried in brute-force fashion.

        Returns a timestamp object with the following attributes:

            string  (str):  the input datetime string

            format  (int | None):  the format rule that was used to decode
                the datetime string. None if conversion fails.

            naive  (int | None):  timestamp based on locally configured
                timezone. None if conversion fails.

            utc  (int | None):  aware timestamp only if UTC timezone
                detected in datetime string. None if conversion fails.
        """
        self.string = datetime_string

        # _parse_dt is wrapped in lru_cache, so the hint must be hashable
        hint = tuple(format_hint) if format_hint else tuple()

        dt = self._parse_dt(self.string, format_hint=hint)
        self.format = dt['format']
        self.naive = dt['timestamp_naive']
        self.utc = dt['timestamp_utc']

    def __repr__(self):
        return f'timestamp(string={self.string!r}, format={self.format}, naive={self.naive}, utc={self.utc})'

    @staticmethod
    @lru_cache(maxsize=512)
    def _parse_dt(dt_string, format_hint=None):
        """
        Input a datetime text string of several formats and convert to
        a naive or timezone-aware epoch timestamp in UTC.

        Parameters:

            dt_string:    (string) a string representation of a date-time
                          in several supported formats

            format_hint:  (tuple) format ID int's that should be tried
                          first. This can increase performance since the
                          function will not need to try many incorrect
                          formats before finding the correct one. Must be
                          hashable because results are cached with
                          lru_cache.

        Returns:

            Dictionary of the following format:

                {
                    # for debugging purposes. None if conversion fails
                    "format":               int,

                    # timestamp based on locally configured timezone.
                    # None if conversion fails.
                    "timestamp_naive":      int,

                    # aware timestamp only if UTC timezone detected.
                    # None if conversion fails.
                    "timestamp_utc":        int
                }

            The `format` integer denotes which date_time format
            conversion succeeded.

            The `timestamp_naive` integer is the converted date-time
            string to a naive epoch timestamp.

            The `timestamp_utc` integer is the converted date-time
            string to an aware epoch timestamp in the UTC timezone. If
            an aware conversion cannot be performed (e.g. the UTC
            timezone is not found in the date-time string), then this
            field will be None.

            If the conversion completely fails, all fields will be None.
        """
        data = dt_string or ''
        dt = None
        timestamp_naive = None
        timestamp_utc = None
        timestamp_obj = {
            'format': None,
            'timestamp_naive': None,
            'timestamp_utc': None
        }

        # normalize a missing/empty hint to an empty tuple
        format_hint = tuple(format_hint) if format_hint else tuple()

        # sometimes UTC is referenced as 'Coordinated Universal Time'. Convert to 'UTC'
        data = data.replace('Coordinated Universal Time', 'UTC')

        # decide whether the string refers to the UTC timezone. 'UTC' with
        # an offset only counts when that offset is zero.
        utc_tz = False
        if 'UTC' in data:
            utc_tz = True
            if 'UTC+' in data or 'UTC-' in data:
                utc_tz = 'UTC+0000' in data or 'UTC-0000' in data

        elif '+0000' in data or '-0000' in data:
            utc_tz = True

        # normalize the timezone by taking out any timezone reference, except UTC
        cleandata = data.replace('(', '').replace(')', '')
        normalized_datetime = ' '.join(
            term for term in cleandata.split() if term not in timestamp._TZ_ABBR
        )

        # normalize further by converting any greater-than 6-digit subsecond to 6-digits.
        # the pattern consumes the separator after the digits, so the
        # replacement puts a space back.
        normalized_datetime = timestamp._SUBSECOND_RE.sub(r'\g<1> ', normalized_datetime)

        # try format hints first, then fall back to brute-force method
        formats_by_id = {fmt['id']: fmt for fmt in timestamp._FORMATS}
        hinted_formats = [formats_by_id[fmt_id] for fmt_id in format_hint
                          if fmt_id in formats_by_id]
        remaining_formats = [fmt for fmt in timestamp._FORMATS
                             if fmt['id'] not in format_hint]

        for fmt in hinted_formats + remaining_formats:
            saved_locale = None
            try:
                if fmt['locale'] is not None:
                    # setlocale() without a locale argument only queries the
                    # current setting; remember it so it can be put back
                    saved_locale = locale.setlocale(locale.LC_TIME)
                    locale.setlocale(locale.LC_TIME, fmt['locale'])

                parsed = datetime.strptime(normalized_datetime, fmt['format'])
                naive = int(parsed.replace(tzinfo=None).timestamp())

            except Exception:
                # deliberate best-effort: any failure just means this
                # format does not apply, so move on to the next one
                continue

            finally:
                if saved_locale is not None:
                    locale.setlocale(locale.LC_TIME, saved_locale)

            dt = parsed
            timestamp_naive = naive
            timestamp_obj['format'] = fmt['id']
            break

        if dt and utc_tz:
            dt_utc = dt.replace(tzinfo=timezone.utc)
            timestamp_utc = int(dt_utc.timestamp())

        # compare against None: an epoch of 0 is a valid timestamp
        if timestamp_naive is not None:
            timestamp_obj['timestamp_naive'] = timestamp_naive
            timestamp_obj['timestamp_utc'] = timestamp_utc

        return timestamp_obj