2019-11-06 21:07:25 -08:00
""" jc - JSON CLI output utility utils """
import sys
2021-03-29 14:45:13 -07:00
import re
2021-03-24 12:36:54 -07:00
import locale
2021-09-23 20:53:31 -07:00
import shutil
2021-03-24 12:36:54 -07:00
from datetime import datetime , timezone
2021-09-23 20:53:31 -07:00
from textwrap import TextWrapper
2019-11-06 21:07:25 -08:00
2021-09-23 20:53:31 -07:00
def warning_message(message_lines):
    """
    Prints warning message for non-fatal issues. The first line is
    prepended with 'jc:  Warning - ' and subsequent lines are indented.
    Wraps text as needed based on the terminal width.

    Parameters:

        message_lines:   (list) list of string lines. A bare string is
                         also accepted and treated as a single line.
                         The caller's list is not modified.

    Returns:

        None - just prints output to STDERR. Nothing is printed when
        message_lines is empty.
    """
    # this is for backwards compatibility with existing custom parsers
    if isinstance(message_lines, str):
        message_lines = [message_lines]

    # nothing to report - avoid an IndexError on an empty list
    if not message_lines:
        return

    # unpack instead of pop(0) so the caller's list is left untouched
    first_line, *other_lines = message_lines

    columns = shutil.get_terminal_size().columns

    # the prefix below is 15 columns wide, so continuation lines are
    # indented by 15 to line up underneath the message text
    first_wrapper = TextWrapper(width=columns, subsequent_indent=' ' * 15)
    next_wrapper = TextWrapper(width=columns, initial_indent=' ' * 15,
                               subsequent_indent=' ' * 19)

    first_str = first_wrapper.fill(f'jc:  Warning - {first_line}')
    print(first_str, file=sys.stderr)

    for line in other_lines:
        # blank lines are skipped rather than printed as empty rows
        if line == '':
            continue

        print(next_wrapper.fill(line), file=sys.stderr)
def error_message(message_lines):
    """
    Prints an error message for fatal issues. The first line is
    prepended with 'jc:  Error - ' and subsequent lines are indented.
    Wraps text as needed based on the terminal width.

    Parameters:

        message_lines:   (list) list of string lines. A bare string is
                         also accepted and treated as a single line.
                         The caller's list is not modified.

    Returns:

        None - just prints output to STDERR. Nothing is printed when
        message_lines is empty.
    """
    # accept a bare string the same way warning_message() does
    if isinstance(message_lines, str):
        message_lines = [message_lines]

    # nothing to report - avoid an IndexError on an empty list
    if not message_lines:
        return

    # unpack instead of pop(0) so the caller's list is left untouched
    first_line, *other_lines = message_lines

    columns = shutil.get_terminal_size().columns

    # the prefix below is 13 columns wide, so continuation lines are
    # indented by 13 to line up underneath the message text
    first_wrapper = TextWrapper(width=columns, subsequent_indent=' ' * 13)
    next_wrapper = TextWrapper(width=columns, initial_indent=' ' * 13,
                               subsequent_indent=' ' * 17)

    first_str = first_wrapper.fill(f'jc:  Error - {first_line}')
    print(first_str, file=sys.stderr)

    for line in other_lines:
        # blank lines are skipped rather than printed as empty rows
        if line == '':
            continue

        print(next_wrapper.fill(line), file=sys.stderr)
2019-11-06 21:07:25 -08:00
2021-11-30 11:43:06 -08:00
def compatibility(mod_name, compatible, quiet=False):
    """
    Checks for the parser's compatibility with the running OS
    platform and emits a warning when it is not listed.

    Parameters:

        mod_name:     (string) __name__ of the calling module

        compatible:   (list) sys.platform name(s) compatible with
                      the parser. compatible options:
                      linux, darwin, cygwin, win32, aix, freebsd

        quiet:        (bool) suppress compatibility message if True

    Returns:

        None - just prints output to STDERR
    """
    if quiet:
        return

    # sys.platform may carry a suffix, so a prefix match is used
    if any(sys.platform.startswith(name) for name in compatible):
        return

    # only the last dotted component of the module name is shown
    parser_name = mod_name.split('.')[-1]
    supported = ', '.join(compatible)
    warning_message([
        f'{parser_name} parser not compatible with your OS ({sys.platform}).',
        f'Compatible platforms: {supported}'
    ])
2020-06-14 17:17:40 -07:00
def has_data(data):
    """
    Reports whether the input holds anything besides whitespace.

    Parameters:

        data:        (string) input to check whether it contains data

    Returns:

        Boolean      True if input string (data) contains non-whitespace
                     characters, otherwise False
    """
    # empty / None input has no data at all
    if not data:
        return False

    return not data.isspace()
2021-03-24 12:36:54 -07:00
2021-04-17 17:22:59 -07:00
def convert_to_int(value):
    """
    Converts string and float input to int. Strips all non-numeric
    characters from strings.

    Parameters:

        value:         (string/integer/float) Input value

    Returns:

        integer/None   Integer if successful conversion, otherwise None.
                       Floats are truncated toward zero. NaN, infinity
                       and values too large to convert yield None.
    """
    if isinstance(value, str):
        # keep only digits, minus signs and decimal points
        str_val = re.sub(r'[^0-9\-\.]', '', value)

        # try a plain integer first, then fall back to a float parse so
        # that strings such as '1.5' are truncated instead of rejected
        for parse in (int, float):
            try:
                return int(parse(str_val))
            except (ValueError, TypeError, OverflowError):
                continue

        return None

    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # int() raises on NaN (ValueError) and infinity (OverflowError)
            return None

    return None
2021-04-18 11:46:42 -07:00
def convert_to_float(value):
    """
    Converts string and int input to float. Strips all non-numeric
    characters from strings.

    Parameters:

        value:         (string/integer/float) Input value

    Returns:

        float/None     Float if successful conversion, otherwise None
                       (including integers too large for a float)
    """
    if isinstance(value, str):
        # keep only digits, minus signs and decimal points
        str_val = re.sub(r'[^0-9\-\.]', '', value)
        try:
            return float(str_val)
        except (ValueError, TypeError):
            return None

    if isinstance(value, (int, float)):
        try:
            return float(value)
        except OverflowError:
            # raised when an int is too large to be represented as float
            return None

    return None
2021-04-17 17:22:59 -07:00
2021-04-18 11:46:42 -07:00
def convert_to_bool(value):
    """
    Converts string, integer, or float input to boolean by checking
    for 'truthy' values.

    Parameters:

        value:          (string/integer/float) Input value

    Returns:

        True/False      False unless a 'truthy' number or string is found
                        ('y', 'yes', 'true', '1', 1, -1, etc.)
    """
    truthy = ('y', 'yes', 'true', '*')

    # numbers: plain truthiness
    if isinstance(value, (int, float)):
        return bool(value)

    if isinstance(value, str):
        # a string that converts to a number is judged by that number.
        # convert_to_float() returns None instead of raising when the
        # conversion fails, so no exception handling is needed here and
        # genuine programming errors are no longer silently swallowed.
        number = convert_to_float(value)
        if number is not None:
            return bool(number)

        # otherwise only the known truthy words count (case-insensitive)
        return value.lower() in truthy

    # any other type is never truthy
    return False
2021-09-23 11:48:39 -07:00
def stream_success(output_line, ignore_exceptions):
    """
    Tag a streaming output line as successfully parsed.

    When `ignore_exceptions` is truthy a `_jc_meta` entry of
    `{'success': True}` is written into `output_line` in place.
    The same `output_line` object is returned either way.
    """
    if not ignore_exceptions:
        return output_line

    output_line['_jc_meta'] = {'success': True}
    return output_line
2021-09-16 20:25:28 -07:00
2021-09-23 11:48:39 -07:00
def stream_error(e, ignore_exceptions, line):
    """
    Reraise the stream exception with annotation or return an error
    `_jc_meta` object if `ignore_exceptions=True`.

    Parameters:

        e:                  (Exception) the exception that was caught

        ignore_exceptions:  (bool) if falsy the exception is re-raised
                            with a usage hint appended to its message

        line:               the input line being processed when the
                            exception occurred. Normally a string, but
                            non-string values are tolerated.

    Returns:

        Dictionary with a single `_jc_meta` key holding `success`
        (always False), `error` and `line`.

    Raises:

        The original exception `e` when `ignore_exceptions` is falsy.
    """
    if not ignore_exceptions:
        e.args = (str(e) + '... Use the ignore_exceptions option (-qq) to ignore streaming parser errors.',)
        raise e

    # the offending line may not be a string at all (that can be the very
    # reason for the error), so fall back to its str() form rather than
    # raising a second, unrelated AttributeError from here
    try:
        clean_line = line.strip()
    except AttributeError:
        clean_line = str(line)

    return {
        '_jc_meta':
            {
                'success': False,
                'error': f'{e.__class__.__name__}: {e}',
                'line': clean_line
            }
    }
2021-09-13 18:29:10 -07:00
2021-11-30 11:43:06 -08:00
def input_type_check(data):
    """Raise TypeError unless the input data is a string"""
    if isinstance(data, str):
        return

    raise TypeError("Input data must be a 'str' object.")
def streaming_input_type_check(data):
    """Raise TypeError unless the input is an iterable other than str or bytes"""
    is_text = isinstance(data, (str, bytes))
    is_iterable = hasattr(data, '__iter__')

    if is_text or not is_iterable:
        raise TypeError("Input data must be a non-string iterable object.")
def streaming_line_input_type_check(line):
    """Raise TypeError unless the given line is a string"""
    if isinstance(line, str):
        return

    raise TypeError("Input line must be a 'str' object.")
2021-04-01 11:07:37 -07:00
class timestamp:
    """
    Input a date-time text string of several formats and convert to a
    naive or timezone-aware epoch timestamp in UTC.

    Attributes:

        string  (str)   the input datetime string

        format  (int)   the format rule that was used to
                        decode the datetime string. None if
                        conversion fails

        naive   (int)   timestamp based on locally configured
                        timezone. None if conversion fails

        utc     (int)   aware timestamp only if UTC timezone
                        detected in datetime string. None if
                        conversion fails
    """

    # strptime() rules, tried in order until one of them matches. A
    # 'locale' of None means LC_TIME is left untouched; '' switches
    # LC_TIME to the user's default locale before the attempt.
    _FORMATS = (
        {'id': 1000, 'format': '%a %b %d %H:%M:%S %Y', 'locale': None},  # manual C locale format conversion: Tue Mar 23 16:12:11 2021 or Tue Mar 23 16:12:11 IST 2021
        {'id': 1500, 'format': '%Y-%m-%d %H:%M', 'locale': None},  # en_US.UTF-8 local format (found in who cli output): 2021-03-23 00:14
        {'id': 1600, 'format': '%m/%d/%Y %I:%M %p', 'locale': None},  # Windows english format (found in dir cli output): 12/07/2019 02:09 AM
        {'id': 1700, 'format': '%m/%d/%Y, %I:%M:%S %p', 'locale': None},  # Windows english format with non-UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC-0600)
        {'id': 1705, 'format': '%m/%d/%Y, %I:%M:%S %p %Z', 'locale': None},  # Windows english format with UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC)
        {'id': 1710, 'format': '%m/%d/%Y, %I:%M:%S %p UTC%z', 'locale': None},  # Windows english format with UTC tz (found in systeminfo cli output): 3/22/2021, 1:15:51 PM (UTC+0000)
        {'id': 2000, 'format': '%a %d %b %Y %I:%M:%S %p %Z', 'locale': None},  # en_US.UTF-8 local format (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM UTC
        {'id': 3000, 'format': '%a %d %b %Y %I:%M:%S %p', 'locale': None},  # en_US.UTF-8 local format with non-UTC tz (found in upower cli output): Tue 23 Mar 2021 04:12:11 PM IST
        {'id': 4000, 'format': '%A %d %B %Y %I:%M:%S %p %Z', 'locale': None},  # European-style local format (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM UTC
        {'id': 5000, 'format': '%A %d %B %Y %I:%M:%S %p', 'locale': None},  # European-style local format with non-UTC tz (found in upower cli output): Tuesday 01 October 2019 12:50:41 PM IST
        {'id': 6000, 'format': '%a %b %d %I:%M:%S %p %Z %Y', 'locale': None},  # en_US.UTF-8 format (found in date cli): Wed Mar 24 06:16:19 PM UTC 2021
        {'id': 7000, 'format': '%a %b %d %H:%M:%S %Z %Y', 'locale': None},  # C locale format (found in date cli): Wed Mar 24 11:11:30 UTC 2021
        {'id': 7100, 'format': '%b %d %H:%M:%S %Y', 'locale': None},  # C locale format (found in stat cli output - osx): # Mar 29 11:49:05 2021
        {'id': 7200, 'format': '%Y-%m-%d %H:%M:%S.%f %z', 'locale': None},  # C locale format (found in stat cli output - linux): 2019-08-13 18:13:43.555604315 -0400
        {'id': 7250, 'format': '%Y-%m-%d %H:%M:%S', 'locale': None},  # C locale format with non-UTC tz (found in modified vmstat cli output): # 2021-09-16 20:32:28 PDT
        {'id': 7255, 'format': '%Y-%m-%d %H:%M:%S %Z', 'locale': None},  # C locale format (found in modified vmstat cli output): # 2021-09-16 20:32:28 UTC
        {'id': 7300, 'format': '%a %Y-%m-%d %H:%M:%S %Z', 'locale': None},  # C locale format (found in timedatectl cli output): # Wed 2020-03-11 00:53:21 UTC
        # attempt locale changes last
        {'id': 8000, 'format': '%a %d %b %Y %H:%M:%S %Z', 'locale': ''},  # current locale format (found in upower cli output): # mar. 23 mars 2021 23:12:11 UTC
        {'id': 8100, 'format': '%a %d %b %Y %H:%M:%S', 'locale': ''},  # current locale format with non-UTC tz (found in upower cli output): # mar. 23 mars 2021 19:12:11 EDT
        {'id': 8200, 'format': '%A %d %B %Y, %H:%M:%S UTC%z', 'locale': ''},  # fr_FR.utf8 locale format (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC+0000)
        {'id': 8300, 'format': '%A %d %B %Y, %H:%M:%S', 'locale': ''},  # fr_FR.utf8 locale format with non-UTC tz (found in date cli output): vendredi 26 mars 2021, 13:26:46 (UTC-0400)
        {'id': 9000, 'format': '%c', 'locale': ''},  # locally configured locale format conversion: Could be anything :) this is a last-gasp attempt
    )

    # from https://www.timeanddate.com/time/zones/
    # only removed UTC timezone and added known non-UTC offsets
    _TZ_ABBR = frozenset((
        'A', 'ACDT', 'ACST', 'ACT', 'ACWST', 'ADT', 'AEDT', 'AEST', 'AET', 'AFT', 'AKDT', 'AKST', 'ALMT',
        'AMST', 'AMT', 'ANAST', 'ANAT', 'AQTT', 'ART', 'AST', 'AT', 'AWDT', 'AWST', 'AZOST', 'AZOT',
        'AZST', 'AZT', 'AoE', 'B', 'BNT', 'BOT', 'BRST', 'BRT', 'BST', 'BTT', 'C', 'CAST', 'CAT', 'CCT',
        'CDT', 'CEST', 'CET', 'CHADT', 'CHAST', 'CHOST', 'CHOT', 'CHUT', 'CIDST', 'CIST', 'CKT', 'CLST',
        'CLT', 'COT', 'CST', 'CT', 'CVT', 'CXT', 'ChST', 'D', 'DAVT', 'DDUT', 'E', 'EASST', 'EAST',
        'EAT', 'ECT', 'EDT', 'EEST', 'EET', 'EGST', 'EGT', 'EST', 'ET', 'F', 'FET', 'FJST', 'FJT', 'FKST',
        'FKT', 'FNT', 'G', 'GALT', 'GAMT', 'GET', 'GFT', 'GILT', 'GMT', 'GST', 'GYT', 'H', 'HDT', 'HKT',
        'HOVST', 'HOVT', 'HST', 'I', 'ICT', 'IDT', 'IOT', 'IRDT', 'IRKST', 'IRKT', 'IRST', 'IST', 'JST',
        'K', 'KGT', 'KOST', 'KRAST', 'KRAT', 'KST', 'KUYT', 'L', 'LHDT', 'LHST', 'LINT', 'M', 'MAGST',
        'MAGT', 'MART', 'MAWT', 'MDT', 'MHT', 'MMT', 'MSD', 'MSK', 'MST', 'MT', 'MUT', 'MVT', 'MYT', 'N',
        'NCT', 'NDT', 'NFDT', 'NFT', 'NOVST', 'NOVT', 'NPT', 'NRT', 'NST', 'NUT', 'NZDT', 'NZST', 'O',
        'OMSST', 'OMST', 'ORAT', 'P', 'PDT', 'PET', 'PETST', 'PETT', 'PGT', 'PHOT', 'PHT', 'PKT', 'PMDT',
        'PMST', 'PONT', 'PST', 'PT', 'PWT', 'PYST', 'PYT', 'Q', 'QYZT', 'R', 'RET', 'ROTT', 'S', 'SAKT',
        'SAMT', 'SAST', 'SBT', 'SCT', 'SGT', 'SRET', 'SRT', 'SST', 'SYOT', 'T', 'TAHT', 'TFT', 'TJT', 'TKT',
        'TLT', 'TMT', 'TOST', 'TOT', 'TRT', 'TVT', 'U', 'ULAST', 'ULAT', 'UYST', 'UYT', 'UZT', 'V', 'VET',
        'VLAST', 'VLAT', 'VOST', 'VUT', 'W', 'WAKT', 'WARST', 'WAST', 'WAT', 'WEST', 'WET', 'WFT', 'WGST',
        'WGT', 'WIB', 'WIT', 'WITA', 'WST', 'WT', 'X', 'Y', 'YAKST', 'YAKT', 'YAPT', 'YEKST', 'YEKT', 'Z',
        'UTC-1200', 'UTC-1100', 'UTC-1000', 'UTC-0930', 'UTC-0900', 'UTC-0800', 'UTC-0700', 'UTC-0600',
        'UTC-0500', 'UTC-0400', 'UTC-0300', 'UTC-0230', 'UTC-0200', 'UTC-0100', 'UTC+0100', 'UTC+0200',
        'UTC+0300', 'UTC+0400', 'UTC+0430', 'UTC+0500', 'UTC+0530', 'UTC+0545', 'UTC+0600', 'UTC+0630',
        'UTC+0700', 'UTC+0800', 'UTC+0845', 'UTC+0900', 'UTC+1000', 'UTC+1030', 'UTC+1100', 'UTC+1200',
        'UTC+1300', 'UTC+1345', 'UTC+1400',
    ))

    # matches a time with more than 6 subsecond digits so the extra
    # digits can be dropped (%f accepts at most 6)
    _SUBSECOND_RE = re.compile(r'(\W\d\d:\d\d:\d\d\.\d{6})\d+\W')

    def __init__(self, datetime_string):
        """
        Parse the given date-time string and populate the `string`,
        `format`, `naive` and `utc` attributes.

        Parameters:

            datetime_string:  (str) a string representation of a
                                    date-time in several supported formats
        """
        self.string = datetime_string
        dt = self._parse()
        self.format = dt['format']
        self.naive = dt['timestamp_naive']
        self.utc = dt['timestamp_utc']

    def __repr__(self):
        return f'timestamp(string="{self.string}", format={self.format}, naive={self.naive}, utc={self.utc})'

    def _parse(self):
        """
        Convert `self.string` to a naive and, when the string denotes
        UTC, a timezone-aware epoch timestamp.

        Returns:

            Dictionary   A Dictionary of the following format:

                {
                    # for debugging purposes. None if conversion fails
                    "format":            integer,

                    # timestamp based on locally configured timezone.
                    # None if conversion fails.
                    "timestamp_naive":   integer,

                    # aware timestamp only if UTC timezone detected.
                    # None if conversion fails.
                    "timestamp_utc":     integer
                }

            The `format` integer denotes which date_time format
            conversion succeeded.

            The `timestamp_naive` integer is the converted date-time
            string to a naive epoch timestamp.

            The `timestamp_utc` integer is the converted date-time
            string to an aware epoch timestamp in the UTC timezone. If
            an aware conversion cannot be performed (e.g. the UTC
            timezone is not found in the date-time string), then this
            field will be None.

            If the conversion completely fails, all fields will be None.
        """
        data = self.string or ''
        timestamp_obj = {
            'format': None,
            'timestamp_naive': None,
            'timestamp_utc': None
        }

        # sometimes UTC is referenced as 'Coordinated Universal Time'. Convert to 'UTC'
        data = data.replace('Coordinated Universal Time', 'UTC')

        # decide whether the string denotes UTC: a bare 'UTC', an explicit
        # zero offset after 'UTC', or a bare +0000/-0000 offset
        if 'UTC' in data:
            if 'UTC+' in data or 'UTC-' in data:
                utc_tz = 'UTC+0000' in data or 'UTC-0000' in data
            else:
                utc_tz = True
        else:
            utc_tz = '+0000' in data or '-0000' in data

        # normalize the timezone by taking out any timezone reference, except UTC
        cleandata = data.replace('(', '').replace(')', '')
        normalized_datetime = ' '.join(
            term for term in cleandata.split() if term not in self._TZ_ABBR
        )

        # normalize further by converting any greater-than 6-digit subsecond to 6-digits
        normalized_datetime = self._SUBSECOND_RE.sub(r'\g<1> ', normalized_datetime)

        dt = None

        # setlocale() without a locale argument only *queries* the current
        # setting, so remember it here and restore it explicitly once done;
        # otherwise a locale-based attempt would leak the changed LC_TIME
        # into the rest of the process.
        saved_locale = locale.setlocale(locale.LC_TIME)
        try:
            for fmt in self._FORMATS:
                try:
                    if fmt['locale'] is not None:
                        locale.setlocale(locale.LC_TIME, fmt['locale'])

                    candidate = datetime.strptime(normalized_datetime, fmt['format'])
                    naive = int(candidate.replace(tzinfo=None).timestamp())
                except Exception:
                    # deliberate best effort: a mismatched format, an
                    # unsupported locale or an out-of-range date simply
                    # means the next rule gets its turn
                    continue

                dt = candidate
                timestamp_obj['format'] = fmt['id']
                timestamp_obj['timestamp_naive'] = naive
                break
        finally:
            locale.setlocale(locale.LC_TIME, saved_locale)

        # compare against None: a timestamp of 0 (the epoch) is valid
        if dt is not None and utc_tz:
            dt_utc = dt.replace(tzinfo=timezone.utc)
            timestamp_obj['timestamp_utc'] = int(dt_utc.timestamp())

        return timestamp_obj