mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
add convert_size_to_int function
This commit is contained in:
@ -9,6 +9,7 @@
|
|||||||
* [convert\_to\_int](#jc.utils.convert_to_int)
|
* [convert\_to\_int](#jc.utils.convert_to_int)
|
||||||
* [convert\_to\_float](#jc.utils.convert_to_float)
|
* [convert\_to\_float](#jc.utils.convert_to_float)
|
||||||
* [convert\_to\_bool](#jc.utils.convert_to_bool)
|
* [convert\_to\_bool](#jc.utils.convert_to_bool)
|
||||||
|
* [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
|
||||||
* [input\_type\_check](#jc.utils.input_type_check)
|
* [input\_type\_check](#jc.utils.input_type_check)
|
||||||
* [timestamp](#jc.utils.timestamp)
|
* [timestamp](#jc.utils.timestamp)
|
||||||
* [\_\_init\_\_](#jc.utils.timestamp.__init__)
|
* [\_\_init\_\_](#jc.utils.timestamp.__init__)
|
||||||
@ -178,6 +179,47 @@ Returns:
|
|||||||
True/False False unless a 'truthy' number or string is found
|
True/False False unless a 'truthy' number or string is found
|
||||||
('y', 'yes', 'true', '1', 1, -1, etc.)
|
('y', 'yes', 'true', '1', 1, -1, etc.)
|
||||||
|
|
||||||
|
<a id="jc.utils.convert_size_to_int"></a>
|
||||||
|
|
||||||
|
### convert\_size\_to\_int
|
||||||
|
|
||||||
|
```python
|
||||||
|
def convert_size_to_int(size: str, binary: bool = False) -> Optional[int]
|
||||||
|
```
|
||||||
|
|
||||||
|
Parse a human readable data size and return the number of bytes.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
|
||||||
|
size: (string) The human readable file size to parse.
|
||||||
|
binary: (boolean) `True` to use binary multiples of bytes
|
||||||
|
(base-2) for ambiguous unit symbols and names,
|
||||||
|
`False` to use decimal multiples of bytes (base-10).
|
||||||
|
Returns:
|
||||||
|
integer/None Integer if successful conversion, otherwise None
|
||||||
|
|
||||||
|
This function knows how to parse sizes in bytes, kilobytes, megabytes,
|
||||||
|
gigabytes, terabytes and petabytes. Some examples:
|
||||||
|
|
||||||
|
>>> convert_size_to_int('42')
|
||||||
|
42
|
||||||
|
>>> convert_size_to_int('13b')
|
||||||
|
13
|
||||||
|
>>> convert_size_to_int('5 bytes')
|
||||||
|
5
|
||||||
|
>>> convert_size_to_int('1 KB')
|
||||||
|
1000
|
||||||
|
>>> convert_size_to_int('1 kilobyte')
|
||||||
|
1000
|
||||||
|
>>> convert_size_to_int('1 KiB')
|
||||||
|
1024
|
||||||
|
>>> convert_size_to_int('1 KB', binary=True)
|
||||||
|
1024
|
||||||
|
>>> convert_size_to_int('1.5 GB')
|
||||||
|
1500000000
|
||||||
|
>>> convert_size_to_int('1.5 GB', binary=True)
|
||||||
|
1610612736
|
||||||
|
|
||||||
<a id="jc.utils.input_type_check"></a>
|
<a id="jc.utils.input_type_check"></a>
|
||||||
|
|
||||||
### input\_type\_check
|
### input\_type\_check
|
||||||
|
112
jc/utils.py
112
jc/utils.py
@ -3,6 +3,8 @@ import sys
|
|||||||
import re
|
import re
|
||||||
import locale
|
import locale
|
||||||
import shutil
|
import shutil
|
||||||
|
from collections import namedtuple
|
||||||
|
from numbers import Number
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from textwrap import TextWrapper
|
from textwrap import TextWrapper
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@ -274,6 +276,116 @@ def convert_to_bool(value: object) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# convert_size_to_int from https://github.com/xolox/python-humanfriendly
|
||||||
|
|
||||||
|
# Copyright (c) 2021 Peter Odding
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
# a copy of this software and associated documentation files (the
|
||||||
|
# "Software"), to deal in the Software without restriction, including
|
||||||
|
# without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
# permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
# the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be
|
||||||
|
# included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
def convert_size_to_int(size: str, binary: bool = False) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Parse a human readable data size and return the number of bytes.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
|
||||||
|
size: (string) The human readable file size to parse.
|
||||||
|
binary: (boolean) `True` to use binary multiples of bytes
|
||||||
|
(base-2) for ambiguous unit symbols and names,
|
||||||
|
`False` to use decimal multiples of bytes (base-10).
|
||||||
|
Returns:
|
||||||
|
integer/None Integer if successful conversion, otherwise None
|
||||||
|
|
||||||
|
This function knows how to parse sizes in bytes, kilobytes, megabytes,
|
||||||
|
gigabytes, terabytes and petabytes. Some examples:
|
||||||
|
|
||||||
|
>>> convert_size_to_int('42')
|
||||||
|
42
|
||||||
|
>>> convert_size_to_int('13b')
|
||||||
|
13
|
||||||
|
>>> convert_size_to_int('5 bytes')
|
||||||
|
5
|
||||||
|
>>> convert_size_to_int('1 KB')
|
||||||
|
1000
|
||||||
|
>>> convert_size_to_int('1 kilobyte')
|
||||||
|
1000
|
||||||
|
>>> convert_size_to_int('1 KiB')
|
||||||
|
1024
|
||||||
|
>>> convert_size_to_int('1 KB', binary=True)
|
||||||
|
1024
|
||||||
|
>>> convert_size_to_int('1.5 GB')
|
||||||
|
1500000000
|
||||||
|
>>> convert_size_to_int('1.5 GB', binary=True)
|
||||||
|
1610612736
|
||||||
|
"""
|
||||||
|
def tokenize(text: str) -> List[str]:
|
||||||
|
tokenized_input: List = []
|
||||||
|
for token in re.split(r'(\d+(?:\.\d+)?)', text):
|
||||||
|
token = token.strip()
|
||||||
|
if re.match(r'\d+\.\d+', token):
|
||||||
|
tokenized_input.append(float(token))
|
||||||
|
elif token.isdigit():
|
||||||
|
tokenized_input.append(int(token))
|
||||||
|
elif token:
|
||||||
|
tokenized_input.append(token)
|
||||||
|
return tokenized_input
|
||||||
|
|
||||||
|
SizeUnit = namedtuple('SizeUnit', 'divider, symbol, name')
|
||||||
|
CombinedUnit = namedtuple('CombinedUnit', 'decimal, binary')
|
||||||
|
disk_size_units = (
|
||||||
|
CombinedUnit(SizeUnit(1000**1, 'KB', 'kilobyte'), SizeUnit(1024**1, 'KiB', 'kibibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**2, 'MB', 'megabyte'), SizeUnit(1024**2, 'MiB', 'mebibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**3, 'GB', 'gigabyte'), SizeUnit(1024**3, 'GiB', 'gibibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**4, 'TB', 'terabyte'), SizeUnit(1024**4, 'TiB', 'tebibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**5, 'PB', 'petabyte'), SizeUnit(1024**5, 'PiB', 'pebibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**6, 'EB', 'exabyte'), SizeUnit(1024**6, 'EiB', 'exbibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**7, 'ZB', 'zettabyte'), SizeUnit(1024**7, 'ZiB', 'zebibyte')),
|
||||||
|
CombinedUnit(SizeUnit(1000**8, 'YB', 'yottabyte'), SizeUnit(1024**8, 'YiB', 'yobibyte')),
|
||||||
|
)
|
||||||
|
tokens = tokenize(size)
|
||||||
|
if tokens and isinstance(tokens[0], Number):
|
||||||
|
# Get the normalized unit (if any) from the tokenized input.
|
||||||
|
normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
|
||||||
|
# If the input contains only a number, it's assumed to be the number of
|
||||||
|
# bytes. The second token can also explicitly reference the unit bytes.
|
||||||
|
if len(tokens) == 1 or normalized_unit.startswith('b'):
|
||||||
|
return int(tokens[0])
|
||||||
|
# Otherwise we expect two tokens: A number and a unit.
|
||||||
|
if normalized_unit:
|
||||||
|
# Convert plural units to singular units, for details:
|
||||||
|
# https://github.com/xolox/python-humanfriendly/issues/26
|
||||||
|
normalized_unit = normalized_unit.rstrip('s')
|
||||||
|
for unit in disk_size_units:
|
||||||
|
# First we check for unambiguous symbols (KiB, MiB, GiB, etc)
|
||||||
|
# and names (kibibyte, mebibyte, gibibyte, etc) because their
|
||||||
|
# handling is always the same.
|
||||||
|
if normalized_unit in (unit.binary.symbol.lower(), unit.binary.name.lower()):
|
||||||
|
return int(tokens[0] * unit.binary.divider)
|
||||||
|
# Now we will deal with ambiguous prefixes (K, M, G, etc),
|
||||||
|
# symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
|
||||||
|
# gigabyte, etc) according to the caller's preference.
|
||||||
|
if (normalized_unit in (unit.decimal.symbol.lower(), unit.decimal.name.lower()) or
|
||||||
|
normalized_unit.startswith(unit.decimal.symbol[0].lower())):
|
||||||
|
return int(tokens[0] * (unit.binary.divider if binary else unit.decimal.divider))
|
||||||
|
# We failed to parse the size specification.
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def input_type_check(data: object) -> None:
|
def input_type_check(data: object) -> None:
|
||||||
"""Ensure input data is a string. Raises `TypeError` if not."""
|
"""Ensure input data is a string. Raises `TypeError` if not."""
|
||||||
if not isinstance(data, str):
|
if not isinstance(data, str):
|
||||||
|
Reference in New Issue
Block a user