1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00

add line_slice function

This commit is contained in:
Kelly Brazil
2024-01-05 15:05:59 -08:00
parent 3a95407161
commit c97a8fb48f
3 changed files with 106 additions and 1 deletions

View File

@ -11,6 +11,7 @@
* [convert\_to\_bool](#jc.utils.convert_to_bool) * [convert\_to\_bool](#jc.utils.convert_to_bool)
* [convert\_size\_to\_int](#jc.utils.convert_size_to_int) * [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
* [input\_type\_check](#jc.utils.input_type_check) * [input\_type\_check](#jc.utils.input_type_check)
* [line\_slice](#jc.utils.line_slice)
* [timestamp](#jc.utils.timestamp) * [timestamp](#jc.utils.timestamp)
* [\_\_init\_\_](#jc.utils.timestamp.__init__) * [\_\_init\_\_](#jc.utils.timestamp.__init__)
@ -231,6 +232,35 @@ def input_type_check(data: object) -> None
Ensure input data is a string. Raises `TypeError` if not. Ensure input data is a string. Raises `TypeError` if not.
<a id="jc.utils.line_slice"></a>
### line\_slice
```python
def line_slice(data: Union[str, Iterable],
slice_start: Optional[int] = None,
slice_end: Optional[int] = None) -> Union[str, Iterable]
```
Slice input data by lines - lazily, if possible.
Accepts a string (for normal parsers) or an iterable (for streaming
parsers). Uses normal start/stop slicing values, but will always slice
on lines instead of characters. Positive slices will use less memory as
the function will attempt to lazily iterate over the input. A negative
slice parameter will force the function to read in all of the data and
then slice, which will use more memory.
Parameters:
data: (string or iterable) - input to slice by lines
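slice_start: (int) - index of the first line to keep (0-based; a negative value counts from the end)
slice_end: (int) - index of the line to stop before (exclusive; a negative value counts from the end)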
Returns:
string if input is a string.
iterable of strings if input is an iterable (for streaming parsers)
<a id="jc.utils.timestamp"></a> <a id="jc.utils.timestamp"></a>
### timestamp Objects ### timestamp Objects

View File

@ -3,6 +3,7 @@ import sys
import re import re
import locale import locale
import shutil import shutil
from itertools import islice
from collections import namedtuple from collections import namedtuple
from numbers import Number from numbers import Number
from datetime import datetime, timezone from datetime import datetime, timezone
@ -393,6 +394,80 @@ def input_type_check(data: object) -> None:
raise TypeError("Input data must be a 'str' object.") raise TypeError("Input data must be a 'str' object.")
def _lazy_splitlines(text: str) -> Iterable[str]:
    """
    Lazily yield the lines of `text`, one at a time, without terminators.

    Lines are split on `\\r\\n`, `\\r` and `\\n`. Blank lines are yielded
    as empty strings so that line positions match what
    `str.splitlines()` would produce for the same terminators. A trailing
    terminator does not produce an extra empty line at the end.

    Parameters:

        text:    (string) - text to split into lines

    Returns:

        Generator of strings, one per line.
    """
    # `\r\n` is listed first so a CRLF pair is consumed as one terminator
    # instead of being seen as two separate line breaks.
    newlines_pattern: str = r'(\r\n|\r|\n)'
    newlines_re = re.compile(newlines_pattern)

    start = 0
    for m in newlines_re.finditer(text):
        begin, end = m.span()
        # Yield unconditionally: when begin == start the line is blank and
        # must still be emitted (as '') to keep line numbering intact.
        yield text[start:begin]
        start = end

    # Remaining text after the last terminator (input without a final
    # newline). Nothing is left over when the text ends with a terminator.
    if start < len(text):
        yield text[start:]
def line_slice(
    data: Union[str, Iterable],
    slice_start: Optional[int] = None,
    slice_end: Optional[int] = None
) -> Union[str, Iterable]:
    """
    Return only the requested range of lines from the input.

    Works on a string (standard parsers) or on an iterable of lines
    (streaming parsers). `slice_start` and `slice_end` behave like the
    bounds of a normal Python slice, except that they count lines rather
    than characters. When no bound is negative the input is consumed
    lazily, which keeps memory use low. Any negative bound requires all
    of the lines to be read into a list before slicing, which uses more
    memory.

    Parameters:

        data:         (string or iterable) - input to slice by lines
        slice_start:  (int) - index of the first line to keep
        slice_end:    (int) - index of the line to stop before

    Returns:

        string if input is a string (selected lines joined with `\\n`).
        iterable of strings if input is an iterable (for streaming parsers)
        the input itself, unchanged, if no slice bounds are given or the
        iterable input is empty/falsy.

    Raises:

        ValueError if slicing is requested on bytes input.
    """
    # Nothing to slice: hand the input straight back.
    if slice_start is None and slice_end is None:
        return data

    # standard parsers bytes input
    if isinstance(data, bytes):
        raise ValueError('Cannot slice bytes data.')

    # islice() does not accept negative indexes, so lazy slicing is only
    # possible when every bound that was supplied is non-negative.
    has_negative_bound = any(
        bound is not None and bound < 0
        for bound in (slice_start, slice_end)
    )

    # standard parsers UTF-8 input
    if isinstance(data, str):
        line_iter = _lazy_splitlines(data)
        if has_negative_bound:
            # non-lazy: materialize every line, then slice the list
            selected = list(line_iter)[slice_start:slice_end]
        else:
            selected = islice(line_iter, slice_start, slice_end)
        return '\n'.join(selected)

    # streaming parsers UTF-8 input
    if not data:
        return data

    if has_negative_bound:
        # non-lazy: materialize every line, then slice the list
        return list(data)[slice_start:slice_end]

    return islice(data, slice_start, slice_end)
class timestamp: class timestamp:
__slots__ = ('string', 'format', 'naive', 'utc', 'iso') __slots__ = ('string', 'format', 'naive', 'utc', 'iso')

View File

@ -1,4 +1,4 @@
.TH jc 1 2024-01-04 1.24.1 "JSON Convert" .TH jc 1 2024-01-05 1.24.1 "JSON Convert"
.SH NAME .SH NAME
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types, \fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types,
and strings and strings