mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
add line_slice function
This commit is contained in:
@ -11,6 +11,7 @@
|
|||||||
* [convert\_to\_bool](#jc.utils.convert_to_bool)
|
* [convert\_to\_bool](#jc.utils.convert_to_bool)
|
||||||
* [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
|
* [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
|
||||||
* [input\_type\_check](#jc.utils.input_type_check)
|
* [input\_type\_check](#jc.utils.input_type_check)
|
||||||
|
* [line\_slice](#jc.utils.line_slice)
|
||||||
* [timestamp](#jc.utils.timestamp)
|
* [timestamp](#jc.utils.timestamp)
|
||||||
* [\_\_init\_\_](#jc.utils.timestamp.__init__)
|
* [\_\_init\_\_](#jc.utils.timestamp.__init__)
|
||||||
|
|
||||||
@ -231,6 +232,35 @@ def input_type_check(data: object) -> None
|
|||||||
|
|
||||||
Ensure input data is a string. Raises `TypeError` if not.
|
Ensure input data is a string. Raises `TypeError` if not.
|
||||||
|
|
||||||
|
<a id="jc.utils.line_slice"></a>
|
||||||
|
|
||||||
|
### line\_slice
|
||||||
|
|
||||||
|
```python
|
||||||
|
def line_slice(data: Union[str, Iterable],
|
||||||
|
slice_start: Optional[int] = None,
|
||||||
|
slice_end: Optional[int] = None) -> Union[str, Iterable]
|
||||||
|
```
|
||||||
|
|
||||||
|
Slice input data by lines - lazily, if possible.
|
||||||
|
|
||||||
|
Accepts a string (for normal parsers) or an iterable (for streaming
|
||||||
|
parsers). Uses normal start/stop slicing values, but will always slice
|
||||||
|
on lines instead of characters. Positive slices will use less memory as
|
||||||
|
the function will attempt to lazily iterate over the input. A negative
|
||||||
|
slice parameter will force the function to read in all of the data and
|
||||||
|
then slice, which will use more memory.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
|
||||||
|
data: (string or iterable) - input to slice by lines
|
||||||
|
slice_start: (int) - starting line
|
||||||
|
slice_end: (int) - ending line
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
string if input is a string.
|
||||||
|
iterable of strings if input is an iterable (for streaming parsers)
|
||||||
|
|
||||||
<a id="jc.utils.timestamp"></a>
|
<a id="jc.utils.timestamp"></a>
|
||||||
|
|
||||||
### timestamp Objects
|
### timestamp Objects
|
||||||
|
75
jc/utils.py
75
jc/utils.py
@ -3,6 +3,7 @@ import sys
|
|||||||
import re
|
import re
|
||||||
import locale
|
import locale
|
||||||
import shutil
|
import shutil
|
||||||
|
from itertools import islice
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from numbers import Number
|
from numbers import Number
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@ -393,6 +394,80 @@ def input_type_check(data: object) -> None:
|
|||||||
raise TypeError("Input data must be a 'str' object.")
|
raise TypeError("Input data must be a 'str' object.")
|
||||||
|
|
||||||
|
|
||||||
|
def _lazy_splitlines(text: str) -> Iterable[str]:
    """
    Lazily yield the non-empty lines of a string, one at a time.

    Line boundaries are `\\r\\n`, `\\r`, or `\\n`. The terminators themselves
    are not included in the yielded values, and empty segments (for
    example the gap between two consecutive newlines) are skipped rather
    than yielded as empty strings.

    Parameters:

        text:         (string) - input to split into lines

    Returns:

        generator of non-empty line strings
    """
    newline_re = re.compile(r'(\r\n|\r|\n)')
    cursor = 0

    for match in newline_re.finditer(text):
        segment = text[cursor:match.start()]
        # always advance past the terminator, even when nothing is yielded
        cursor = match.end()
        if segment:
            yield segment

    # whatever follows the final terminator (or the whole string if none)
    remainder = text[cursor:]
    if remainder:
        yield remainder
|
||||||
|
|
||||||
|
|
||||||
|
def line_slice(
    data: Union[str, Iterable],
    slice_start: Optional[int] = None,
    slice_end: Optional[int] = None
) -> Union[str, Iterable]:
    """
    Return a line-based slice of the input - lazily where that is possible.

    The input may be a string (standard parsers) or an iterable of lines
    (streaming parsers). `slice_start` and `slice_end` behave like normal
    Python slice bounds, except that they count lines rather than
    characters. When neither bound is negative the input is consumed
    lazily. A negative bound forces all of the data to be read into a list
    before slicing, which uses more memory.

    For string input the lines are produced by `_lazy_splitlines()` and
    the selected lines are re-joined with `\\n`. If both bounds are `None`,
    or the iterable input is falsy, the input is returned unchanged.

    Parameters:

        data:         (string or iterable) - input to slice by lines
        slice_start:  (int) - starting line
        slice_end:    (int) - ending line

    Returns:

        string if input is a string.
        iterable of strings if input is an iterable (for streaming parsers)

    Raises:

        ValueError if a slice is requested on bytes input.
    """
    # nothing to slice: hand the input back untouched
    if slice_start is None and slice_end is None:
        return data

    # standard parsers bytes input
    if isinstance(data, bytes):
        raise ValueError('Cannot slice bytes data.')

    # only non-negative bounds can be applied without reading everything
    lazy_ok = (
        (slice_start is None or slice_start >= 0)
        and (slice_end is None or slice_end >= 0)
    )

    # standard parsers UTF-8 input
    if isinstance(data, str):
        lines = _lazy_splitlines(data)
        if lazy_ok:
            selected = islice(lines, slice_start, slice_end)
        else:
            # negative bound: materialize every line first (more memory)
            selected = list(lines)[slice_start:slice_end]
        return '\n'.join(selected)

    # streaming parsers UTF-8 input
    if not data:
        return data

    if lazy_ok:
        return islice(data, slice_start, slice_end)

    # negative bound: materialize the whole stream first (more memory)
    return list(data)[slice_start:slice_end]
|
||||||
|
|
||||||
|
|
||||||
class timestamp:
|
class timestamp:
|
||||||
__slots__ = ('string', 'format', 'naive', 'utc', 'iso')
|
__slots__ = ('string', 'format', 'naive', 'utc', 'iso')
|
||||||
|
|
||||||
|
2
man/jc.1
2
man/jc.1
@ -1,4 +1,4 @@
|
|||||||
.TH jc 1 2024-01-04 1.24.1 "JSON Convert"
|
.TH jc 1 2024-01-05 1.24.1 "JSON Convert"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types,
|
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types,
|
||||||
and strings
|
and strings
|
||||||
|
Reference in New Issue
Block a user