add line_slice function

2025-08-06 22:32:54 +02:00 · 2024-01-05 15:05:59 -08:00
parent 3a95407161
commit c97a8fb48f
3 changed files with 106 additions and 1 deletions
--- a/docs/utils.md
+++ b/docs/utils.md
@ -11,6 +11,7 @@
  * [convert\_to\_bool](#jc.utils.convert_to_bool)
  * [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
  * [input\_type\_check](#jc.utils.input_type_check)
  * [line\_slice](#jc.utils.line_slice)
  * [timestamp](#jc.utils.timestamp)
    * [\_\_init\_\_](#jc.utils.timestamp.__init__)
@ -231,6 +232,35 @@ def input_type_check(data: object) -> None
 Ensure input data is a string. Raises `TypeError` if not.
 <a id="jc.utils.line_slice"></a>
 ### line\_slice
 ```python
 def line_slice(data: Union[str, Iterable],
               slice_start: Optional[int] = None,
               slice_end: Optional[int] = None) -> Union[str, Iterable]
 ```
 Slice input data by lines - lazily, if possible.
 Accepts a string (for normal parsers) or an iterable (for streaming
 parsers). Uses normal start/stop slicing values, but will always slice
 on lines instead of characters. Positive slices will use less memory as
 the function will attempt to lazily iterate over the input. A negative
 slice parameter will force the function to read in all of the data and
 then slice, which will use more memory.
 Parameters:
    data:              (string or iterable) - input to slice by lines
    slice_start:       (int) - starting line
    slice_end:         (int) - ending line
 Returns:
    string if input is a string.
    iterable of strings if input is an iterable (for streaming parsers)
 <a id="jc.utils.timestamp"></a>
 ### timestamp Objects
--- a/jc/utils.py
+++ b/jc/utils.py
@ -3,6 +3,7 @@ import sys
 import re
 import locale
 import shutil
 from itertools import islice
 from collections import namedtuple
 from numbers import Number
 from datetime import datetime, timezone
@ -393,6 +394,80 @@ def input_type_check(data: object) -> None:
        raise TypeError("Input data must be a 'str' object.")
 def _lazy_splitlines(text: str) -> Iterable[str]:
    NEWLINES_PATTERN: str = r'(\r\n|\r|\n)'
    NEWLINES_RE = re.compile(NEWLINES_PATTERN)
    start = 0
    for m in NEWLINES_RE.finditer(text):
        begin, end = m.span()
        if begin != start:
            yield text[start:begin]
        start = end
    if text[start:]:
        yield text[start:]
 def line_slice(
        data: Union[str, Iterable],
        slice_start: Optional[int] = None,
        slice_end: Optional[int] = None
 ) -> Union[str, Iterable]:
    """
    Return a line-based slice of the input, iterating lazily when possible.

    Works on a string (standard parsers) or on an iterable of lines
    (streaming parsers). `slice_start` and `slice_end` follow the usual
    Python start/stop slice conventions, but count lines rather than
    characters. When neither bound is negative the input is consumed
    lazily via `islice`. If either bound is negative the whole input is
    read into a list first and then sliced, which uses more memory.

    If both bounds are `None`, or the input is a falsy non-string value,
    the input is returned unchanged.

    Parameters:

        data:              (string or iterable) - input to slice by lines

        slice_start:       (int) - starting line

        slice_end:         (int) - ending line

    Returns:

        string if input is a string (selected lines joined with a
        newline character).

        iterable of strings if input is an iterable (for streaming parsers)

    Raises:

        ValueError if a slice is requested on bytes input.
    """
    # no slice requested: hand the input back untouched
    if slice_start is None and slice_end is None:
        return data

    # standard parsers bytes input
    if isinstance(data, bytes):
        raise ValueError('Cannot slice bytes data.')

    # islice() cannot take negative bounds, so laziness is only possible
    # when every supplied bound is zero or greater
    lazy_ok = all(
        bound is None or bound >= 0
        for bound in (slice_start, slice_end)
    )

    # standard parsers UTF-8 input
    if isinstance(data, str):
        lines = _lazy_splitlines(data)

        if lazy_ok:
            selected = islice(lines, slice_start, slice_end)
        else:
            # negative slices found (non-lazy, uses more memory)
            selected = list(lines)[slice_start:slice_end]

        return '\n'.join(selected)

    # streaming parsers UTF-8 input
    if not data:
        return data

    if lazy_ok:
        return islice(data, slice_start, slice_end)

    # negative slices found (non-lazy, uses more memory)
    return list(data)[slice_start:slice_end]
 class timestamp:
    __slots__ = ('string', 'format', 'naive', 'utc', 'iso')
--- a/man/jc.1
+++ b/man/jc.1
@ -1,4 +1,4 @@
-.TH jc 1 2024-01-04 1.24.1 "JSON Convert"
+.TH jc 1 2024-01-05 1.24.1 "JSON Convert"
 .SH NAME
 \fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types,
 and strings