diff --git a/docs/utils.md b/docs/utils.md
index a4d45bc0..78c7ac29 100644
--- a/docs/utils.md
+++ b/docs/utils.md
@@ -11,6 +11,7 @@
* [convert\_to\_bool](#jc.utils.convert_to_bool)
* [convert\_size\_to\_int](#jc.utils.convert_size_to_int)
* [input\_type\_check](#jc.utils.input_type_check)
+ * [line\_slice](#jc.utils.line_slice)
* [timestamp](#jc.utils.timestamp)
* [\_\_init\_\_](#jc.utils.timestamp.__init__)
@@ -231,6 +232,35 @@ def input_type_check(data: object) -> None
Ensure input data is a string. Raises `TypeError` if not.
+
+
+### line\_slice
+
+```python
+def line_slice(data: Union[str, Iterable],
+ slice_start: Optional[int] = None,
+ slice_end: Optional[int] = None) -> Union[str, Iterable]
+```
+
+Slice input data by lines - lazily, if possible.
+
+Accepts a string (for normal parsers) or an iterable (for streaming
+parsers). Uses normal start/stop slicing values, but always slices on
+lines instead of characters. Non-negative slice values use less memory
+because the function lazily iterates over the input. A negative slice
+value forces the function to read in all of the data before slicing,
+which uses more memory.
+
+Parameters:
+
+    data:         (string or iterable) - input to slice by lines
+    slice_start:  (int or None) - first line to keep (zero-based, inclusive)
+    slice_end:    (int or None) - line to stop before (zero-based, exclusive)
+
+Returns:
+ string if input is a string.
+ iterable of strings if input is an iterable (for streaming parsers)
+
### timestamp Objects
diff --git a/jc/utils.py b/jc/utils.py
index 257f1f5a..d616d6f0 100644
--- a/jc/utils.py
+++ b/jc/utils.py
@@ -3,6 +3,7 @@ import sys
import re
import locale
import shutil
+from itertools import islice
from collections import namedtuple
from numbers import Number
from datetime import datetime, timezone
@@ -393,6 +394,80 @@ def input_type_check(data: object) -> None:
raise TypeError("Input data must be a 'str' object.")
+def _lazy_splitlines(text: str) -> Iterable[str]:
+    """Lazily yield the non-empty lines of `text`, line endings removed."""
+    # '\r\n' is listed first so it is consumed as a single line break
+    line_break = re.compile(r'(\r\n|\r|\n)')
+    cursor = 0
+    for found in line_break.finditer(text):
+        if found.start() != cursor:
+            yield text[cursor:found.start()]
+        cursor = found.end()
+    remainder = text[cursor:]
+    if remainder:
+        yield remainder
+
+
+def line_slice(
+    data: Union[str, Iterable],
+    slice_start: Optional[int] = None,
+    slice_end: Optional[int] = None
+) -> Union[str, Iterable]:
+    """
+    Slice input data by lines - lazily, if possible.
+
+    Accepts a string (for normal parsers) or an iterable (for streaming
+    parsers). Uses normal start/stop slicing values, but always slices on
+    lines instead of characters. Non-negative slice values use less memory
+    because the function lazily iterates over the input. A negative slice
+    value forces the function to read in all of the data before slicing,
+    which uses more memory.
+
+    Parameters:
+
+        data:         (string or iterable) - input to slice by lines
+        slice_start:  (int) - first line to keep (zero-based, inclusive)
+        slice_end:    (int) - line to stop before (zero-based, exclusive)
+
+    Returns:
+        string if input is a string.
+        iterable of strings if input is an iterable (for streaming parsers)
+
+    Raises:
+        ValueError if a slice is requested on bytes input.
+    """
+    # no slice requested: hand the input back untouched
+    if slice_start is None and slice_end is None:
+        return data
+
+    # standard parsers bytes input
+    if isinstance(data, bytes):
+        raise ValueError('Cannot slice bytes data.')
+
+    # islice() rejects negative indices, so only non-negative bounds can
+    # be applied lazily
+    lazy = (slice_start is None or slice_start >= 0) \
+        and (slice_end is None or slice_end >= 0)
+
+    # standard parsers UTF-8 input
+    if isinstance(data, str):
+        lines = _lazy_splitlines(data)
+        if lazy:
+            return '\n'.join(islice(lines, slice_start, slice_end))
+
+        # negative slices found (non-lazy, uses more memory)
+        return '\n'.join(list(lines)[slice_start:slice_end])
+
+    # streaming parsers UTF-8 input (falsy input is passed through as-is)
+    if not data:
+        return data
+    if lazy:
+        return islice(data, slice_start, slice_end)
+
+    # negative slices found (non-lazy, uses more memory)
+    return list(data)[slice_start:slice_end]
+
+
class timestamp:
__slots__ = ('string', 'format', 'naive', 'utc', 'iso')
diff --git a/man/jc.1 b/man/jc.1
index 7635ae8a..3e0a8e35 100644
--- a/man/jc.1
+++ b/man/jc.1
@@ -1,4 +1,4 @@
-.TH jc 1 2024-01-04 1.24.1 "JSON Convert"
+.TH jc 1 2024-01-05 1.24.1 "JSON Convert"
.SH NAME
\fBjc\fP \- JSON Convert JSONifies the output of many CLI tools, file-types,
and strings