From d54a81831f88fbb3cc3ad6d4a535d29eee27e739 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Fri, 5 Jan 2024 17:24:32 -0800 Subject: [PATCH] update slicer to use utils.line_slice() --- jc/cli.py | 45 +-------------------------------------------- jc/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 46 deletions(-) diff --git a/jc/cli.py b/jc/cli.py index d062d4e1..72f68677 100644 --- a/jc/cli.py +++ b/jc/cli.py @@ -652,18 +652,6 @@ class JcCli(): utils.error_message(['Parser returned an unsupported object type.']) self.exit_error() - @staticmethod - def lazy_splitlines(text: str) -> Iterable[str]: - start = 0 - for m in NEWLINES_RE.finditer(text): - begin, end = m.span() - if begin != start: - yield text[start:begin] - start = end - - if text[start:]: - yield text[start:] - def slicer(self) -> None: """Slice input data lazily, if possible. Updates self.data_in""" if self.slice_str: @@ -673,37 +661,7 @@ class JcCli(): if slice_end_str: self.slice_end = int(slice_end_str) - if not self.slice_start is None or not self.slice_end is None: - # standard parsers UTF-8 input - if isinstance(self.data_in, str): - data_in_iter = self.lazy_splitlines(self.data_in) - - # positive slices - if (self.slice_start is None or self.slice_start >= 0) \ - and (self.slice_end is None or self.slice_end >= 0): - - self.data_in = '\n'.join(islice(data_in_iter, self.slice_start, self.slice_end)) - - # negative slices found (non-lazy, uses more memory) - else: - self.data_in = '\n'.join(list(data_in_iter)[self.slice_start:self.slice_end]) - - # standard parsers bytes input - elif isinstance(self.data_in, bytes): - utils.warning_message(['Cannot slice bytes data.']) - - # streaming parsers UTF-8 input - else: - # positive slices - if (self.slice_start is None or self.slice_start >= 0) \ - and (self.slice_end is None or self.slice_end >= 0) \ - and self.data_in: - - self.data_in = islice(self.data_in, self.slice_start, self.slice_end) - - # negative slices found (non-lazy, uses more memory) - elif self.data_in: - self.data_in = list(self.data_in)[self.slice_start:self.slice_end] + self.data_in = utils.line_slice(self.data_in, self.slice_start, self.slice_end) def create_slurp_output(self) -> None: """Slurp output into an array. Only works for single-line strings.""" @@ -742,7 +700,6 @@ class JcCli(): self.run_timestamp = datetime.now(timezone.utc) self.add_metadata_to_output() - def streaming_parse_and_print(self) -> None: """only supports UTF-8 string data for now""" self.data_in = sys.stdin diff --git a/jc/utils.py b/jc/utils.py index d616d6f0..e6f2cbdc 100644 --- a/jc/utils.py +++ b/jc/utils.py @@ -409,10 +409,10 @@ def _lazy_splitlines(text: str) -> Iterable[str]: def line_slice( - data: Union[str, Iterable], + data: Union[str, Iterable[str], TextIO, bytes, None], slice_start: Optional[int] = None, slice_end: Optional[int] = None -) -> Union[str, Iterable]: +) -> Union[str, Iterable[str], TextIO, bytes, None]: """ Slice input data by lines - lazily, if possible.