@@ -161,6 +161,7 @@ option.
| ` --asciitable` | ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable) |
| ` --asciitable-m` | multi-line ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable_m) |
| ` --blkid` | `blkid` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/blkid) |
| ` --cbt` | `cbt` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cbt) |
| ` --cef` | CEF string parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef) |
| ` --cef-s` | CEF string streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef_s) |
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
191 jc/parsers/cbt.py Normal file
@@ -0,0 +1,191 @@
"""jc - JSON Convert `cbt` command output parser

Parses the human-readable (but not machine-friendly) output of the `cbt`
command (for Google Bigtable).

No effort is made to convert the data types of the values in the cells.

The timestamps of the cells are converted to ISO format (Python's
`isoformat`).

Raw output contains all cells for each column (with their timestamps
converted to ISO format), while the normal output contains only the latest
value for each column.

Usage (cli):

    $ cbt | jc --cbt

or

    $ jc cbt

Usage (module):

    import jc
    result = jc.parse('cbt', cbt_command_output)

Schema:

    [
      {
        "key": string,
        "cells": {
          string: {
            string: string
          }
        }
      }
    ]

Schema (raw):

    [
      {
        "key": string,
        "cells": [
          {
            "column_family": string,
            "column": string,
            "timestamp": string,
            "value": string
          }
        ]
      }
    ]

Examples:

    $ cbt -project=$PROJECT -instance=$INSTANCE lookup $TABLE foo | jc --cbt -p
    [
      {
        "key": "foo",
        "cells": {
          "foo": {
            "bar": "baz"
          }
        }
      }
    ]

    $ cbt -project=$PROJECT -instance=$INSTANCE lookup $TABLE foo | jc --cbt -p -r
    [
      {
        "key": "foo",
        "cells": [
          {
            "column_family": "foo",
            "column": "bar",
            "timestamp": "1970-01-01T01:00:00",
            "value": "baz"
          }
        ]
      }
    ]
"""
import datetime
from itertools import groupby
from typing import List, Dict
from jc.jc_types import JSONDictType
import jc.utils


class info():
    """Provides parser metadata (version, author, etc.)"""
    version = '1.0'
    description = '`cbt` command parser'
    author = 'Andreas Weiden'
    author_email = 'andreas.weiden@gmail.com'

    # compatible options: linux, darwin, cygwin, win32, aix, freebsd
    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
    magic_commands = ['cbt']


__version__ = info.version


def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
    """
    Final processing to conform to the schema.

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema.
    """
    # group the raw cells by (column_family, column) and keep only the
    # value with the most recent timestamp for each column
    out_data = []
    for row in proc_data:
        cells = {}
        key_func = lambda cell: (cell["column_family"], cell["column"])
        all_cells = sorted(row["cells"], key=key_func)
        for (column_family, column), group in groupby(all_cells, key=key_func):
            group = sorted(group, key=lambda cell: cell["timestamp"], reverse=True)
            if column_family not in cells:
                cells[column_family] = {}
            cells[column_family][column] = group[0]["value"]
        row["cells"] = cells
        out_data.append(row)
    return out_data


def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[JSONDictType]:
    """
    Main text parsing function

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    raw_output: List[Dict] = []

    if jc.utils.has_data(data):
        # rows are separated by a line of 40 dashes
        for line in filter(None, data.split("-" * 40)):
            # within a row, the first line is the row key, two-space-indented
            # lines hold "column_family:column @ timestamp", and
            # four-space-indented lines hold the quoted cell value
            key = None
            cells = []
            column_name = ""
            timestamp = None
            value_next = False
            for field in line.splitlines():
                if not field.strip():
                    continue
                if field.startswith(" " * 4):
                    value = field.strip(' "')
                    if value_next:
                        cells.append({
                            "column_family": column_name.split(":", 1)[0],
                            "column": column_name.split(":", 1)[1],
                            "timestamp": datetime.datetime.strptime(timestamp, "%Y/%m/%d-%H:%M:%S.%f").isoformat(),
                            "value": value
                        })
                elif field.startswith(" " * 2):
                    column_name, timestamp = map(str.strip, field.split("@"))
                    value_next = True
                else:
                    key = field
            if key is not None:
                raw_output.append({"key": key, "cells": cells})

    return raw_output if raw else _process(raw_output)
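As a quick illustration of the behavior documented above (normal output keeps only the latest value per column, raw output keeps every cell), a minimal sketch along these lines should work. The sample input mimics the test fixtures below; real `cbt` output may pad the columns differently, but the parser only relies on the two-/four-space indentation and the `@` separator.

import jc.parsers.cbt

sample = """\
----------------------------------------
foo
  foo:bar @ 2023/01/02-00:00:00.000000
    "new"
  foo:bar @ 2023/01/01-00:00:00.000000
    "old"
"""

# default output keeps only the most recent value for each column:
# [{'key': 'foo', 'cells': {'foo': {'bar': 'new'}}}]
print(jc.parsers.cbt.parse(sample, quiet=True))

# raw output keeps every timestamped cell:
# [{'key': 'foo', 'cells': [
#   {'column_family': 'foo', 'column': 'bar',
#    'timestamp': '2023-01-02T00:00:00', 'value': 'new'},
#   {'column_family': 'foo', 'column': 'bar',
#    'timestamp': '2023-01-01T00:00:00', 'value': 'old'}]}]
print(jc.parsers.cbt.parse(sample, quiet=True, raw=True))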
1 tests/fixtures/generic/cbt-multiple-columns.json vendored Normal file
@@ -0,0 +1 @@
[{"key":"foo","cells":{"bat":{"bar":"baz"},"foo":{"bar1":"baz1","bar2":"baz2"}}}]
8 tests/fixtures/generic/cbt-multiple-columns.out vendored Normal file
@@ -0,0 +1,8 @@
----------------------------------------
foo
  foo:bar1 @ 1970/01/01-01:00:00.000000
    "baz1"
  foo:bar2 @ 1970/01/01-01:00:00.000000
    "baz2"
  bat:bar @ 1970/01/01-01:00:00.000000
    "baz"
1 tests/fixtures/generic/cbt-multiple-rows-raw.json vendored Normal file
@@ -0,0 +1 @@
[{"key":"foo","cells":[{"column_family":"foo","column":"bar","timestamp":"1970-01-01T01:00:00","value":"baz1"}]},{"key":"bar","cells":[{"column_family":"foo","column":"bar","timestamp":"1970-01-01T01:00:00","value":"baz2"}]}]
1 tests/fixtures/generic/cbt-multiple-rows.json vendored Normal file
@@ -0,0 +1 @@
[{"key":"foo","cells":{"foo":{"bar":"baz1"}}},{"key":"bar","cells":{"foo":{"bar":"baz2"}}}]
8 tests/fixtures/generic/cbt-multiple-rows.out vendored Normal file
@@ -0,0 +1,8 @@
----------------------------------------
foo
  foo:bar @ 1970/01/01-01:00:00.000000
    "baz1"
----------------------------------------
bar
  foo:bar @ 1970/01/01-01:00:00.000000
    "baz2"
1 tests/fixtures/generic/cbt-single.json vendored Normal file
@@ -0,0 +1 @@
[{"key":"foo","cells":{"foo":{"bar":"baz"}}}]
4 tests/fixtures/generic/cbt-single.out vendored Normal file
@@ -0,0 +1,4 @@
----------------------------------------
foo
  foo:bar @ 1970/01/01-01:00:00.000000
    "baz"
64 tests/test_cbt.py Normal file
@@ -0,0 +1,64 @@
import json
import os
import unittest
from jc.exceptions import ParseError
import jc.parsers.cbt

THIS_DIR = os.path.dirname(os.path.abspath(__file__))


class MyTests(unittest.TestCase):
    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-single.out'), 'r', encoding='utf-8') as f:
        single = f.read()

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-columns.out'), 'r', encoding='utf-8') as f:
        multiple_columns = f.read()

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows.out'), 'r', encoding='utf-8') as f:
        multiple_rows = f.read()

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-single.json'), 'r', encoding='utf-8') as f:
        single_json = json.loads(f.read())

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-columns.json'), 'r', encoding='utf-8') as f:
        multiple_columns_json = json.loads(f.read())

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows.json'), 'r', encoding='utf-8') as f:
        multiple_rows_json = json.loads(f.read())

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows-raw.json'), 'r', encoding='utf-8') as f:
        multiple_rows_raw_json = json.loads(f.read())

    def test_cbt_nodata(self):
        """
        Test 'cbt' with no data
        """
        self.assertEqual(jc.parsers.cbt.parse('', quiet=True), [])

    def test_cbt_single_row(self):
        """
        Test 'cbt' with a single row
        """
        self.assertEqual(jc.parsers.cbt.parse(self.single, quiet=True), self.single_json)

    def test_cbt_multiple_column_families(self):
        """
        Test 'cbt' with multiple columns from multiple column families
        """
        self.assertEqual(jc.parsers.cbt.parse(self.multiple_columns, quiet=True), self.multiple_columns_json)

    def test_cbt_multiple_rows(self):
        """
        Test 'cbt' with multiple rows
        """
        self.assertEqual(jc.parsers.cbt.parse(self.multiple_rows, quiet=True), self.multiple_rows_json)

    def test_cbt_multiple_rows_raw(self):
        """
        Test 'cbt' with multiple rows (raw output)
        """
        self.assertEqual(jc.parsers.cbt.parse(self.multiple_rows, quiet=True, raw=True), self.multiple_rows_raw_json)


if __name__ == '__main__':
    unittest.main()
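The new tests follow the repository's existing unittest conventions; assuming the repository root as the working directory, they should be runnable with the standard unittest launcher, for example:

$ python -m unittest tests.test_cbt -v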