mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-07-15 01:24:29 +02:00
@ -161,6 +161,7 @@ option.
|
|||||||
| ` --asciitable` | ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable) |
|
| ` --asciitable` | ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable) |
|
||||||
| ` --asciitable-m` | multi-line ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable_m) |
|
| ` --asciitable-m` | multi-line ASCII and Unicode table parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/asciitable_m) |
|
||||||
| ` --blkid` | `blkid` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/blkid) |
|
| ` --blkid` | `blkid` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/blkid) |
|
||||||
|
| ` --cbt` | `cbt` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cbt) |
|
||||||
| ` --cef` | CEF string parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef) |
|
| ` --cef` | CEF string parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef) |
|
||||||
| ` --cef-s` | CEF string streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef_s) |
|
| ` --cef-s` | CEF string streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cef_s) |
|
||||||
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
|
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
|
||||||
|
191
jc/parsers/cbt.py
Normal file
191
jc/parsers/cbt.py
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
"""jc - JSON Convert `cbt` command output parser
|
||||||
|
|
||||||
|
Parses the human-friendly (but not machine-friendly) output of the `cbt` command (for Google's Bigtable).
|
||||||
|
|
||||||
|
No effort is made to convert the data types of the values in the cells.
|
||||||
|
|
||||||
|
The timestamps of the cells are converted to Python's isoformat.
|
||||||
|
|
||||||
|
Raw output contains all cells for each column (including timestamps converted to Python's isoformat),
|
||||||
|
while the normal output contains only the latest value for each column.
|
||||||
|
|
||||||
|
Usage (cli):
|
||||||
|
|
||||||
|
$ cbt | jc --cbt
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
$ jc cbt
|
||||||
|
|
||||||
|
Usage (module):
|
||||||
|
|
||||||
|
import jc
|
||||||
|
result = jc.parse('cbt', cbt_command_output)
|
||||||
|
|
||||||
|
Schema:
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"key": string,
|
||||||
|
"cells": {
|
||||||
|
string: {
|
||||||
|
string: string
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
Schema (raw):
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"key": string,
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"column_family": string,
|
||||||
|
"column": string,
|
||||||
|
"timestamp": string,
|
||||||
|
"value": string
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
$ cbt -project=$PROJECT -instance=$INSTANCE lookup $TABLE foo | jc --cbt -p
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"key": "foo",
|
||||||
|
"cells": {
|
||||||
|
"foo": {
|
||||||
|
"bar": "baz"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
$ cbt -project=$PROJECT -instance=$INSTANCE lookup $TABLE foo | jc --cbt -p -r
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"key": "foo",
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"column_family": "foo",
|
||||||
|
"column": "bar",
|
||||||
|
"timestamp": "1970-01-01T01:00:00",
|
||||||
|
"value": "baz"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
import datetime
|
||||||
|
from itertools import groupby
|
||||||
|
from typing import List, Dict
|
||||||
|
from jc.jc_types import JSONDictType
|
||||||
|
import jc.utils
|
||||||
|
|
||||||
|
|
||||||
|
class info:
    """Static metadata describing the `cbt` parser (version, author, platforms)."""
    version = '1.0'
    description = '`cbt` command parser'
    author = 'Andreas Weiden'
    author_email = 'andreas.weiden@gmail.com'

    # platforms this parser declares itself compatible with
    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
    # command names registered for the `jc cbt ...` invocation style
    magic_commands = ['cbt']


# expose the parser version at module level
__version__ = info.version
|
||||||
|
|
||||||
|
|
||||||
|
def _process(proc_data: List[JSONDictType]) -> List[JSONDictType]:
    """
    Final processing to conform to the schema.

    Collapses each row's flat list of cells into a nested mapping of
    column family -> column -> value, keeping only the value of the cell
    with the greatest timestamp for every column.

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema.
    """
    def column_id(cell):
        # cells belonging to the same column share this (family, column) pair
        return cell["column_family"], cell["column"]

    def written_at(cell):
        return cell["timestamp"]

    processed = []
    for row in proc_data:
        latest = {}
        # groupby only merges adjacent items, so order by the grouping key first
        ordered = sorted(row["cells"], key=column_id)
        for (family, qualifier), versions in groupby(ordered, key=column_id):
            # max() returns the first cell carrying the greatest timestamp
            newest = max(versions, key=written_at)
            latest.setdefault(family, {})[qualifier] = newest["value"]
        # the row dictionary is updated in place and then collected
        row["cells"] = latest
        processed.append(row)
    return processed
|
||||||
|
|
||||||
|
|
||||||
|
def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[JSONDictType]:
    """
    Main text parsing function

    The input is split into rows on separator lines of 40 dashes. Within a
    row, an unindented line is the row key, a line indented by two spaces
    is a cell header (`family:column @ timestamp`) and a line indented by
    four spaces is the value belonging to the most recent cell header.

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data.

    Raises:

        ValueError if a cell timestamp does not match the
        `%Y/%m/%d-%H:%M:%S.%f` format.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    raw_output: List[Dict] = []

    if jc.utils.has_data(data):
        # filter(None, ...) drops the empty chunk before the first separator
        for chunk in filter(None, data.split("-" * 40)):
            key = None
            cells = []
            column_name = ""
            timestamp = None
            value_next = False

            for field in chunk.splitlines():
                if not field.strip():
                    continue

                if field.startswith(" " * 4):
                    # a value is only meaningful once a cell header was seen
                    if not value_next:
                        continue

                    value = field.strip()
                    # Remove exactly one pair of enclosing quotes so that
                    # quotes and spaces that are part of the value survive.
                    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                        value = value[1:-1]

                    # partition() yields an empty column instead of raising
                    # IndexError when the header carries no ":".
                    column_family, _, column = column_name.partition(":")
                    cells.append({
                        "column_family": column_family,
                        "column": column,
                        "timestamp": datetime.datetime.strptime(
                            timestamp, "%Y/%m/%d-%H:%M:%S.%f"
                        ).isoformat(),
                        "value": value
                    })

                elif field.startswith(" " * 2):
                    # The timestamp never contains "@", so split on the last
                    # one; the column name itself may contain "@".
                    column_name, timestamp = map(str.strip, field.rsplit("@", 1))
                    value_next = True

                else:
                    key = field

            if key is not None:
                raw_output.append({"key": key, "cells": cells})

    return raw_output if raw else _process(raw_output)
|
1
tests/fixtures/generic/cbt-multiple-columns.json
vendored
Normal file
1
tests/fixtures/generic/cbt-multiple-columns.json
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
[{"key":"foo","cells":{"bat":{"bar":"baz"},"foo":{"bar1":"baz1","bar2":"baz2"}}}]
|
8
tests/fixtures/generic/cbt-multiple-columns.out
vendored
Normal file
8
tests/fixtures/generic/cbt-multiple-columns.out
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
----------------------------------------
|
||||||
|
foo
|
||||||
|
foo:bar1 @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz1"
|
||||||
|
foo:bar2 @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz2"
|
||||||
|
bat:bar @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz"
|
1
tests/fixtures/generic/cbt-multiple-rows-raw.json
vendored
Normal file
1
tests/fixtures/generic/cbt-multiple-rows-raw.json
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
[{"key":"foo","cells":[{"column_family":"foo","column":"bar","timestamp":"1970-01-01T01:00:00","value":"baz1"}]},{"key":"bar","cells":[{"column_family":"foo","column":"bar","timestamp":"1970-01-01T01:00:00","value":"baz2"}]}]
|
1
tests/fixtures/generic/cbt-multiple-rows.json
vendored
Normal file
1
tests/fixtures/generic/cbt-multiple-rows.json
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
[{"key":"foo","cells":{"foo":{"bar":"baz1"}}},{"key":"bar","cells":{"foo":{"bar":"baz2"}}}]
|
8
tests/fixtures/generic/cbt-multiple-rows.out
vendored
Normal file
8
tests/fixtures/generic/cbt-multiple-rows.out
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
----------------------------------------
|
||||||
|
foo
|
||||||
|
foo:bar @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz1"
|
||||||
|
----------------------------------------
|
||||||
|
bar
|
||||||
|
foo:bar @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz2"
|
1
tests/fixtures/generic/cbt-single.json
vendored
Normal file
1
tests/fixtures/generic/cbt-single.json
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
[{"key":"foo","cells":{"foo":{"bar":"baz"}}}]
|
4
tests/fixtures/generic/cbt-single.out
vendored
Normal file
4
tests/fixtures/generic/cbt-single.out
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
----------------------------------------
|
||||||
|
foo
|
||||||
|
foo:bar @ 1970/01/01-01:00:00.000000
|
||||||
|
"baz"
|
64
tests/test_cbt.py
Normal file
64
tests/test_cbt.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from jc.exceptions import ParseError
|
||||||
|
import jc.parsers.cbt
|
||||||
|
|
||||||
|
THIS_DIR = os.path.dirname(os.path.abspath(__file__))


class MyTests(unittest.TestCase):
    """Fixture-driven tests for the `cbt` parser."""

    # command output fixtures (parser input)
    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-single.out'), 'r', encoding='utf-8') as f:
        single = f.read()

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-columns.out'), 'r', encoding='utf-8') as f:
        multiple_columns = f.read()

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows.out'), 'r', encoding='utf-8') as f:
        multiple_rows = f.read()

    # expected JSON fixtures (parser output)
    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-single.json'), 'r', encoding='utf-8') as f:
        single_json = json.load(f)

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-columns.json'), 'r', encoding='utf-8') as f:
        multiple_columns_json = json.load(f)

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows.json'), 'r', encoding='utf-8') as f:
        multiple_rows_json = json.load(f)

    with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/cbt-multiple-rows-raw.json'), 'r', encoding='utf-8') as f:
        multiple_rows_raw_json = json.load(f)

    def test_cbt_nodata(self):
        """
        Test 'cbt' with no data
        """
        parsed = jc.parsers.cbt.parse('', quiet=True)
        self.assertEqual(parsed, [])

    def test_cbt_single_row(self):
        """
        Test 'cbt' with a single row
        """
        parsed = jc.parsers.cbt.parse(self.single, quiet=True)
        self.assertEqual(parsed, self.single_json)

    def test_cbt_multiple_column_families(self):
        """
        Test 'cbt' with multiple columns from multiple column families
        """
        parsed = jc.parsers.cbt.parse(self.multiple_columns, quiet=True)
        self.assertEqual(parsed, self.multiple_columns_json)

    def test_cbt_multiple_rows(self):
        """
        Test 'cbt' with multiple rows
        """
        parsed = jc.parsers.cbt.parse(self.multiple_rows, quiet=True)
        self.assertEqual(parsed, self.multiple_rows_json)

    def test_cbt_multiple_rows_raw(self):
        """
        Test 'cbt' with multiple rows raw
        """
        parsed = jc.parsers.cbt.parse(self.multiple_rows, quiet=True, raw=True)
        self.assertEqual(parsed, self.multiple_rows_raw_json)


if __name__ == '__main__':
    unittest.main()
|
Reference in New Issue
Block a user