mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-17 00:07:37 +02:00
171 lines
7.6 KiB
Python
171 lines
7.6 KiB
Python
import os
|
|
import json
|
|
import unittest
|
|
import jc.parsers.csv_s
|
|
from jc.exceptions import ParseError
|
|
|
|
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
# To create streaming output use:
|
|
# $ cat file.csv | jc --csv-s | jello -c > csv-file-streaming.json
|
|
|
|
|
|
class MyTests(unittest.TestCase):
|
|
|
|
# input
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_biostats = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-cities.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_cities = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-deniro.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_deniro = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-example.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_example = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna.tsv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_flyrna = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna2.tsv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_flyrna2 = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-pipe.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_homes_pipe = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_homes = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-10k-sales-records.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_10k_sales_records = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doublequoted.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_doublequoted = f.read()
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom.csv'), 'r', encoding='utf-8') as f:
|
|
generic_csv_utf8_bom = f.read()
|
|
|
|
# output
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_biostats_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-cities-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_cities_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-deniro-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_deniro_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-example-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_example_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_flyrna_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna2-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_flyrna2_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-pipe-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_homes_pipe_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_homes_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-10k-sales-records-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_10k_sales_records_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doublequoted-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_doublequoted_streaming_json = json.loads(f.read())
|
|
|
|
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom-streaming.json'), 'r', encoding='utf-8') as f:
|
|
generic_csv_utf8_bom_streaming_json = json.loads(f.read())
|
|
|
|
|
|
def test_csv_s_nodata(self):
|
|
"""
|
|
Test CSV parser with no data
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse([], quiet=True)), [])
|
|
|
|
def test_csv_unparsable(self):
|
|
"""
|
|
Test CSV streaming parser with '\r' newlines. This will raise ParseError due to a Python bug
|
|
that does not correctly iterate on that line ending with sys.stdin. This is not a great test.
|
|
https://bugs.python.org/issue45617
|
|
"""
|
|
data = r'unparsable\rdata' # raw mode simulates unrecognized line separator - not great
|
|
g = jc.parsers.csv_s.parse(data.splitlines(), quiet=True)
|
|
with self.assertRaises(ParseError):
|
|
list(g)
|
|
|
|
def test_csv_s_biostats(self):
|
|
"""
|
|
Test 'biostats.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_biostats.splitlines(), quiet=True)), self.generic_csv_biostats_streaming_json)
|
|
|
|
def test_csv_s_cities(self):
|
|
"""
|
|
Test 'cities.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_cities.splitlines(), quiet=True)), self.generic_csv_cities_streaming_json)
|
|
|
|
def test_csv_s_deniro(self):
|
|
"""
|
|
Test 'deniro.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_deniro.splitlines(), quiet=True)), self.generic_csv_deniro_streaming_json)
|
|
|
|
def test_csv_s_example(self):
|
|
"""
|
|
Test 'example.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_example.splitlines(), quiet=True)), self.generic_csv_example_streaming_json)
|
|
|
|
def test_csv_s_flyrna(self):
|
|
"""
|
|
Test 'flyrna.tsv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_flyrna.splitlines(), quiet=True)), self.generic_csv_flyrna_streaming_json)
|
|
|
|
def test_csv_s_flyrna2(self):
|
|
"""
|
|
Test 'flyrna2.tsv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_flyrna2.splitlines(), quiet=True)), self.generic_csv_flyrna2_streaming_json)
|
|
|
|
def test_csv_s_homes_pipe(self):
|
|
"""
|
|
Test 'homes-pipe.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_homes_pipe.splitlines(), quiet=True)), self.generic_csv_homes_pipe_streaming_json)
|
|
|
|
def test_csv_s_homes(self):
|
|
"""
|
|
Test 'homes.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_homes.splitlines(), quiet=True)), self.generic_csv_homes_streaming_json)
|
|
|
|
def test_csv_s_10k_records(self):
|
|
"""
|
|
Test '10k-sales-records.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_10k_sales_records.splitlines(), quiet=True)), self.generic_csv_10k_sales_records_streaming_json)
|
|
|
|
def test_csv_s_doublequoted(self):
|
|
"""
|
|
Test 'doublequoted.csv' file
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_doublequoted.splitlines(), quiet=True)), self.generic_csv_doublequoted_streaming_json)
|
|
|
|
def test_csv_s_utf8_bom(self):
|
|
"""
|
|
Test 'csv-utf-8-bom.csv' file to ensure the first column is correct if UTF-8 BOM bytes are present
|
|
"""
|
|
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_utf8_bom.splitlines(), quiet=True)), self.generic_csv_utf8_bom_streaming_json)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
unittest.main()
|