1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00
Files
jc/tests/test_csv_s.py
Kelly Brazil 8644f70db4 fix typos
2022-10-25 13:46:22 -07:00

171 lines
7.6 KiB
Python

import os
import json
import unittest
import jc.parsers.csv_s
from jc.exceptions import ParseError
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# To create streaming output use:
# $ cat file.csv | jc --csv-s | jello -c > csv-file-streaming.json
class MyTests(unittest.TestCase):
# input
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.csv'), 'r', encoding='utf-8') as f:
generic_csv_biostats = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-cities.csv'), 'r', encoding='utf-8') as f:
generic_csv_cities = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-deniro.csv'), 'r', encoding='utf-8') as f:
generic_csv_deniro = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-example.csv'), 'r', encoding='utf-8') as f:
generic_csv_example = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna.tsv'), 'r', encoding='utf-8') as f:
generic_csv_flyrna = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna2.tsv'), 'r', encoding='utf-8') as f:
generic_csv_flyrna2 = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-pipe.csv'), 'r', encoding='utf-8') as f:
generic_csv_homes_pipe = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes.csv'), 'r', encoding='utf-8') as f:
generic_csv_homes = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-10k-sales-records.csv'), 'r', encoding='utf-8') as f:
generic_csv_10k_sales_records = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doublequoted.csv'), 'r', encoding='utf-8') as f:
generic_csv_doublequoted = f.read()
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom.csv'), 'r', encoding='utf-8') as f:
generic_csv_utf8_bom = f.read()
# output
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_biostats_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-cities-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_cities_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-deniro-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_deniro_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-example-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_example_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_flyrna_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-flyrna2-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_flyrna2_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-pipe-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_homes_pipe_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-homes-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_homes_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-10k-sales-records-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_10k_sales_records_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doublequoted-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_doublequoted_streaming_json = json.loads(f.read())
with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom-streaming.json'), 'r', encoding='utf-8') as f:
generic_csv_utf8_bom_streaming_json = json.loads(f.read())
def test_csv_s_nodata(self):
"""
Test CSV parser with no data
"""
self.assertEqual(list(jc.parsers.csv_s.parse([], quiet=True)), [])
def test_csv_unparsable(self):
"""
Test CSV streaming parser with '\r' newlines. This will raise ParseError due to a Python bug
that does not correctly iterate on that line ending with sys.stdin. This is not a great test.
https://bugs.python.org/issue45617
"""
data = r'unparsable\rdata' # raw mode simulates unrecognized line separator - not great
g = jc.parsers.csv_s.parse(data.splitlines(), quiet=True)
with self.assertRaises(ParseError):
list(g)
def test_csv_s_biostats(self):
"""
Test 'biostats.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_biostats.splitlines(), quiet=True)), self.generic_csv_biostats_streaming_json)
def test_csv_s_cities(self):
"""
Test 'cities.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_cities.splitlines(), quiet=True)), self.generic_csv_cities_streaming_json)
def test_csv_s_deniro(self):
"""
Test 'deniro.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_deniro.splitlines(), quiet=True)), self.generic_csv_deniro_streaming_json)
def test_csv_s_example(self):
"""
Test 'example.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_example.splitlines(), quiet=True)), self.generic_csv_example_streaming_json)
def test_csv_s_flyrna(self):
"""
Test 'flyrna.tsv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_flyrna.splitlines(), quiet=True)), self.generic_csv_flyrna_streaming_json)
def test_csv_s_flyrna2(self):
"""
Test 'flyrna2.tsv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_flyrna2.splitlines(), quiet=True)), self.generic_csv_flyrna2_streaming_json)
def test_csv_s_homes_pipe(self):
"""
Test 'homes-pipe.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_homes_pipe.splitlines(), quiet=True)), self.generic_csv_homes_pipe_streaming_json)
def test_csv_s_homes(self):
"""
Test 'homes.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_homes.splitlines(), quiet=True)), self.generic_csv_homes_streaming_json)
def test_csv_s_10k_records(self):
"""
Test '10k-sales-records.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_10k_sales_records.splitlines(), quiet=True)), self.generic_csv_10k_sales_records_streaming_json)
def test_csv_s_doublequoted(self):
"""
Test 'doublequoted.csv' file
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_doublequoted.splitlines(), quiet=True)), self.generic_csv_doublequoted_streaming_json)
def test_csv_s_utf8_bom(self):
"""
Test 'csv-utf-8-bom.csv' file to ensure the first column is correct if UTF-8 BOM bytes are present
"""
self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_utf8_bom.splitlines(), quiet=True)), self.generic_csv_utf8_bom_streaming_json)
if __name__ == '__main__':
unittest.main()