From fd5cbbb4d57a86d949f6ab908613b7b905b7eeb4 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Tue, 25 Oct 2022 11:46:31 -0700 Subject: [PATCH] add csv utf-8 bom tests --- .../fixtures/generic/csv-utf-8-bom-streaming.json | 1 + tests/fixtures/generic/csv-utf-8-bom.csv | 6 ++++++ tests/fixtures/generic/csv-utf-8-bom.json | 1 + tests/test_csv.py | 14 +++++++++++++- tests/test_csv_s.py | 12 ++++++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/generic/csv-utf-8-bom-streaming.json create mode 100644 tests/fixtures/generic/csv-utf-8-bom.csv create mode 100644 tests/fixtures/generic/csv-utf-8-bom.json diff --git a/tests/fixtures/generic/csv-utf-8-bom-streaming.json b/tests/fixtures/generic/csv-utf-8-bom-streaming.json new file mode 100644 index 00000000..1ecd1f1e --- /dev/null +++ b/tests/fixtures/generic/csv-utf-8-bom-streaming.json @@ -0,0 +1 @@ +[{"col1":"a","col2":"b","col3":"c"},{"col1":"x","col2":"y","col3":"z"},{"col1":"abc","col2":"def","col3":"ghi"},{"col1":"1","col2":"2","col3":"3"},{"col1":"foo","col2":"bar","col3":"baz"}] diff --git a/tests/fixtures/generic/csv-utf-8-bom.csv b/tests/fixtures/generic/csv-utf-8-bom.csv new file mode 100644 index 00000000..deef3167 --- /dev/null +++ b/tests/fixtures/generic/csv-utf-8-bom.csv @@ -0,0 +1,6 @@ +col1,col2,col3 +a,b,c +x,y,z +abc,def,ghi +1,2,3 +foo,bar,baz \ No newline at end of file diff --git a/tests/fixtures/generic/csv-utf-8-bom.json b/tests/fixtures/generic/csv-utf-8-bom.json new file mode 100644 index 00000000..1ecd1f1e --- /dev/null +++ b/tests/fixtures/generic/csv-utf-8-bom.json @@ -0,0 +1 @@ +[{"col1":"a","col2":"b","col3":"c"},{"col1":"x","col2":"y","col3":"z"},{"col1":"abc","col2":"def","col3":"ghi"},{"col1":"1","col2":"2","col3":"3"},{"col1":"foo","col2":"bar","col3":"baz"}] diff --git a/tests/test_csv.py b/tests/test_csv.py index 6e4f5cda..54df01ab 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -39,6 +39,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.csv'), 'r', encoding='utf-8') as f: generic_csv_doubleqouted = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom.csv'), 'r', encoding='utf-8') as f: + generic_csv_utf8_bom = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.json'), 'r', encoding='utf-8') as f: generic_csv_biostats_json = json.loads(f.read()) @@ -70,6 +73,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.json'), 'r', encoding='utf-8') as f: generic_csv_doubleqouted_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom.json'), 'r', encoding='utf-8') as f: + generic_csv_utf8_bom_json = json.loads(f.read()) + def test_csv_nodata(self): """ @@ -131,12 +137,18 @@ class MyTests(unittest.TestCase): """ self.assertEqual(jc.parsers.csv.parse(self.generic_csv_insurance, quiet=True), self.generic_csv_insurance_json) - def test_doubleqouted(self): + def test_csv_doubleqouted(self): """ Test 'csv-doubleqouted.csv' file """ self.assertEqual(jc.parsers.csv.parse(self.generic_csv_doubleqouted, quiet=True), self.generic_csv_doubleqouted_json) + def test_csv_utf8_bom(self): + """ + Test 'csv-utf-8-bom.csv' file to ensure the first column is correct if UTF-8 BOM bytes are present + """ + self.assertEqual(jc.parsers.csv.parse(self.generic_csv_utf8_bom, quiet=True), self.generic_csv_utf8_bom_json) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_csv_s.py b/tests/test_csv_s.py index a33da660..8fd76d92 100644 --- a/tests/test_csv_s.py +++ b/tests/test_csv_s.py @@ -44,6 +44,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.csv'), 'r', encoding='utf-8') as f: generic_csv_doubleqouted = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom.csv'), 'r', encoding='utf-8') as f: + generic_csv_utf8_bom = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats-streaming.json'), 'r', encoding='utf-8') as f: generic_csv_biostats_streaming_json = json.loads(f.read()) @@ -75,6 +78,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted-streaming.json'), 'r', encoding='utf-8') as f: generic_csv_doublequoted_streaming_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-utf-8-bom-streaming.json'), 'r', encoding='utf-8') as f: + generic_csv_utf8_bom_streaming_json = json.loads(f.read()) + def test_csv_s_nodata(self): """ @@ -153,6 +159,12 @@ class MyTests(unittest.TestCase): """ self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_doubleqouted.splitlines(), quiet=True)), self.generic_csv_doublequoted_streaming_json) + def test_csv_s_utf8_bom(self): + """ + Test 'csv-utf-8-bom.csv' file to ensure the first column is correct if UTF-8 BOM bytes are present + """ + self.assertEqual(list(jc.parsers.csv_s.parse(self.generic_csv_utf8_bom.splitlines(), quiet=True)), self.generic_csv_utf8_bom_streaming_json) + if __name__ == '__main__': unittest.main()