From 2a40f842743a8745d651b4b6cae645045e6c6aab Mon Sep 17 00:00:00 2001 From: shaik Date: Sun, 2 Jan 2022 17:11:20 +0200 Subject: [PATCH] fix doubleqoute in csv --- jc/parsers/csv.py | 4 +++- tests/fixtures/generic/csv-doubleqouted.csv | 3 +++ tests/fixtures/generic/csv-doubleqouted.json | 4 ++++ tests/test_csv.py | 12 ++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/generic/csv-doubleqouted.csv create mode 100644 tests/fixtures/generic/csv-doubleqouted.json diff --git a/jc/parsers/csv.py b/jc/parsers/csv.py index 924d731c..731e0ddb 100644 --- a/jc/parsers/csv.py +++ b/jc/parsers/csv.py @@ -130,9 +130,11 @@ def parse(data, raw=False, quiet=False): if jc.utils.has_data(data): - dialect = None + dialect = "excel" # default in csv module try: dialect = csv.Sniffer().sniff(data[:1024]) + if '""' in data: + dialect.doublequote = True except Exception: pass diff --git a/tests/fixtures/generic/csv-doubleqouted.csv b/tests/fixtures/generic/csv-doubleqouted.csv new file mode 100644 index 00000000..9b2ef10f --- /dev/null +++ b/tests/fixtures/generic/csv-doubleqouted.csv @@ -0,0 +1,3 @@ +A,B +1,"this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this" +2,"this is a field with "" in it" diff --git a/tests/fixtures/generic/csv-doubleqouted.json b/tests/fixtures/generic/csv-doubleqouted.json new file mode 100644 index 00000000..a1dab8e1 --- /dev/null +++ b/tests/fixtures/generic/csv-doubleqouted.json @@ -0,0 +1,4 @@ +[ + {"A": "1", "B": "this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this"}, + {"A": "2", "B": "this is a field with \" in it"} +] diff --git a/tests/test_csv.py b/tests/test_csv.py index 6d3b2d91..a62cf0f2 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -37,6 +37,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.csv'), 'r', encoding='utf-8') as f: self.generic_csv_insurance = f.read() + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.csv'), 'r', encoding='utf-8') as f: + self.generic_csv_doubleqouted = f.read() + # output with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.json'), 'r', encoding='utf-8') as f: self.generic_csv_biostats_json = json.loads(f.read()) @@ -65,6 +68,9 @@ class MyTests(unittest.TestCase): with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.json'), 'r', encoding='utf-8') as f: self.generic_csv_insurance_json = json.loads(f.read()) + with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.json'), 'r', encoding='utf-8') as f: + self.generic_csv_doubleqouted_json = json.loads(f.read()) + def test_csv_nodata(self): """ Test with no data @@ -125,6 +131,12 @@ class MyTests(unittest.TestCase): """ self.assertEqual(jc.parsers.csv.parse(self.generic_csv_insurance, quiet=True), self.generic_csv_insurance_json) + def test_doubleqouted(self): + """ + Test 'csv-doubleqouted.csv' file + """ + self.assertEqual(jc.parsers.csv.parse(self.generic_csv_doubleqouted, quiet=True), self.generic_csv_doubleqouted_json) + if __name__ == '__main__': unittest.main()