Fix doublequote ("") handling in the CSV parser

2025-10-08 23:22:21 +02:00 · 2022-01-02 17:11:20 +02:00
parent f7331001d4
commit 2a40f84274
4 changed files with 22 additions and 1 deletions
--- a/jc/parsers/csv.py
+++ b/jc/parsers/csv.py
@@ -130,9 +130,11 @@ def parse(data, raw=False, quiet=False):

    if jc.utils.has_data(data):

-        dialect = None
+        dialect = "excel" # default in csv module
        try:
            dialect = csv.Sniffer().sniff(data[:1024])
+            if '""' in data:
+                dialect.doublequote = True
        except Exception:
            pass

--- a/tests/fixtures/generic/csv-doubleqouted.csv
+++ b/tests/fixtures/generic/csv-doubleqouted.csv
@@ -0,0 +1,3 @@
+A,B
+1,"this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this"
+2,"this is a field with "" in it"
--- a/tests/fixtures/generic/csv-doubleqouted.json
+++ b/tests/fixtures/generic/csv-doubleqouted.json
@@ -0,0 +1,4 @@
+[
+    {"A": "1", "B": "this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this"},
+    {"A": "2", "B": "this is a field with \" in it"}
+]
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -37,6 +37,9 @@ class MyTests(unittest.TestCase):
        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.csv'), 'r', encoding='utf-8') as f:
            self.generic_csv_insurance = f.read()

+        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.csv'), 'r', encoding='utf-8') as f:
+            self.generic_csv_doubleqouted = f.read()
+
        # output
        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.json'), 'r', encoding='utf-8') as f:
            self.generic_csv_biostats_json = json.loads(f.read())
@@ -65,6 +68,9 @@ class MyTests(unittest.TestCase):
        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.json'), 'r', encoding='utf-8') as f:
            self.generic_csv_insurance_json = json.loads(f.read())

+        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.json'), 'r', encoding='utf-8') as f:
+            self.generic_csv_doubleqouted_json = json.loads(f.read())
+
    def test_csv_nodata(self):
        """
        Test with no data
@@ -125,6 +131,12 @@ class MyTests(unittest.TestCase):
        """
        self.assertEqual(jc.parsers.csv.parse(self.generic_csv_insurance, quiet=True), self.generic_csv_insurance_json)

+    def test_doubleqouted(self):
+        """
+        Test 'csv-doubleqouted.csv' file
+        """
+        self.assertEqual(jc.parsers.csv.parse(self.generic_csv_doubleqouted, quiet=True), self.generic_csv_doubleqouted_json)
+

 if __name__ == '__main__':
    unittest.main()