From 2a40f842743a8745d651b4b6cae645045e6c6aab Mon Sep 17 00:00:00 2001
From: shaik <shai.kustin@torq.io>
Date: Sun, 2 Jan 2022 17:11:20 +0200
Subject: [PATCH] fix doublequote handling in csv parser

---
 jc/parsers/csv.py                            |  4 +++-
 tests/fixtures/generic/csv-doubleqouted.csv  |  3 +++
 tests/fixtures/generic/csv-doubleqouted.json |  4 ++++
 tests/test_csv.py                            | 12 ++++++++++++
 4 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 tests/fixtures/generic/csv-doubleqouted.csv
 create mode 100644 tests/fixtures/generic/csv-doubleqouted.json

diff --git a/jc/parsers/csv.py b/jc/parsers/csv.py
index 924d731c..731e0ddb 100644
--- a/jc/parsers/csv.py
+++ b/jc/parsers/csv.py
@@ -130,9 +130,11 @@ def parse(data, raw=False, quiet=False):
 
     if jc.utils.has_data(data):
 
-        dialect = None
+        dialect = "excel" # default in csv module
         try:
             dialect = csv.Sniffer().sniff(data[:1024])
+            if '""' in data:
+                dialect.doublequote = True
         except Exception:
             pass
 
diff --git a/tests/fixtures/generic/csv-doubleqouted.csv b/tests/fixtures/generic/csv-doubleqouted.csv
new file mode 100644
index 00000000..9b2ef10f
--- /dev/null
+++ b/tests/fixtures/generic/csv-doubleqouted.csv
@@ -0,0 +1,3 @@
+A,B
+1,"this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this"
+2,"this is a field with "" in it"
diff --git a/tests/fixtures/generic/csv-doubleqouted.json b/tests/fixtures/generic/csv-doubleqouted.json
new file mode 100644
index 00000000..a1dab8e1
--- /dev/null
+++ b/tests/fixtures/generic/csv-doubleqouted.json
@@ -0,0 +1,4 @@
+[
+    {"A": "1", "B": "this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this is 1024 bytes long field this"},
+    {"A": "2", "B": "this is a field with \" in it"}
+]
diff --git a/tests/test_csv.py b/tests/test_csv.py
index 6d3b2d91..a62cf0f2 100644
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -37,6 +37,9 @@ class MyTests(unittest.TestCase):
         with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.csv'), 'r', encoding='utf-8') as f:
             self.generic_csv_insurance = f.read()
 
+        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.csv'), 'r', encoding='utf-8') as f:
+            self.generic_csv_doubleqouted = f.read()
+
         # output
         with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-biostats.json'), 'r', encoding='utf-8') as f:
             self.generic_csv_biostats_json = json.loads(f.read())
@@ -65,6 +68,9 @@ class MyTests(unittest.TestCase):
         with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-insurance.json'), 'r', encoding='utf-8') as f:
             self.generic_csv_insurance_json = json.loads(f.read())
 
+        with open(os.path.join(THIS_DIR, os.pardir, 'tests/fixtures/generic/csv-doubleqouted.json'), 'r', encoding='utf-8') as f:
+            self.generic_csv_doubleqouted_json = json.loads(f.read())
+
     def test_csv_nodata(self):
         """
         Test with no data
@@ -125,6 +131,12 @@ class MyTests(unittest.TestCase):
         """
         self.assertEqual(jc.parsers.csv.parse(self.generic_csv_insurance, quiet=True), self.generic_csv_insurance_json)
 
+    def test_doubleqouted(self):
+        """
+        Test 'csv-doubleqouted.csv' file
+        """
+        self.assertEqual(jc.parsers.csv.parse(self.generic_csv_doubleqouted, quiet=True), self.generic_csv_doubleqouted_json)
+
 
 if __name__ == '__main__':
     unittest.main()