diff --git a/jc/parsers/csv.py b/jc/parsers/csv.py index 5a4bbf6a..7fbc1fd6 100644 --- a/jc/parsers/csv.py +++ b/jc/parsers/csv.py @@ -78,7 +78,7 @@ import csv class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.4' + version = '1.5' description = 'CSV file parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -124,6 +124,12 @@ def parse(data, raw=False, quiet=False): jc.utils.compatibility(__name__, info.compatible, quiet) jc.utils.input_type_check(data) + # remove BOM bytes, if present + if isinstance(data, str): + data = data.encode('utf-8') + + data = data.decode('utf-8-sig') + raw_output = [] cleandata = data.splitlines() diff --git a/jc/parsers/csv_s.py b/jc/parsers/csv_s.py index f3e84681..98a8ce97 100644 --- a/jc/parsers/csv_s.py +++ b/jc/parsers/csv_s.py @@ -63,7 +63,7 @@ from jc.exceptions import ParseError class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.3' + version = '1.4' description = 'CSV file streaming parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -127,7 +127,14 @@ def parse(data, raw=False, quiet=False, ignore_exceptions=False): if len(temp_list) == 1: raise ParseError('Unable to detect line endings. Please try the non-streaming CSV parser instead.') - sniffdata = '\n'.join(temp_list)[:1024] + # remove BOM bytes from first row, if present + if temp_list: + if isinstance(temp_list[0], str): + temp_list[0] = temp_list[0].encode('utf-8') + + temp_list[0] = temp_list[0].decode('utf-8-sig') + + sniffdata = '\r\n'.join(temp_list)[:1024] dialect = 'excel' # default in csv module try: