mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-07-13 01:20:24 +02:00
fix for UTF-8 csv files with leading BOM bytes
This commit is contained in:
@@ -78,7 +78,7 @@ import csv
|
|||||||
|
|
||||||
class info():
|
class info():
|
||||||
"""Provides parser metadata (version, author, etc.)"""
|
"""Provides parser metadata (version, author, etc.)"""
|
||||||
version = '1.4'
|
version = '1.5'
|
||||||
description = 'CSV file parser'
|
description = 'CSV file parser'
|
||||||
author = 'Kelly Brazil'
|
author = 'Kelly Brazil'
|
||||||
author_email = 'kellyjonbrazil@gmail.com'
|
author_email = 'kellyjonbrazil@gmail.com'
|
||||||
@@ -124,6 +124,12 @@ def parse(data, raw=False, quiet=False):
|
|||||||
jc.utils.compatibility(__name__, info.compatible, quiet)
|
jc.utils.compatibility(__name__, info.compatible, quiet)
|
||||||
jc.utils.input_type_check(data)
|
jc.utils.input_type_check(data)
|
||||||
|
|
||||||
|
# remove BOM bytes, if present
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = data.encode('utf-8')
|
||||||
|
|
||||||
|
data = data.decode('utf-8-sig')
|
||||||
|
|
||||||
raw_output = []
|
raw_output = []
|
||||||
cleandata = data.splitlines()
|
cleandata = data.splitlines()
|
||||||
|
|
||||||
|
@@ -63,7 +63,7 @@ from jc.exceptions import ParseError
|
|||||||
|
|
||||||
class info():
|
class info():
|
||||||
"""Provides parser metadata (version, author, etc.)"""
|
"""Provides parser metadata (version, author, etc.)"""
|
||||||
version = '1.3'
|
version = '1.4'
|
||||||
description = 'CSV file streaming parser'
|
description = 'CSV file streaming parser'
|
||||||
author = 'Kelly Brazil'
|
author = 'Kelly Brazil'
|
||||||
author_email = 'kellyjonbrazil@gmail.com'
|
author_email = 'kellyjonbrazil@gmail.com'
|
||||||
@@ -127,7 +127,14 @@ def parse(data, raw=False, quiet=False, ignore_exceptions=False):
|
|||||||
if len(temp_list) == 1:
|
if len(temp_list) == 1:
|
||||||
raise ParseError('Unable to detect line endings. Please try the non-streaming CSV parser instead.')
|
raise ParseError('Unable to detect line endings. Please try the non-streaming CSV parser instead.')
|
||||||
|
|
||||||
sniffdata = '\n'.join(temp_list)[:1024]
|
# remove BOM bytes from first row, if present
|
||||||
|
if temp_list:
|
||||||
|
if isinstance(temp_list[0], str):
|
||||||
|
temp_list[0] = temp_list[0].encode('utf-8')
|
||||||
|
|
||||||
|
temp_list[0] = temp_list[0].decode('utf-8-sig')
|
||||||
|
|
||||||
|
sniffdata = '\r\n'.join(temp_list)[:1024]
|
||||||
dialect = 'excel' # default in csv module
|
dialect = 'excel' # default in csv module
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
Reference in New Issue
Block a user