Skip to content
Snippets Groups Projects
Commit 86d8873cb842 authored by Bob Ippolito
Browse files

generalize BOM stripping to any use of raw_decode

parent 457493073c75
No related branches found
No related tags found
No related merge requests found
Version 3.6.0 released 2014-07-21
* Automatically strip any UTF-8 BOM from input to more closely
follow the latest specs
https://github.com/simplejson/simplejson/pull/101
Version 3.5.3 released 2014-06-24 Version 3.5.3 released 2014-06-24
* Fix lower bound checking in scan_once / raw_decode API * Fix lower bound checking in scan_once / raw_decode API
......
...@@ -42,5 +42,5 @@ ...@@ -42,5 +42,5 @@
# other places throughout the built documents. # other places throughout the built documents.
# #
# The short X.Y version. # The short X.Y version.
version = '3.5' version = '3.6'
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
...@@ -46,5 +46,5 @@ ...@@ -46,5 +46,5 @@
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = '3.5.3' release = '3.6.0'
# There are two options for replacing |today|: either, you set today to some # There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used: # non-false value, then it is used:
......
...@@ -98,7 +98,7 @@ ...@@ -98,7 +98,7 @@
Expecting property name: line 1 column 3 (char 2) Expecting property name: line 1 column 3 (char 2)
""" """
from __future__ import absolute_import from __future__ import absolute_import
__version__ = '3.5.3' __version__ = '3.6.0'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
...@@ -437,16 +437,7 @@ ...@@ -437,16 +437,7 @@
of subclassing whenever possible. of subclassing whenever possible.
""" """
# Strip the UTF-8 BOM return loads(fp.read(),
contents = fp.read()
ord0 = ord(contents[0])
if ord0 in (0xef, 0xfeff):
if ord0 == 0xfeff:
contents = contents[1:]
elif contents[:3] == '\xef\xbb\xbf':
contents = contents[3:]
return loads(contents,
encoding=encoding, cls=cls, object_hook=object_hook, encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int, parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
......
...@@ -390,4 +390,11 @@ ...@@ -390,4 +390,11 @@
raise JSONDecodeError('Expecting value', s, idx) raise JSONDecodeError('Expecting value', s, idx)
if _PY3 and not isinstance(s, text_type): if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes") raise TypeError("Input string must be text, not bytes")
# strip UTF-8 bom
if len(s) > idx:
ord0 = ord(s[idx])
if ord0 == 0xfeff:
idx += 1
elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
idx += 3
return self.scan_once(s, idx=_w(s, idx).end()) return self.scan_once(s, idx=_w(s, idx).end())
import sys import sys
import os.path import codecs
from unittest import TestCase from unittest import TestCase
import simplejson as json import simplejson as json
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
from unittest import TestCase from unittest import TestCase
import simplejson as json import simplejson as json
from simplejson.compat import unichr, text_type, b, u from simplejson.compat import unichr, text_type, b, u, BytesIO
class TestUnicode(TestCase): class TestUnicode(TestCase):
def test_encoding1(self): def test_encoding1(self):
...@@ -146,9 +146,8 @@ ...@@ -146,9 +146,8 @@
'"' + c + '"') '"' + c + '"')
def test_strip_bom(self): def test_strip_bom(self):
thisdir = os.path.dirname(__file__) content = u"\u3053\u3093\u306b\u3061\u308f"
json_file = os.path.join(thisdir, "utf-8-bom.json") json_doc = codecs.BOM_UTF8 + b(json.dumps(content))
doc_ascii = { self.assertEqual(json.load(BytesIO(json_doc)), content)
u"content": u"\u3053\u3093\u306b\u3061\u308f" for doc in json_doc, json_doc.decode('utf8'):
} self.assertEqual(json.loads(doc), content)
self.assertEqual(json.load(open(json_file)), doc_ascii)
{
"content": "こんにちわ"
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment