diff --git a/CHANGES.txt b/CHANGES.txt index 457493073c752083526c5370e51d7f3e2b1fea8e_Q0hBTkdFUy50eHQ=..86d8873cb8427b11c58db64d3835172cec2e3f80_Q0hBTkdFUy50eHQ= 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +Version 3.6.0 released 2014-07-21 + +* Automatically strip any UTF-8 BOM from input to more closely + follow the latest specs + https://github.com/simplejson/simplejson/pull/101 + Version 3.5.3 released 2014-06-24 * Fix lower bound checking in scan_once / raw_decode API diff --git a/conf.py b/conf.py index 457493073c752083526c5370e51d7f3e2b1fea8e_Y29uZi5weQ==..86d8873cb8427b11c58db64d3835172cec2e3f80_Y29uZi5weQ== 100644 --- a/conf.py +++ b/conf.py @@ -42,5 +42,5 @@ # other places throughout the built documents. # # The short X.Y version. -version = '3.5' +version = '3.6' # The full version, including alpha/beta/rc tags. @@ -46,5 +46,5 @@ # The full version, including alpha/beta/rc tags. -release = '3.5.3' +release = '3.6.0' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/simplejson/__init__.py b/simplejson/__init__.py index 457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi9fX2luaXRfXy5weQ==..86d8873cb8427b11c58db64d3835172cec2e3f80_c2ltcGxlanNvbi9fX2luaXRfXy5weQ== 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -98,7 +98,7 @@ Expecting property name: line 1 column 3 (char 2) """ from __future__ import absolute_import -__version__ = '3.5.3' +__version__ = '3.6.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -437,16 +437,7 @@ of subclassing whenever possible. """ - # Strip the UTF-8 BOM - contents = fp.read() - ord0 = ord(contents[0]) - if ord0 in (0xef, 0xfeff): - if ord0 == 0xfeff: - contents = contents[1:] - elif contents[:3] == '\xef\xbb\xbf': - contents = contents[3:] - - return loads(contents, + return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi9kZWNvZGVyLnB5..86d8873cb8427b11c58db64d3835172cec2e3f80_c2ltcGxlanNvbi9kZWNvZGVyLnB5 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -390,4 +390,11 @@ raise JSONDecodeError('Expecting value', s, idx) if _PY3 and not isinstance(s, text_type): raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk=..86d8873cb8427b11c58db64d3835172cec2e3f80_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk= 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -1,5 +1,5 @@ import sys -import os.path +import codecs from unittest import TestCase import simplejson as json @@ -3,7 +3,7 @@ from unittest import TestCase import simplejson as json -from simplejson.compat import unichr, text_type, b, u +from simplejson.compat import unichr, text_type, b, u, BytesIO class TestUnicode(TestCase): def test_encoding1(self): @@ -146,9 +146,8 @@ '"' + c + '"') def test_strip_bom(self): - thisdir = os.path.dirname(__file__) - json_file = os.path.join(thisdir, "utf-8-bom.json") - doc_ascii = { - u"content": u"\u3053\u3093\u306b\u3061\u308f" - } - self.assertEqual(json.load(open(json_file)), doc_ascii) + content = u"\u3053\u3093\u306b\u3061\u308f" + json_doc = codecs.BOM_UTF8 + b(json.dumps(content)) + self.assertEqual(json.load(BytesIO(json_doc)), content) + for doc in json_doc, json_doc.decode('utf8'): + self.assertEqual(json.loads(doc), content) diff --git a/simplejson/tests/utf-8-bom.json b/simplejson/tests/utf-8-bom.json deleted file mode 100644 index 457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi90ZXN0cy91dGYtOC1ib20uanNvbg==..0000000000000000000000000000000000000000 --- a/simplejson/tests/utf-8-bom.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "content": "こんにちわ" -}