# HG changeset patch
# User Ryo Takahashi <rt.sporty@gmail.com>
# Date 1405928031 -32400
#      Mon Jul 21 16:33:51 2014 +0900
# Node ID 457493073c752083526c5370e51d7f3e2b1fea8e
# Parent  080ad34e4efca4f24d8d722d8831f48cec246bee
strip utf-8 bom.

diff --git a/simplejson/__init__.py b/simplejson/__init__.py
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -437,7 +437,21 @@
     of subclassing whenever possible.
 
     """
-    return loads(fp.read(),
+    contents = fp.read()
+    # Strip a leading UTF-8 BOM.  Decoded text carries it as the single
+    # code point U+FEFF; undecoded input carries it as the three bytes
+    # EF BB BF.  One-element slices are handed to ord() instead of
+    # contents[0] so that empty input passes through without an
+    # IndexError and Python 3 bytes (whose items are ints) also work.
+    if contents[:1]:
+        ord0 = ord(contents[:1])
+        if ord0 == 0xfeff:
+            contents = contents[1:]
+        elif (ord0 == 0xef and len(contents) >= 3 and
+              ord(contents[1:2]) == 0xbb and ord(contents[2:3]) == 0xbf):
+            contents = contents[3:]
+
+    return loads(contents,
         encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,4 +1,5 @@
 import sys
+import os.path
 from unittest import TestCase
 
 import simplejson as json
@@ -143,3 +144,18 @@
             self.assertEqual(
                 json.dumps(c, ensure_ascii=False),
                 '"' + c + '"')
+
+    def test_strip_bom(self):
+        # load() must ignore a UTF-8 BOM at the start of the document.
+        thisdir = os.path.dirname(__file__)
+        json_file = os.path.join(thisdir, "utf-8-bom.json")
+        expected = {
+            u"content": u"\u3053\u3093\u306b\u3061\u308f"
+        }
+        # Binary mode keeps the result independent of the locale encoding;
+        # try/finally closes the handle even when the assertion fails.
+        fp = open(json_file, "rb")
+        try:
+            self.assertEqual(json.load(fp), expected)
+        finally:
+            fp.close()
diff --git a/simplejson/tests/utf-8-bom.json b/simplejson/tests/utf-8-bom.json
new file mode 100644
--- /dev/null
+++ b/simplejson/tests/utf-8-bom.json
@@ -0,0 +1,3 @@
+﻿{
+    "content": "こんにちわ"
+}