diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index 080ad34e4efca4f24d8d722d8831f48cec246bee_c2ltcGxlanNvbi9fX2luaXRfXy5weQ==..457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi9fX2luaXRfXy5weQ== 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -437,7 +437,21 @@
     of subclassing whenever possible.
 
     """
-    return loads(fp.read(),
+    contents = fp.read()
+    # Strip a leading byte order mark: a first character of U+FEFF, or
+    # the three bytes EF BB BF (the UTF-8 encoded BOM).  Anything else is
+    # passed to loads() untouched.
+    # Slice instead of indexing so that empty input does not raise
+    # IndexError here; loads() is left to report the empty document.
+    first = contents[:1]
+    if first:
+        ord0 = ord(first)
+        if ord0 == 0xfeff:
+            contents = contents[1:]
+        elif ord0 == 0xef and contents[:3] == '\xef\xbb\xbf':
+            contents = contents[3:]
+
+    return loads(contents,
         encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index 080ad34e4efca4f24d8d722d8831f48cec246bee_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk=..457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk= 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -1,4 +1,5 @@
 import sys
+import os.path
 from unittest import TestCase
 
 import simplejson as json
@@ -143,3 +144,16 @@
         self.assertEqual(
             json.dumps(c, ensure_ascii=False),
             '"' + c + '"')
+
+    def test_strip_bom(self):
+        thisdir = os.path.dirname(__file__)
+        json_file = os.path.join(thisdir, "utf-8-bom.json")
+        doc_ascii = {
+            u"content": u"\u3053\u3093\u306b\u3061\u308f"
+        }
+        # Close the fixture explicitly instead of leaking the handle.
+        fp = open(json_file)
+        try:
+            self.assertEqual(json.load(fp), doc_ascii)
+        finally:
+            fp.close()
diff --git a/simplejson/tests/utf-8-bom.json b/simplejson/tests/utf-8-bom.json
new file mode 100644
index 0000000000000000000000000000000000000000..457493073c752083526c5370e51d7f3e2b1fea8e_c2ltcGxlanNvbi90ZXN0cy91dGYtOC1ib20uanNvbg==
--- /dev/null
+++ b/simplejson/tests/utf-8-bom.json
@@ -0,0 +1,3 @@
+﻿{
+    "content": "こんにちわ"
+}