Skip to content
Snippets Groups Projects
Commit 133ee7b17ff1 authored by Stefan Behnel
Browse files

Using Py_UNICODE to store lone surrogates makes Py3 join surrogate pairs on...

Using Py_UNICODE to store lone surrogates makes Py3 join surrogate pairs on 16-bit Unicode platforms (Windows) when reading them back in, although we correctly processed them before.
Instead, we now use the "unicode_escape" codec to store byte strings, because it can return surrogate characters (which the other codecs cannot).
parent 044c0d743245
No related branches found
No related tags found
No related merge requests found
......@@ -1632,9 +1632,10 @@
# lone (unpaired) surrogates are not really portable and cannot be
# decoded by the UTF-8 codec in Py3.3
self.result_code = code.get_py_const(py_object_type, 'ustring')
-data_cname = code.get_pyunicode_ptr_const(self.value)
+data_cname = code.get_string_const(
+StringEncoding.BytesLiteral(self.value.encode('unicode_escape')))
const_code = code.get_cached_constants_writer(self.result_code)
if const_code is None:
return # already initialised
const_code.mark_pos(self.pos)
const_code.putln(
......@@ -1636,9 +1637,9 @@
const_code = code.get_cached_constants_writer(self.result_code)
if const_code is None:
return # already initialised
const_code.mark_pos(self.pos)
const_code.putln(
-"%s = PyUnicode_FromUnicode(%s, (sizeof(%s) / sizeof(Py_UNICODE))-1); %s" % (
+"%s = PyUnicode_DecodeUnicodeEscape(%s, sizeof(%s) - 1, NULL); %s" % (
self.result_code,
data_cname,
data_cname,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment