Commit 74b501cd by Ben Frederickson Committed by Jason Rhinelander

Fix passing in utf8 encoded strings with python 2

Passing utf8 encoded strings from python to a C++ function taking a
std::string was broken.  The previous version was trying to call
'PyUnicode_FromObject' on this data, which failed to convert the string
to unicode with the default ascii codec. Also this incurs an unnecessary
conversion to unicode for data that is immediately converted back to
utf8.

Fix by treating python 2 strings the same as python 3 bytes objects, and just
copying over the data if possible.
parent 0365d491
...@@ -734,9 +734,14 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s ...@@ -734,9 +734,14 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
return load_bytes(load_src); return load_bytes(load_src);
#else #else
if (sizeof(CharT) == 1) {
return load_bytes(load_src);
}
// The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
if (!PYBIND11_BYTES_CHECK(load_src.ptr())) if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
return false; return false;
temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr())); temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
if (!temp) { PyErr_Clear(); return false; } if (!temp) { PyErr_Clear(); return false; }
load_src = temp; load_src = temp;
...@@ -780,9 +785,8 @@ private: ...@@ -780,9 +785,8 @@ private:
#endif #endif
} }
#if PY_MAJOR_VERSION >= 3 // When loading into a std::string or char*, accept a bytes object as-is (i.e.
// In Python 3, when loading into a std::string or char*, accept a bytes object as-is (i.e. // without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op. Python 2,
// which supports loading a unicode from a str, doesn't take this path. // which supports loading a unicode from a str, doesn't take this path.
template <typename C = CharT> template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) { bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
...@@ -798,9 +802,9 @@ private: ...@@ -798,9 +802,9 @@ private:
return false; return false;
} }
template <typename C = CharT> template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; } bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; }
#endif
}; };
// Type caster for C-style strings. We basically use a std::string type caster, but also add the // Type caster for C-style strings. We basically use a std::string type caster, but also add the
......
...@@ -554,6 +554,9 @@ def test_bytes_to_string(): ...@@ -554,6 +554,9 @@ def test_bytes_to_string():
assert string_length(byte("a\x00b")) == 3 assert string_length(byte("a\x00b")) == 3
assert strlen(byte("a\x00b")) == 1 # C-string limitation assert strlen(byte("a\x00b")) == 1 # C-string limitation
# passing in a utf8 encoded string should work
assert string_length(u'💩'.encode("utf8")) == 4
def test_builtins_cast_return_none(): def test_builtins_cast_return_none():
"""Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None""" """Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment