Meta PR for Google Patches

parent 02746cb6
...@@ -1623,6 +1623,14 @@ struct pyobject_caster { ...@@ -1623,6 +1623,14 @@ struct pyobject_caster {
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0> template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
bool load(handle src, bool /* convert */) { bool load(handle src, bool /* convert */) {
#if defined(PYBIND11_STR_NON_PERMISSIVE) && !defined(PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE)
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
if (!str_from_bytes) throw error_already_set();
value = reinterpret_steal<type>(str_from_bytes);
return true;
}
#endif
if (!isinstance<type>(src)) if (!isinstance<type>(src))
return false; return false;
value = reinterpret_borrow<type>(src); value = reinterpret_borrow<type>(src);
......
...@@ -161,6 +161,18 @@ ...@@ -161,6 +161,18 @@
#include <typeindex> #include <typeindex>
#include <type_traits> #include <type_traits>
// Compile-time switches that control how strictly pybind11::str treats Python
// bytes objects. PYBIND11_STR_NON_PERMISSIVE is defined unconditionally here.
#define PYBIND11_STR_NON_PERMISSIVE
// If UNDEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
// (probably surprising, but this is the legacy behavior). As a side-effect,
// pybind11::isinstance<str>() is true for both pybind11::str and pybind11::bytes.
// If DEFINED, pybind11::str can only hold PyUnicodeObject, and
// pybind11::isinstance<str>() is true only for pybind11::str.
//
// The next switch is left commented out, i.e. it is UNDEFINED by default.
//#define PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
// This macro has an effect only if PYBIND11_STR_NON_PERMISSIVE is defined.
// If UNDEFINED, the pybind11::str caster will implicitly decode bytes to PyUnicodeObject.
// The decode uses UTF-8; bytes that are not valid UTF-8 make the caster throw
// error_already_set instead of reporting a failed load.
// If DEFINED, the pybind11::str caster will only accept PyUnicodeObject.
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions #if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr) #define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check #define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check
......
...@@ -754,7 +754,12 @@ inline bool PyIterable_Check(PyObject *obj) { ...@@ -754,7 +754,12 @@ inline bool PyIterable_Check(PyObject *obj) {
inline bool PyNone_Check(PyObject *o) { return o == Py_None; } inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
#ifdef PYBIND11_STR_NON_PERMISSIVE
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
#else
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
#endif
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
...@@ -934,7 +939,7 @@ class bytes; ...@@ -934,7 +939,7 @@ class bytes;
class str : public object { class str : public object {
public: public:
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str) PYBIND11_OBJECT_CVT(str, object, PYBIND11_STR_CHECK_FUN, raw_str)
str(const char *c, size_t n) str(const char *c, size_t n)
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) { : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
......
...@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster { ...@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
using value_conv = make_caster<Value>; using value_conv = make_caster<Value>;
bool load(handle src, bool convert) { bool load(handle src, bool convert) {
if (!isinstance<sequence>(src) || isinstance<str>(src)) if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
return false; return false;
auto s = reinterpret_borrow<sequence>(src); auto s = reinterpret_borrow<sequence>(src);
value.clear(); value.clear();
......
...@@ -22,6 +22,8 @@ def test_evals(capture): ...@@ -22,6 +22,8 @@ def test_evals(capture):
@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError) @pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
def test_eval_file(): def test_eval_file():
filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py") filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
if env.PY2:
filename = filename.decode('utf-8')
assert m.test_eval_file(filename) assert m.test_eval_file(filename)
assert m.test_eval_file_failure() assert m.test_eval_file_failure()
......
...@@ -68,7 +68,7 @@ def test_python_alreadyset_in_destructor(monkeypatch, capsys): ...@@ -68,7 +68,7 @@ def test_python_alreadyset_in_destructor(monkeypatch, capsys):
# Use monkeypatch so pytest can apply and remove the patch as appropriate # Use monkeypatch so pytest can apply and remove the patch as appropriate
monkeypatch.setattr(sys, "unraisablehook", hook) monkeypatch.setattr(sys, "unraisablehook", hook)
assert m.python_alreadyset_in_destructor("already_set demo") is True assert m.python_alreadyset_in_destructor(u"already_set demo") is True
if hooked: if hooked:
assert triggered[0] is True assert triggered[0] is True
......
...@@ -410,4 +410,18 @@ TEST_SUBMODULE(pytypes, m) { ...@@ -410,4 +410,18 @@ TEST_SUBMODULE(pytypes, m) {
// test_builtin_functions // test_builtin_functions
m.def("get_len", [](py::handle h) { return py::len(h); }); m.def("get_len", [](py::handle h) { return py::len(h); });
#ifdef PYBIND11_STR_NON_PERMISSIVE
m.attr("has_str_non_permissive") = true;
#endif
#ifdef PYBIND11_STR_CASTER_NO_IMPLICIT_DECODE
m.attr("has_str_caster_no_implicit_decode") = true;
#endif
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });
m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
} }
...@@ -120,14 +120,19 @@ def test_str(doc): ...@@ -120,14 +120,19 @@ def test_str(doc):
assert s1 == s2 assert s1 == s2
malformed_utf8 = b"\x80" malformed_utf8 = b"\x80"
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
if env.PY2: if env.PY2:
# with pytest.raises(UnicodeDecodeError): if hasattr(m, "has_str_non_permissive"):
# m.str_from_object(malformed_utf8) with pytest.raises(UnicodeDecodeError):
m.str_from_object(malformed_utf8)
else:
m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
with pytest.raises(UnicodeDecodeError): with pytest.raises(UnicodeDecodeError):
m.str_from_handle(malformed_utf8) m.str_from_handle(malformed_utf8)
else: else:
# assert m.str_from_object(malformed_utf8) == "b'\\x80'" if hasattr(m, "has_str_non_permissive"):
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
else:
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
assert m.str_from_handle(malformed_utf8) == "b'\\x80'" assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
...@@ -301,6 +306,7 @@ def test_pybind11_str_raw_str(): ...@@ -301,6 +306,7 @@ def test_pybind11_str_raw_str():
valid_orig = u"DZ" valid_orig = u"DZ"
valid_utf8 = valid_orig.encode("utf-8") valid_utf8 = valid_orig.encode("utf-8")
valid_cvt = cvt(valid_utf8) valid_cvt = cvt(valid_utf8)
<<<<<<< HEAD
assert type(valid_cvt) == bytes # Probably surprising. assert type(valid_cvt) == bytes # Probably surprising.
assert valid_cvt == b"\xc7\xb1" assert valid_cvt == b"\xc7\xb1"
...@@ -308,6 +314,28 @@ def test_pybind11_str_raw_str(): ...@@ -308,6 +314,28 @@ def test_pybind11_str_raw_str():
malformed_cvt = cvt(malformed_utf8) malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) == bytes # Probably surprising. assert type(malformed_cvt) == bytes # Probably surprising.
assert malformed_cvt == b"\x80" assert malformed_cvt == b"\x80"
=======
if hasattr(m, "has_str_non_permissive"):
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
if env.PY2:
assert valid_cvt == valid_orig
else:
assert valid_cvt == u"b'\\xc7\\xb1'"
else:
assert valid_cvt is valid_utf8
malformed_utf8 = b'\x80'
if hasattr(m, "has_str_non_permissive"):
if env.PY2:
with pytest.raises(UnicodeDecodeError):
cvt(malformed_utf8)
else:
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) is unicode if env.PY2 else str # noqa: F821
assert malformed_cvt == u"b'\\x80'"
else:
assert cvt(malformed_utf8) is malformed_utf8
>>>>>>> 58c1719 (Meta PR for Google Patches)
def test_implicit_casting(): def test_implicit_casting():
...@@ -486,3 +514,41 @@ def test_builtin_functions(): ...@@ -486,3 +514,41 @@ def test_builtin_functions():
"object of type 'generator' has no len()", "object of type 'generator' has no len()",
"'generator' has no length", "'generator' has no length",
] # PyPy ] # PyPy
def test_isinstance_string_types():
    """Verify py::isinstance<py::bytes> / py::isinstance<py::str> for bytes and unicode inputs.

    A bytes object is reported as a pybind11::str only when the extension
    module does not advertise ``has_str_non_permissive``.
    """
    # py::bytes matches bytes objects only.
    assert m.isinstance_pybind11_bytes(b"")
    assert not m.isinstance_pybind11_bytes(u"")

    # py::str always matches unicode objects.
    assert m.isinstance_pybind11_str(u"")

    # Whether bytes also counts as py::str depends on the build flag.
    bytes_counts_as_str = m.isinstance_pybind11_str(b"")
    if not hasattr(m, "has_str_non_permissive"):
        assert bytes_counts_as_str
    else:
        assert not bytes_counts_as_str
def test_pass_bytes_or_unicode_to_string_types():
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
with pytest.raises(TypeError):
m.pass_to_pybind11_bytes(u"Str")
if hasattr(m, "has_str_caster_no_implicit_decode"):
with pytest.raises(TypeError):
m.pass_to_pybind11_str(b"Bytes")
else:
assert m.pass_to_pybind11_str(b"Bytes") == 5
assert m.pass_to_pybind11_str(u"Str") == 3
assert m.pass_to_std_string(b"Bytes") == 5
assert m.pass_to_std_string(u"Str") == 3
malformed_utf8 = b"\x80"
if hasattr(m, "has_str_non_permissive"):
if hasattr(m, "has_str_caster_no_implicit_decode"):
with pytest.raises(TypeError):
m.pass_to_pybind11_str(malformed_utf8)
else:
with pytest.raises(UnicodeDecodeError):
m.pass_to_pybind11_str(malformed_utf8)
else:
assert m.pass_to_pybind11_str(malformed_utf8) == 1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment