Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
pybind11
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
pybind11
Commits
6e39b765
Commit
6e39b765
authored
Dec 19, 2019
by
Vemund Handeland
Committed by
Wenzel Jakob
Dec 19, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add C++20 char8_t/u8string support (#2026)
* Fix test build in C++20 * Add C++20 char8_t/u8string support
parent
37d04abd
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
5 deletions
+72
-5
include/pybind11/cast.h
+13
-3
tests/test_builtin_casters.cpp
+20
-2
tests/test_builtin_casters.py
+39
-0
No files found.
include/pybind11/cast.h
View file @
6e39b765
...
@@ -32,6 +32,10 @@
...
@@ -32,6 +32,10 @@
#include <string_view>
#include <string_view>
#endif
#endif
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif
NAMESPACE_BEGIN
(
PYBIND11_NAMESPACE
)
NAMESPACE_BEGIN
(
PYBIND11_NAMESPACE
)
NAMESPACE_BEGIN
(
detail
)
NAMESPACE_BEGIN
(
detail
)
...
@@ -988,6 +992,9 @@ public:
...
@@ -988,6 +992,9 @@ public:
template
<
typename
CharT
>
using
is_std_char_type
=
any_of
<
template
<
typename
CharT
>
using
is_std_char_type
=
any_of
<
std
::
is_same
<
CharT
,
char
>
,
/* std::string */
std
::
is_same
<
CharT
,
char
>
,
/* std::string */
#if defined(PYBIND11_HAS_U8STRING)
std
::
is_same
<
CharT
,
char8_t
>
,
/* std::u8string */
#endif
std
::
is_same
<
CharT
,
char16_t
>
,
/* std::u16string */
std
::
is_same
<
CharT
,
char16_t
>
,
/* std::u16string */
std
::
is_same
<
CharT
,
char32_t
>
,
/* std::u32string */
std
::
is_same
<
CharT
,
char32_t
>
,
/* std::u32string */
std
::
is_same
<
CharT
,
wchar_t
>
/* std::wstring */
std
::
is_same
<
CharT
,
wchar_t
>
/* std::wstring */
...
@@ -1191,6 +1198,9 @@ template <typename StringType, bool IsView = false> struct string_caster {
...
@@ -1191,6 +1198,9 @@ template <typename StringType, bool IsView = false> struct string_caster {
// Simplify life by being able to assume standard char sizes (the standard only guarantees
// Simplify life by being able to assume standard char sizes (the standard only guarantees
// minimums, but Python requires exact sizes)
// minimums, but Python requires exact sizes)
static_assert
(
!
std
::
is_same
<
CharT
,
char
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char size != 1"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char size != 1"
);
#if defined(PYBIND11_HAS_U8STRING)
static_assert
(
!
std
::
is_same
<
CharT
,
char8_t
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char8_t size != 1"
);
#endif
static_assert
(
!
std
::
is_same
<
CharT
,
char16_t
>::
value
||
sizeof
(
CharT
)
==
2
,
"Unsupported char16_t size != 2"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char16_t
>::
value
||
sizeof
(
CharT
)
==
2
,
"Unsupported char16_t size != 2"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char32_t
>::
value
||
sizeof
(
CharT
)
==
4
,
"Unsupported char32_t size != 4"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char32_t
>::
value
||
sizeof
(
CharT
)
==
4
,
"Unsupported char32_t size != 4"
);
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
...
@@ -1209,7 +1219,7 @@ template <typename StringType, bool IsView = false> struct string_caster {
...
@@ -1209,7 +1219,7 @@ template <typename StringType, bool IsView = false> struct string_caster {
#if PY_MAJOR_VERSION >= 3
#if PY_MAJOR_VERSION >= 3
return
load_bytes
(
load_src
);
return
load_bytes
(
load_src
);
#else
#else
if
(
s
izeof
(
CharT
)
==
1
)
{
if
(
s
td
::
is_same
<
CharT
,
char
>::
value
)
{
return
load_bytes
(
load_src
);
return
load_bytes
(
load_src
);
}
}
...
@@ -1269,7 +1279,7 @@ private:
...
@@ -1269,7 +1279,7 @@ private:
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// which supports loading a unicode from a str, doesn't take this path.
// which supports loading a unicode from a str, doesn't take this path.
template
<
typename
C
=
CharT
>
template
<
typename
C
=
CharT
>
bool
load_bytes
(
enable_if_t
<
s
izeof
(
C
)
==
1
,
handle
>
src
)
{
bool
load_bytes
(
enable_if_t
<
s
td
::
is_same
<
C
,
char
>::
value
,
handle
>
src
)
{
if
(
PYBIND11_BYTES_CHECK
(
src
.
ptr
()))
{
if
(
PYBIND11_BYTES_CHECK
(
src
.
ptr
()))
{
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
// without any encoding attempt.
// without any encoding attempt.
...
@@ -1284,7 +1294,7 @@ private:
...
@@ -1284,7 +1294,7 @@ private:
}
}
template
<
typename
C
=
CharT
>
template
<
typename
C
=
CharT
>
bool
load_bytes
(
enable_if_t
<
sizeof
(
C
)
!=
1
,
handle
>
)
{
return
false
;
}
bool
load_bytes
(
enable_if_t
<
!
std
::
is_same
<
C
,
char
>::
value
,
handle
>
)
{
return
false
;
}
};
};
template
<
typename
CharT
,
class
Traits
,
class
Allocator
>
template
<
typename
CharT
,
class
Traits
,
class
Allocator
>
...
...
tests/test_builtin_casters.cpp
View file @
6e39b765
...
@@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) {
else
{
wstr
.
push_back
((
wchar_t
)
mathbfA32
);
}
// 𝐀, utf32
else
{
wstr
.
push_back
((
wchar_t
)
mathbfA32
);
}
// 𝐀, utf32
wstr
.
push_back
(
0x7a
);
// z
wstr
.
push_back
(
0x7a
);
// z
m
.
def
(
"good_utf8_string"
,
[]()
{
return
std
::
string
(
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"good_utf8_string"
,
[]()
{
return
std
::
string
(
(
const
char
*
)
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"good_utf16_string"
,
[
=
]()
{
return
std
::
u16string
({
b16
,
ib16
,
cake16_1
,
cake16_2
,
mathbfA16_1
,
mathbfA16_2
,
z16
});
});
// b‽🎂𝐀z
m
.
def
(
"good_utf16_string"
,
[
=
]()
{
return
std
::
u16string
({
b16
,
ib16
,
cake16_1
,
cake16_2
,
mathbfA16_1
,
mathbfA16_2
,
z16
});
});
// b‽🎂𝐀z
m
.
def
(
"good_utf32_string"
,
[
=
]()
{
return
std
::
u32string
({
a32
,
mathbfA32
,
cake32
,
ib32
,
z32
});
});
// a𝐀🎂‽z
m
.
def
(
"good_utf32_string"
,
[
=
]()
{
return
std
::
u32string
({
a32
,
mathbfA32
,
cake32
,
ib32
,
z32
});
});
// a𝐀🎂‽z
m
.
def
(
"good_wchar_string"
,
[
=
]()
{
return
wstr
;
});
// a‽𝐀z
m
.
def
(
"good_wchar_string"
,
[
=
]()
{
return
wstr
;
});
// a‽𝐀z
...
@@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) {
m
.
def
(
"strlen"
,
[](
char
*
s
)
{
return
strlen
(
s
);
});
m
.
def
(
"strlen"
,
[](
char
*
s
)
{
return
strlen
(
s
);
});
m
.
def
(
"string_length"
,
[](
std
::
string
s
)
{
return
s
.
length
();
});
m
.
def
(
"string_length"
,
[](
std
::
string
s
)
{
return
s
.
length
();
});
#ifdef PYBIND11_HAS_U8STRING
m
.
attr
(
"has_u8string"
)
=
true
;
m
.
def
(
"good_utf8_u8string"
,
[]()
{
return
std
::
u8string
(
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"bad_utf8_u8string"
,
[]()
{
return
std
::
u8string
((
const
char8_t
*
)
"abc
\xd0
"
"def"
);
});
m
.
def
(
"u8_char8_Z"
,
[]()
->
char8_t
{
return
u8'Z'
;
});
// test_single_char_arguments
m
.
def
(
"ord_char8"
,
[](
char8_t
c
)
->
int
{
return
static_cast
<
unsigned
char
>
(
c
);
});
m
.
def
(
"ord_char8_lv"
,
[](
char8_t
&
c
)
->
int
{
return
static_cast
<
unsigned
char
>
(
c
);
});
#endif
// test_string_view
// test_string_view
#ifdef PYBIND11_HAS_STRING_VIEW
#ifdef PYBIND11_HAS_STRING_VIEW
m
.
attr
(
"has_string_view"
)
=
true
;
m
.
attr
(
"has_string_view"
)
=
true
;
...
@@ -69,9 +81,15 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -69,9 +81,15 @@ TEST_SUBMODULE(builtin_casters, m) {
m
.
def
(
"string_view_chars"
,
[](
std
::
string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view_chars"
,
[](
std
::
string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view16_chars"
,
[](
std
::
u16string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view16_chars"
,
[](
std
::
u16string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view32_chars"
,
[](
std
::
u32string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view32_chars"
,
[](
std
::
u32string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view_return"
,
[]()
{
return
std
::
string_view
(
u8"utf8 secret \U0001f382"
);
});
m
.
def
(
"string_view_return"
,
[]()
{
return
std
::
string_view
(
(
const
char
*
)
u8"utf8 secret \U0001f382"
);
});
m
.
def
(
"string_view16_return"
,
[]()
{
return
std
::
u16string_view
(
u"utf16 secret \U0001f382"
);
});
m
.
def
(
"string_view16_return"
,
[]()
{
return
std
::
u16string_view
(
u"utf16 secret \U0001f382"
);
});
m
.
def
(
"string_view32_return"
,
[]()
{
return
std
::
u32string_view
(
U"utf32 secret \U0001f382"
);
});
m
.
def
(
"string_view32_return"
,
[]()
{
return
std
::
u32string_view
(
U"utf32 secret \U0001f382"
);
});
# ifdef PYBIND11_HAS_U8STRING
m
.
def
(
"string_view8_print"
,
[](
std
::
u8string_view
s
)
{
py
::
print
(
s
,
s
.
size
());
});
m
.
def
(
"string_view8_chars"
,
[](
std
::
u8string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view8_return"
,
[]()
{
return
std
::
u8string_view
(
u8"utf8 secret \U0001f382"
);
});
# endif
#endif
#endif
// test_integer_casting
// test_integer_casting
...
...
tests/test_builtin_casters.py
View file @
6e39b765
...
@@ -15,6 +15,8 @@ def test_unicode_conversion():
...
@@ -15,6 +15,8 @@ def test_unicode_conversion():
assert
m
.
good_utf16_string
()
==
u"b‽🎂𝐀z"
assert
m
.
good_utf16_string
()
==
u"b‽🎂𝐀z"
assert
m
.
good_utf32_string
()
==
u"a𝐀🎂‽z"
assert
m
.
good_utf32_string
()
==
u"a𝐀🎂‽z"
assert
m
.
good_wchar_string
()
==
u"a⸘𝐀z"
assert
m
.
good_wchar_string
()
==
u"a⸘𝐀z"
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
good_utf8_u8string
()
==
u"Say utf8‽ 🎂 𝐀"
with
pytest
.
raises
(
UnicodeDecodeError
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_utf8_string
()
m
.
bad_utf8_string
()
...
@@ -29,12 +31,17 @@ def test_unicode_conversion():
...
@@ -29,12 +31,17 @@ def test_unicode_conversion():
if
hasattr
(
m
,
"bad_wchar_string"
):
if
hasattr
(
m
,
"bad_wchar_string"
):
with
pytest
.
raises
(
UnicodeDecodeError
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_wchar_string
()
m
.
bad_wchar_string
()
if
hasattr
(
m
,
"has_u8string"
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_utf8_u8string
()
assert
m
.
u8_Z
()
==
'Z'
assert
m
.
u8_Z
()
==
'Z'
assert
m
.
u8_eacute
()
==
u'é'
assert
m
.
u8_eacute
()
==
u'é'
assert
m
.
u16_ibang
()
==
u'‽'
assert
m
.
u16_ibang
()
==
u'‽'
assert
m
.
u32_mathbfA
()
==
u'𝐀'
assert
m
.
u32_mathbfA
()
==
u'𝐀'
assert
m
.
wchar_heart
()
==
u'♥'
assert
m
.
wchar_heart
()
==
u'♥'
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
u8_char8_Z
()
==
'Z'
def
test_single_char_arguments
():
def
test_single_char_arguments
():
...
@@ -92,6 +99,17 @@ def test_single_char_arguments():
...
@@ -92,6 +99,17 @@ def test_single_char_arguments():
assert
m
.
ord_wchar
(
u'aa'
)
assert
m
.
ord_wchar
(
u'aa'
)
assert
str
(
excinfo
.
value
)
==
toolong_message
assert
str
(
excinfo
.
value
)
==
toolong_message
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
ord_char8
(
u'a'
)
==
0x61
# simple ASCII
assert
m
.
ord_char8_lv
(
u'b'
)
==
0x62
assert
m
.
ord_char8
(
u'é'
)
==
0xE9
# requires 2 bytes in utf-8, but can be stuffed in a char
with
pytest
.
raises
(
ValueError
)
as
excinfo
:
assert
m
.
ord_char8
(
u'Ā'
)
==
0x100
# requires 2 bytes, doesn't fit in a char
assert
str
(
excinfo
.
value
)
==
toobig_message
(
0x100
)
with
pytest
.
raises
(
ValueError
)
as
excinfo
:
assert
m
.
ord_char8
(
u'ab'
)
assert
str
(
excinfo
.
value
)
==
toolong_message
def
test_bytes_to_string
():
def
test_bytes_to_string
():
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
...
@@ -116,10 +134,15 @@ def test_string_view(capture):
...
@@ -116,10 +134,15 @@ def test_string_view(capture):
assert
m
.
string_view_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view16_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xd83c
,
0xdf82
]
assert
m
.
string_view16_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xd83c
,
0xdf82
]
assert
m
.
string_view32_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
127874
]
assert
m
.
string_view32_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
127874
]
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
string_view8_chars
(
"Hi"
)
==
[
72
,
105
]
assert
m
.
string_view8_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view_return
()
==
"utf8 secret 🎂"
assert
m
.
string_view_return
()
==
"utf8 secret 🎂"
assert
m
.
string_view16_return
()
==
"utf16 secret 🎂"
assert
m
.
string_view16_return
()
==
"utf16 secret 🎂"
assert
m
.
string_view32_return
()
==
"utf32 secret 🎂"
assert
m
.
string_view32_return
()
==
"utf32 secret 🎂"
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
string_view8_return
()
==
"utf8 secret 🎂"
with
capture
:
with
capture
:
m
.
string_view_print
(
"Hi"
)
m
.
string_view_print
(
"Hi"
)
...
@@ -132,6 +155,14 @@ def test_string_view(capture):
...
@@ -132,6 +155,14 @@ def test_string_view(capture):
utf16 🎂 8
utf16 🎂 8
utf32 🎂 7
utf32 🎂 7
"""
"""
if
hasattr
(
m
,
"has_u8string"
):
with
capture
:
m
.
string_view8_print
(
"Hi"
)
m
.
string_view8_print
(
"utf8 🎂"
)
assert
capture
==
"""
Hi 2
utf8 🎂 9
"""
with
capture
:
with
capture
:
m
.
string_view_print
(
"Hi, ascii"
)
m
.
string_view_print
(
"Hi, ascii"
)
...
@@ -144,6 +175,14 @@ def test_string_view(capture):
...
@@ -144,6 +175,14 @@ def test_string_view(capture):
Hi, utf16 🎂 12
Hi, utf16 🎂 12
Hi, utf32 🎂 11
Hi, utf32 🎂 11
"""
"""
if
hasattr
(
m
,
"has_u8string"
):
with
capture
:
m
.
string_view8_print
(
"Hi, ascii"
)
m
.
string_view8_print
(
"Hi, utf8 🎂"
)
assert
capture
==
"""
Hi, ascii 9
Hi, utf8 🎂 13
"""
def
test_integer_casting
():
def
test_integer_casting
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment