Commit ae6d64de, authored by Raphael Yancey, committed by GitHub

Normalize author name unicode before matching (#2006)

* Fix accented characters not being matched in author name

Fixes #2004

* Normalized the strings instead of modifying the pattern

* Applied isort & black
parent ab66bb9d
......@@ -5,6 +5,7 @@ import re
from contextlib import contextmanager
from typing import Union
from unicodedata import normalize
from warnings import warn
from poetry.semver import Version
......@@ -160,7 +161,7 @@ class Package(object):
if not self._authors:
return {"name": None, "email": None}
m = AUTHOR_REGEX.match(self._authors[0])
m = AUTHOR_REGEX.match(normalize("NFC", self._authors[0]))
name = m.group("name")
email = m.group("email")
......@@ -171,7 +172,7 @@ class Package(object):
if not self._maintainers:
return {"name": None, "email": None}
m = AUTHOR_REGEX.match(self._maintainers[0])
m = AUTHOR_REGEX.match(normalize("NFC", self._maintainers[0]))
name = m.group("name")
email = m.group("email")
......
......@@ -13,6 +13,18 @@ def test_package_authors():
assert package.author_name == "Sébastien Eustace"
assert package.author_email == "sebastien@eustace.io"
package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # With combining diacritics (ë = e + ¨ = e\u0308)
assert package.author_name == "Raphaël Yancey" # Is normalized into \u00EB
assert package.author_email == "raphael@badfile.net"
package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # Without (ë = \u00EB)
assert package.author_name == "Raphaël Yancey"
assert package.author_email == "raphael@badfile.net"
package.authors.insert(0, "John Doe")
assert package.author_name == "John Doe"
assert package.author_email is None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment