Commit ae6d64de, authored by Raphael Yancey, committed by GitHub

Normalize author name unicode before matching (#2006)

* Fix accented characters not being matched in author name

Fixes #2004

* Normalized the strings instead of modifying the pattern

* Applied isort & black
parent ab66bb9d
...@@ -5,6 +5,7 @@ import re ...@@ -5,6 +5,7 @@ import re
from contextlib import contextmanager from contextlib import contextmanager
from typing import Union from typing import Union
from unicodedata import normalize
from warnings import warn from warnings import warn
from poetry.semver import Version from poetry.semver import Version
...@@ -160,7 +161,7 @@ class Package(object): ...@@ -160,7 +161,7 @@ class Package(object):
if not self._authors: if not self._authors:
return {"name": None, "email": None} return {"name": None, "email": None}
m = AUTHOR_REGEX.match(self._authors[0]) m = AUTHOR_REGEX.match(normalize("NFC", self._authors[0]))
name = m.group("name") name = m.group("name")
email = m.group("email") email = m.group("email")
...@@ -171,7 +172,7 @@ class Package(object): ...@@ -171,7 +172,7 @@ class Package(object):
if not self._maintainers: if not self._maintainers:
return {"name": None, "email": None} return {"name": None, "email": None}
m = AUTHOR_REGEX.match(self._maintainers[0]) m = AUTHOR_REGEX.match(normalize("NFC", self._maintainers[0]))
name = m.group("name") name = m.group("name")
email = m.group("email") email = m.group("email")
......
...@@ -13,6 +13,18 @@ def test_package_authors(): ...@@ -13,6 +13,18 @@ def test_package_authors():
assert package.author_name == "Sébastien Eustace" assert package.author_name == "Sébastien Eustace"
assert package.author_email == "sebastien@eustace.io" assert package.author_email == "sebastien@eustace.io"
package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # With combining diacritics (ë = e + ¨ = e\u0308)
assert package.author_name == "Raphaël Yancey" # Is normalized into \u00EB
assert package.author_email == "raphael@badfile.net"
package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # Without (ë = \u00EB)
assert package.author_name == "Raphaël Yancey"
assert package.author_email == "raphael@badfile.net"
package.authors.insert(0, "John Doe") package.authors.insert(0, "John Doe")
assert package.author_name == "John Doe" assert package.author_name == "John Doe"
assert package.author_email is None assert package.author_email is None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment