[Utils/Trivia] Handle smart quotes (#2162)

Adds a new filter function for substituting out smart-quotes.

Makes trivia use it.
This commit is contained in:
Michael H
2018-10-05 18:39:52 -04:00
committed by Toby Harradine
parent d79996aeea
commit 139329233a
3 changed files with 40 additions and 1 deletions

View File

@@ -8,6 +8,7 @@ __all__ = [
"filter_invites",
"filter_mass_mentions",
"filter_various_mentions",
"normalize_smartquotes",
]
# regexes
@@ -19,6 +20,16 @@ MASS_MENTION_RE = re.compile(r"(@)(?=everyone|here)") # This only matches the @
# Matches "<", then "@" (optionally followed by "!" or "&") or "#", then digits and ">".
# The three capture groups let a substitution insert text between them.
OTHER_MENTION_RE = re.compile(r"(<)(@[!&]?|#)(\d+>)")

# Maps each typographic ("smart") quote character to its plain ASCII equivalent.
SMART_QUOTE_REPLACEMENT_DICT = {
    "\u2018": "'",  # Left single quote
    "\u2019": "'",  # Right single quote
    "\u201C": '"',  # Left double quote
    "\u201D": '"',  # Right double quote
}

# Alternation over every key of the replacement table. Keys are escaped so that
# adding an entry containing a regex metacharacter cannot change the pattern's meaning.
SMART_QUOTE_REPLACE_RE = re.compile("|".join(map(re.escape, SMART_QUOTE_REPLACEMENT_DICT)))
# convenience wrappers
def filter_urls(to_filter: str) -> str:
"""Get a string with URLs sanitized.
@@ -101,3 +112,24 @@ def filter_various_mentions(to_filter: str) -> str:
The sanitized string.
"""
return OTHER_MENTION_RE.sub(r"\1\\\2\3", to_filter)
def normalize_smartquotes(to_normalize: str) -> str:
    """Get a string with smart quotes replaced with normal ones.

    Every character matched by ``SMART_QUOTE_REPLACE_RE`` is swapped for the
    value stored under it in ``SMART_QUOTE_REPLACEMENT_DICT``; all other
    characters are left untouched.

    Parameters
    ----------
    to_normalize : str
        The string to normalize.

    Returns
    -------
    str
        The normalized string.

    """
    # A match with no entry in the table is replaced by the empty string.
    return SMART_QUOTE_REPLACE_RE.sub(
        lambda found: SMART_QUOTE_REPLACEMENT_DICT.get(found.group(0), ""),
        to_normalize,
    )