[Filter] Performance increases. (#2509)

* [Filter] Performance increases.

The filter was already using `re` to split words; this change does the entire search with `re` instead.

A further improvement to this would cache patterns used and update them if the wordlist changes.

* Add a pattern cache

* exit on no-words

* formatting pass

* keep the return type consistent: changing it doesn't break core, but this method is available to cogs

* ...

* Quit being an idiot

* Slight further improvements, a fix, and restructure

Moved actual set creation out of the inner portion.
Reduced config lookups in case of no filter.
Fixed channel wordlist fetching.

* I really should go back to using a pre-commit hook for the style stuff
This commit is contained in:
Michael H 2019-04-02 22:42:13 -04:00 committed by Will
parent e08e95c04e
commit 8ab39512d9

View File

@ -7,7 +7,6 @@ from redbot.core.bot import Red
from redbot.core.i18n import Translator, cog_i18n from redbot.core.i18n import Translator, cog_i18n
from redbot.core.utils.chat_formatting import pagify from redbot.core.utils.chat_formatting import pagify
RE_WORD_SPLIT = re.compile(r"[^\w]")
_ = Translator("Filter", __file__) _ = Translator("Filter", __file__)
@ -32,6 +31,7 @@ class Filter(commands.Cog):
self.settings.register_member(**default_member_settings) self.settings.register_member(**default_member_settings)
self.settings.register_channel(**default_channel_settings) self.settings.register_channel(**default_channel_settings)
self.register_task = self.bot.loop.create_task(self.register_filterban()) self.register_task = self.bot.loop.create_task(self.register_filterban())
self.pattern_cache = {}
def __unload(self): def __unload(self):
self.register_task.cancel() self.register_task.cancel()
@ -165,6 +165,7 @@ class Filter(commands.Cog):
tmp += word + " " tmp += word + " "
added = await self.add_to_filter(channel, word_list) added = await self.add_to_filter(channel, word_list)
if added: if added:
self.invalidate_cache(ctx.guild, ctx.channel)
await ctx.send(_("Words added to filter.")) await ctx.send(_("Words added to filter."))
else: else:
await ctx.send(_("Words already in the filter.")) await ctx.send(_("Words already in the filter."))
@ -198,6 +199,7 @@ class Filter(commands.Cog):
removed = await self.remove_from_filter(channel, word_list) removed = await self.remove_from_filter(channel, word_list)
if removed: if removed:
await ctx.send(_("Words removed from filter.")) await ctx.send(_("Words removed from filter."))
self.invalidate_cache(ctx.guild, ctx.channel)
else: else:
await ctx.send(_("Those words weren't in the filter.")) await ctx.send(_("Those words weren't in the filter."))
@ -229,6 +231,7 @@ class Filter(commands.Cog):
tmp += word + " " tmp += word + " "
added = await self.add_to_filter(server, word_list) added = await self.add_to_filter(server, word_list)
if added: if added:
self.invalidate_cache(ctx.guild)
await ctx.send(_("Words successfully added to filter.")) await ctx.send(_("Words successfully added to filter."))
else: else:
await ctx.send(_("Those words were already in the filter.")) await ctx.send(_("Those words were already in the filter."))
@ -261,6 +264,7 @@ class Filter(commands.Cog):
tmp += word + " " tmp += word + " "
removed = await self.remove_from_filter(server, word_list) removed = await self.remove_from_filter(server, word_list)
if removed: if removed:
self.invalidate_cache(ctx.guild)
await ctx.send(_("Words successfully removed from filter.")) await ctx.send(_("Words successfully removed from filter."))
else: else:
await ctx.send(_("Those words weren't in the filter.")) await ctx.send(_("Those words weren't in the filter."))
@ -279,6 +283,10 @@ class Filter(commands.Cog):
else: else:
await ctx.send(_("Names and nicknames will now be filtered.")) await ctx.send(_("Names and nicknames will now be filtered."))
def invalidate_cache(self, guild: discord.Guild, channel: discord.TextChannel = None):
    """Invalidate cached filter pattern(s).

    Cache keys are ``(guild, channel)`` tuples; the guild-wide pattern is
    stored under ``(guild, None)``.

    Parameters
    ----------
    guild
        The guild whose cached pattern(s) should be dropped.
    channel
        When given, only the pattern cached for that channel is dropped.
        When omitted (``None``), the guild-wide pattern and every
        channel-level pattern cached for ``guild`` are dropped.
    """
    self.pattern_cache.pop((guild, channel), None)
    if channel is None:
        # Channel-level patterns are compiled from the guild word list
        # combined with the channel word list, so a guild-level change makes
        # every (guild, <channel>) entry stale as well.
        # Collect the keys first: a dict must not change size while it is
        # being iterated.
        stale_keys = [key for key in self.pattern_cache if key[0] == guild]
        for key in stale_keys:
            del self.pattern_cache[key]
async def add_to_filter( async def add_to_filter(
self, server_or_channel: Union[discord.Guild, discord.TextChannel], words: list self, server_or_channel: Union[discord.Guild, discord.TextChannel], words: list
) -> bool: ) -> bool:
@ -322,24 +330,34 @@ class Filter(commands.Cog):
async def filter_hits( async def filter_hits(
self, text: str, server_or_channel: Union[discord.Guild, discord.TextChannel] self, text: str, server_or_channel: Union[discord.Guild, discord.TextChannel]
) -> Set[str]: ) -> Set[str]:
if isinstance(server_or_channel, discord.Guild):
word_list = set(await self.settings.guild(server_or_channel).filter()) try:
elif isinstance(server_or_channel, discord.TextChannel): guild = server_or_channel.guild
word_list = set( channel = server_or_channel
await self.settings.guild(server_or_channel.guild).filter() except AttributeError:
+ await self.settings.channel(server_or_channel).filter() guild = server_or_channel
channel = None
hits: Set[str] = set()
try:
pattern = self.pattern_cache[(guild, channel)]
except KeyError:
word_list = set(await self.settings.guild(guild).filter())
if channel:
word_list |= set(await self.settings.channel(channel).filter())
if word_list:
pattern = re.compile(
"|".join(rf"\b{re.escape(w)}\b" for w in word_list), flags=re.I
) )
else: else:
raise TypeError("%r should be Guild or TextChannel" % server_or_channel) pattern = None
content = text.lower() self.pattern_cache[(guild, channel)] = pattern
msg_words = set(RE_WORD_SPLIT.split(content))
filtered_phrases = {x for x in word_list if len(RE_WORD_SPLIT.split(x)) > 1} if pattern:
filtered_words = word_list - filtered_phrases hits |= set(pattern.findall(text))
hits = {p for p in filtered_phrases if p in content}
hits |= filtered_words & msg_words
return hits return hits
async def check_filter(self, message: discord.Message): async def check_filter(self, message: discord.Message):