From 533f036ed25f2222d554111c83fe697277c34a0a Mon Sep 17 00:00:00 2001 From: Jakub Kuczys Date: Thu, 13 Apr 2023 20:52:54 +0200 Subject: [PATCH] Improve performance of pagify() (#5698) Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com> --- docs/framework_utils.rst | 4 + redbot/core/utils/chat_formatting.py | 116 +++++++++++++++++-------- tests/core/test_utils.py | 121 +++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 35 deletions(-) diff --git a/docs/framework_utils.rst b/docs/framework_utils.rst index 9d719a795..dc7668716 100644 --- a/docs/framework_utils.rst +++ b/docs/framework_utils.rst @@ -27,6 +27,10 @@ Chat Formatting .. automodule:: redbot.core.utils.chat_formatting :members: + :exclude-members: pagify + + .. autofunction:: pagify(text, delims=('\n',), *, priority=False, escape_mass_mentions=True, shorten_by=8, page_length=2000) + :for: Embed Helpers ============= diff --git a/redbot/core/utils/chat_formatting.py b/redbot/core/utils/chat_formatting.py index a36fb761d..a62867555 100644 --- a/redbot/core/utils/chat_formatting.py +++ b/redbot/core/utils/chat_formatting.py @@ -1,5 +1,8 @@ +from __future__ import annotations + import datetime import itertools +import math import textwrap from io import BytesIO from typing import Iterator, List, Optional, Sequence, SupportsInt, Union @@ -200,17 +203,11 @@ def spoiler(text: str, escape_formatting: bool = True) -> str: return f"||{escape(text, formatting=escape_formatting)}||" -def pagify( - text: str, - delims: Sequence[str] = ["\n"], - *, - priority: bool = False, - escape_mass_mentions: bool = True, - shorten_by: int = 8, - page_length: int = 2000, -) -> Iterator[str]: +class pagify(Iterator[str]): """Generate multiple pages from the given text. + The returned iterator supports length estimation with :func:`operator.length_hint()`. + Note ---- This does not respect code blocks or inline code. @@ -244,33 +241,82 @@ def pagify( Pages of the given text. """ - in_text = text - page_length -= shorten_by - while len(in_text) > page_length: - this_page_len = page_length - if escape_mass_mentions: - this_page_len -= in_text.count("@here", 0, page_length) + in_text.count( - "@everyone", 0, page_length - ) - closest_delim = (in_text.rfind(d, 1, this_page_len) for d in delims) - if priority: - closest_delim = next((x for x in closest_delim if x > 0), -1) - else: - closest_delim = max(closest_delim) - closest_delim = closest_delim if closest_delim != -1 else this_page_len - if escape_mass_mentions: - to_send = escape(in_text[:closest_delim], mass_mentions=True) - else: - to_send = in_text[:closest_delim] - if len(to_send.strip()) > 0: - yield to_send - in_text = in_text[closest_delim:] - if len(in_text.strip()) > 0: - if escape_mass_mentions: - yield escape(in_text, mass_mentions=True) - else: - yield in_text + # when changing signature of this method, please update it in docs/framework_utils.rst as well + def __init__( + self, + text: str, + delims: Sequence[str] = ("\n",), + *, + priority: bool = False, + escape_mass_mentions: bool = True, + shorten_by: int = 8, + page_length: int = 2000, + ) -> None: + self._text = text + self._delims = delims + self._priority = priority + self._escape_mass_mentions = escape_mass_mentions + self._shorten_by = shorten_by + self._page_length = page_length - shorten_by + + self._start = 0 + self._end = len(text) + + def __repr__(self) -> str: + text = self._text + if len(text) > 20: + text = f"{text[:19]}\N{HORIZONTAL ELLIPSIS}" + return ( + "pagify(" + f"{text!r}," + f" {self._delims!r}," + f" priority={self._priority!r}," + f" escape_mass_mentions={self._escape_mass_mentions!r}," + f" shorten_by={self._shorten_by!r}," + f" page_length={self._page_length + self._shorten_by!r}" + ")" + ) + + def __length_hint__(self) -> int: + return math.ceil((self._end - self._start) / self._page_length) + + def __iter__(self) -> pagify: + return self + + def __next__(self) -> str: + text = self._text + escape_mass_mentions = self._escape_mass_mentions + page_length = self._page_length + start = self._start + end = self._end + + while (end - start) > page_length: + stop = start + page_length + if escape_mass_mentions: + stop -= text.count("@here", start, stop) + text.count("@everyone", start, stop) + closest_delim_it = (text.rfind(d, start + 1, stop) for d in self._delims) + if self._priority: + closest_delim = next((x for x in closest_delim_it if x > 0), -1) + else: + closest_delim = max(closest_delim_it) + stop = closest_delim if closest_delim != -1 else stop + if escape_mass_mentions: + to_send = escape(text[start:stop], mass_mentions=True) + else: + to_send = text[start:stop] + start = self._start = stop + if len(to_send.strip()) > 0: + return to_send + + if len(text[start:end].strip()) > 0: + self._start = end + if escape_mass_mentions: + return escape(text[start:end], mass_mentions=True) + else: + return text[start:end] + + raise StopIteration def strikethrough(text: str, escape_formatting: bool = True) -> str: diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 01c354f1c..6340357b4 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -1,5 +1,6 @@ import asyncio import pytest +import operator import random from redbot.core.utils import ( bounded_gather, @@ -7,6 +8,8 @@ from redbot.core.utils import ( deduplicate_iterables, common_filters, ) +from redbot.core.utils.chat_formatting import pagify +from typing import List def test_deduplicate_iterables(): @@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel(): def test_normalize_smartquotes(): assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize" assert common_filters.normalize_smartquotes("Same String") == "Same String" + + +@pytest.mark.parametrize( + "text,pages,page_length", + ( + # base case + ( + "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line", + [ + "Line 1\nA", + " longer line 2", + "\n'tis a", + " veeeeery long", + " line numero", + " tres\nand the", + " last line", + ], + 15, + ), + # mid-word split + ( + "Interdisciplinary collaboration improves the quality\nof care.", + ["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."], + 16, + ), + # off-by-one errors + ("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6), + ( + "Lorem ipsum dolor sit amet.", + # TODO: "r" and " sit" can fit together but current logic doesn't support it properly + ["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."], + 5, + ), + ( + "Lorem ipsum dolor sit amet.", + ["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."], + 4, + ), + # mass mentions + ( + "@everyone listen to me!", + # TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!" + ["@\u200beveryone", " listen", " to me!"], + 10, + ), + ( + "@everyone listen to me!", + ["@everyon", "e listen", " to me!"], + 9, + ), + ( + "@everyone listen to me!", + ["@everyon", "e", " listen", " to me!"], + 8, + ), + ("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10), + # whitespace-only page skipping (`\n` skipped) + ("Split:\n Long-word", ["Split:", " Long-", "word"], 6), + ), +) +def test_pagify(text: str, pages: List[str], page_length: int): + result = [] + for page in pagify(text, ("\n", " "), shorten_by=0, page_length=page_length): + # sanity check + assert len(page) <= page_length + result.append(page) + + assert pages == result + + +@pytest.mark.parametrize( + "text,pages,page_length", + ( + # base case + ( + "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line", + [ + "Line 1", + "\nA longer line", + " 2", + "\n'tis a", + " veeeeery long", + " line numero", + " tres", + "\nand the last", + " line", + ], + 15, + ), + # mid-word split + ( + "Interdisciplinary collaboration improves the quality\nof care.", + ["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."], + 16, + ), + ), +) +def test_pagify_priority(text: str, pages: List[str], page_length: int): + result = [] + for page in pagify(text, ("\n", " "), priority=True, shorten_by=0, page_length=page_length): + # sanity check + assert len(page) <= page_length + result.append(page) + + assert pages == result + + +def test_pagify_length_hint(): + it = pagify("A" * 100, shorten_by=0, page_length=10) + remaining = 100 // 10 + + assert operator.length_hint(it) == remaining + + for page in it: + remaining -= 1 + assert operator.length_hint(it) == remaining + + assert operator.length_hint(it) == 0