Improve performance of pagify() (#5698)

Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
This commit is contained in:
Jakub Kuczys 2023-04-13 20:52:54 +02:00 committed by GitHub
parent 79d11e947c
commit 533f036ed2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 206 additions and 35 deletions

View File

@ -27,6 +27,10 @@ Chat Formatting
.. automodule:: redbot.core.utils.chat_formatting .. automodule:: redbot.core.utils.chat_formatting
:members: :members:
:exclude-members: pagify
.. autofunction:: pagify(text, delims=('\n',), *, priority=False, escape_mass_mentions=True, shorten_by=8, page_length=2000)
:for:
Embed Helpers Embed Helpers
============= =============

View File

@ -1,5 +1,8 @@
from __future__ import annotations
import datetime import datetime
import itertools import itertools
import math
import textwrap import textwrap
from io import BytesIO from io import BytesIO
from typing import Iterator, List, Optional, Sequence, SupportsInt, Union from typing import Iterator, List, Optional, Sequence, SupportsInt, Union
@ -200,17 +203,11 @@ def spoiler(text: str, escape_formatting: bool = True) -> str:
return f"||{escape(text, formatting=escape_formatting)}||" return f"||{escape(text, formatting=escape_formatting)}||"
def pagify( class pagify(Iterator[str]):
text: str,
delims: Sequence[str] = ["\n"],
*,
priority: bool = False,
escape_mass_mentions: bool = True,
shorten_by: int = 8,
page_length: int = 2000,
) -> Iterator[str]:
"""Generate multiple pages from the given text. """Generate multiple pages from the given text.
The returned iterator supports length estimation with :func:`operator.length_hint()`.
Note Note
---- ----
This does not respect code blocks or inline code. This does not respect code blocks or inline code.
@ -244,33 +241,82 @@ def pagify(
Pages of the given text. Pages of the given text.
""" """
in_text = text
page_length -= shorten_by
while len(in_text) > page_length:
this_page_len = page_length
if escape_mass_mentions:
this_page_len -= in_text.count("@here", 0, page_length) + in_text.count(
"@everyone", 0, page_length
)
closest_delim = (in_text.rfind(d, 1, this_page_len) for d in delims)
if priority:
closest_delim = next((x for x in closest_delim if x > 0), -1)
else:
closest_delim = max(closest_delim)
closest_delim = closest_delim if closest_delim != -1 else this_page_len
if escape_mass_mentions:
to_send = escape(in_text[:closest_delim], mass_mentions=True)
else:
to_send = in_text[:closest_delim]
if len(to_send.strip()) > 0:
yield to_send
in_text = in_text[closest_delim:]
if len(in_text.strip()) > 0: # when changing signature of this method, please update it in docs/framework_utils.rst as well
def __init__(
self,
text: str,
delims: Sequence[str] = ("\n",),
*,
priority: bool = False,
escape_mass_mentions: bool = True,
shorten_by: int = 8,
page_length: int = 2000,
) -> None:
self._text = text
self._delims = delims
self._priority = priority
self._escape_mass_mentions = escape_mass_mentions
self._shorten_by = shorten_by
self._page_length = page_length - shorten_by
self._start = 0
self._end = len(text)
def __repr__(self) -> str:
text = self._text
if len(text) > 20:
text = f"{text[:19]}\N{HORIZONTAL ELLIPSIS}"
return (
"pagify("
f"{text!r},"
f" {self._delims!r},"
f" priority={self._priority!r},"
f" escape_mass_mentions={self._escape_mass_mentions!r},"
f" shorten_by={self._shorten_by!r},"
f" page_length={self._page_length + self._shorten_by!r}"
")"
)
def __length_hint__(self) -> int:
return math.ceil((self._end - self._start) / self._page_length)
def __iter__(self) -> pagify:
return self
def __next__(self) -> str:
text = self._text
escape_mass_mentions = self._escape_mass_mentions
page_length = self._page_length
start = self._start
end = self._end
while (end - start) > page_length:
stop = start + page_length
if escape_mass_mentions: if escape_mass_mentions:
yield escape(in_text, mass_mentions=True) stop -= text.count("@here", start, stop) + text.count("@everyone", start, stop)
closest_delim_it = (text.rfind(d, start + 1, stop) for d in self._delims)
if self._priority:
closest_delim = next((x for x in closest_delim_it if x > 0), -1)
else: else:
yield in_text closest_delim = max(closest_delim_it)
stop = closest_delim if closest_delim != -1 else stop
if escape_mass_mentions:
to_send = escape(text[start:stop], mass_mentions=True)
else:
to_send = text[start:stop]
start = self._start = stop
if len(to_send.strip()) > 0:
return to_send
if len(text[start:end].strip()) > 0:
self._start = end
if escape_mass_mentions:
return escape(text[start:end], mass_mentions=True)
else:
return text[start:end]
raise StopIteration
def strikethrough(text: str, escape_formatting: bool = True) -> str: def strikethrough(text: str, escape_formatting: bool = True) -> str:

View File

@ -1,5 +1,6 @@
import asyncio import asyncio
import pytest import pytest
import operator
import random import random
from redbot.core.utils import ( from redbot.core.utils import (
bounded_gather, bounded_gather,
@ -7,6 +8,8 @@ from redbot.core.utils import (
deduplicate_iterables, deduplicate_iterables,
common_filters, common_filters,
) )
from redbot.core.utils.chat_formatting import pagify
from typing import List
def test_deduplicate_iterables(): def test_deduplicate_iterables():
@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel():
def test_normalize_smartquotes(): def test_normalize_smartquotes():
assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize" assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize"
assert common_filters.normalize_smartquotes("Same String") == "Same String" assert common_filters.normalize_smartquotes("Same String") == "Same String"
@pytest.mark.parametrize(
    "text,pages,page_length",
    (
        # base case
        (
            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
            [
                "Line 1\nA",
                " longer line 2",
                "\n'tis a",
                " veeeeery long",
                " line numero",
                " tres\nand the",
                " last line",
            ],
            15,
        ),
        # mid-word split
        (
            "Interdisciplinary collaboration improves the quality\nof care.",
            ["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."],
            16,
        ),
        # off-by-one errors
        ("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6),
        (
            "Lorem ipsum dolor sit amet.",
            # TODO: "r" and " sit" can fit together but current logic doesn't support it properly
            ["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."],
            5,
        ),
        (
            "Lorem ipsum dolor sit amet.",
            ["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."],
            4,
        ),
        # mass mentions
        (
            "@everyone listen to me!",
            # TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!"
            ["@\u200beveryone", " listen", " to me!"],
            10,
        ),
        (
            "@everyone listen to me!",
            ["@everyon", "e listen", " to me!"],
            9,
        ),
        (
            "@everyone listen to me!",
            ["@everyon", "e", " listen", " to me!"],
            8,
        ),
        ("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10),
        # whitespace-only page skipping (`\n` skipped)
        ("Split:\n Long-word", ["Split:", " Long-", "word"], 6),
    ),
)
def test_pagify(text: str, pages: List[str], page_length: int):
    """Check the pages produced without delimiter priority.

    Newline and space are both delimiters; every page must also fit within
    ``page_length``.
    """
    pager = pagify(text, ("\n", " "), shorten_by=0, page_length=page_length)
    produced = []
    for chunk in pager:
        # sanity check: no page may exceed the requested length
        assert len(chunk) <= page_length
        produced.append(chunk)

    assert pages == produced
@pytest.mark.parametrize(
    "text,pages,page_length",
    (
        # base case
        (
            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
            [
                "Line 1",
                "\nA longer line",
                " 2",
                "\n'tis a",
                " veeeeery long",
                " line numero",
                " tres",
                "\nand the last",
                " line",
            ],
            15,
        ),
        # mid-word split
        (
            "Interdisciplinary collaboration improves the quality\nof care.",
            ["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."],
            16,
        ),
    ),
)
def test_pagify_priority(text: str, pages: List[str], page_length: int):
    """Check the pages produced with ``priority=True``.

    A newline inside the window is preferred over a space, even when the
    space is closer to the end of the page.
    """
    pager = pagify(text, ("\n", " "), priority=True, shorten_by=0, page_length=page_length)
    produced = []
    for chunk in pager:
        # sanity check: no page may exceed the requested length
        assert len(chunk) <= page_length
        produced.append(chunk)

    assert pages == produced
def test_pagify_length_hint():
    """Check that the length hint counts down by one for each page taken."""
    total_chars, per_page = 100, 10
    pager = pagify("A" * total_chars, shorten_by=0, page_length=per_page)

    # The text has no delimiters, so every page is exactly ``per_page`` long.
    pages_left = total_chars // per_page
    assert operator.length_hint(pager) == pages_left

    for _ in pager:
        pages_left -= 1
        assert operator.length_hint(pager) == pages_left

    assert operator.length_hint(pager) == 0