mirror of
https://github.com/Cog-Creators/Red-DiscordBot.git
synced 2025-11-06 03:08:55 -05:00
Improve performance of pagify() (#5698)
Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
This commit is contained in:
parent
79d11e947c
commit
533f036ed2
@ -27,6 +27,10 @@ Chat Formatting
|
|||||||
|
|
||||||
.. automodule:: redbot.core.utils.chat_formatting
|
.. automodule:: redbot.core.utils.chat_formatting
|
||||||
:members:
|
:members:
|
||||||
|
:exclude-members: pagify
|
||||||
|
|
||||||
|
.. autofunction:: pagify(text, delims=('\n',), *, priority=False, escape_mass_mentions=True, shorten_by=8, page_length=2000)
|
||||||
|
:for:
|
||||||
|
|
||||||
Embed Helpers
|
Embed Helpers
|
||||||
=============
|
=============
|
||||||
|
|||||||
@ -1,5 +1,8 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import itertools
|
import itertools
|
||||||
|
import math
|
||||||
import textwrap
|
import textwrap
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Iterator, List, Optional, Sequence, SupportsInt, Union
|
from typing import Iterator, List, Optional, Sequence, SupportsInt, Union
|
||||||
@ -200,17 +203,11 @@ def spoiler(text: str, escape_formatting: bool = True) -> str:
|
|||||||
return f"||{escape(text, formatting=escape_formatting)}||"
|
return f"||{escape(text, formatting=escape_formatting)}||"
|
||||||
|
|
||||||
|
|
||||||
def pagify(
|
class pagify(Iterator[str]):
|
||||||
text: str,
|
|
||||||
delims: Sequence[str] = ["\n"],
|
|
||||||
*,
|
|
||||||
priority: bool = False,
|
|
||||||
escape_mass_mentions: bool = True,
|
|
||||||
shorten_by: int = 8,
|
|
||||||
page_length: int = 2000,
|
|
||||||
) -> Iterator[str]:
|
|
||||||
"""Generate multiple pages from the given text.
|
"""Generate multiple pages from the given text.
|
||||||
|
|
||||||
|
The returned iterator supports length estimation with :func:`operator.length_hint()`.
|
||||||
|
|
||||||
Note
|
Note
|
||||||
----
|
----
|
||||||
This does not respect code blocks or inline code.
|
This does not respect code blocks or inline code.
|
||||||
@ -244,33 +241,82 @@ def pagify(
|
|||||||
Pages of the given text.
|
Pages of the given text.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
in_text = text
|
|
||||||
page_length -= shorten_by
|
|
||||||
while len(in_text) > page_length:
|
|
||||||
this_page_len = page_length
|
|
||||||
if escape_mass_mentions:
|
|
||||||
this_page_len -= in_text.count("@here", 0, page_length) + in_text.count(
|
|
||||||
"@everyone", 0, page_length
|
|
||||||
)
|
|
||||||
closest_delim = (in_text.rfind(d, 1, this_page_len) for d in delims)
|
|
||||||
if priority:
|
|
||||||
closest_delim = next((x for x in closest_delim if x > 0), -1)
|
|
||||||
else:
|
|
||||||
closest_delim = max(closest_delim)
|
|
||||||
closest_delim = closest_delim if closest_delim != -1 else this_page_len
|
|
||||||
if escape_mass_mentions:
|
|
||||||
to_send = escape(in_text[:closest_delim], mass_mentions=True)
|
|
||||||
else:
|
|
||||||
to_send = in_text[:closest_delim]
|
|
||||||
if len(to_send.strip()) > 0:
|
|
||||||
yield to_send
|
|
||||||
in_text = in_text[closest_delim:]
|
|
||||||
|
|
||||||
if len(in_text.strip()) > 0:
|
# when changing signature of this method, please update it in docs/framework_utils.rst as well
|
||||||
if escape_mass_mentions:
|
def __init__(
|
||||||
yield escape(in_text, mass_mentions=True)
|
self,
|
||||||
else:
|
text: str,
|
||||||
yield in_text
|
delims: Sequence[str] = ("\n",),
|
||||||
|
*,
|
||||||
|
priority: bool = False,
|
||||||
|
escape_mass_mentions: bool = True,
|
||||||
|
shorten_by: int = 8,
|
||||||
|
page_length: int = 2000,
|
||||||
|
) -> None:
|
||||||
|
self._text = text
|
||||||
|
self._delims = delims
|
||||||
|
self._priority = priority
|
||||||
|
self._escape_mass_mentions = escape_mass_mentions
|
||||||
|
self._shorten_by = shorten_by
|
||||||
|
self._page_length = page_length - shorten_by
|
||||||
|
|
||||||
|
self._start = 0
|
||||||
|
self._end = len(text)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
text = self._text
|
||||||
|
if len(text) > 20:
|
||||||
|
text = f"{text[:19]}\N{HORIZONTAL ELLIPSIS}"
|
||||||
|
return (
|
||||||
|
"pagify("
|
||||||
|
f"{text!r},"
|
||||||
|
f" {self._delims!r},"
|
||||||
|
f" priority={self._priority!r},"
|
||||||
|
f" escape_mass_mentions={self._escape_mass_mentions!r},"
|
||||||
|
f" shorten_by={self._shorten_by!r},"
|
||||||
|
f" page_length={self._page_length + self._shorten_by!r}"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
|
||||||
|
def __length_hint__(self) -> int:
|
||||||
|
return math.ceil((self._end - self._start) / self._page_length)
|
||||||
|
|
||||||
|
def __iter__(self) -> pagify:
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __next__(self) -> str:
|
||||||
|
text = self._text
|
||||||
|
escape_mass_mentions = self._escape_mass_mentions
|
||||||
|
page_length = self._page_length
|
||||||
|
start = self._start
|
||||||
|
end = self._end
|
||||||
|
|
||||||
|
while (end - start) > page_length:
|
||||||
|
stop = start + page_length
|
||||||
|
if escape_mass_mentions:
|
||||||
|
stop -= text.count("@here", start, stop) + text.count("@everyone", start, stop)
|
||||||
|
closest_delim_it = (text.rfind(d, start + 1, stop) for d in self._delims)
|
||||||
|
if self._priority:
|
||||||
|
closest_delim = next((x for x in closest_delim_it if x > 0), -1)
|
||||||
|
else:
|
||||||
|
closest_delim = max(closest_delim_it)
|
||||||
|
stop = closest_delim if closest_delim != -1 else stop
|
||||||
|
if escape_mass_mentions:
|
||||||
|
to_send = escape(text[start:stop], mass_mentions=True)
|
||||||
|
else:
|
||||||
|
to_send = text[start:stop]
|
||||||
|
start = self._start = stop
|
||||||
|
if len(to_send.strip()) > 0:
|
||||||
|
return to_send
|
||||||
|
|
||||||
|
if len(text[start:end].strip()) > 0:
|
||||||
|
self._start = end
|
||||||
|
if escape_mass_mentions:
|
||||||
|
return escape(text[start:end], mass_mentions=True)
|
||||||
|
else:
|
||||||
|
return text[start:end]
|
||||||
|
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
|
||||||
def strikethrough(text: str, escape_formatting: bool = True) -> str:
|
def strikethrough(text: str, escape_formatting: bool = True) -> str:
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import pytest
|
import pytest
|
||||||
|
import operator
|
||||||
import random
|
import random
|
||||||
from redbot.core.utils import (
|
from redbot.core.utils import (
|
||||||
bounded_gather,
|
bounded_gather,
|
||||||
@ -7,6 +8,8 @@ from redbot.core.utils import (
|
|||||||
deduplicate_iterables,
|
deduplicate_iterables,
|
||||||
common_filters,
|
common_filters,
|
||||||
)
|
)
|
||||||
|
from redbot.core.utils.chat_formatting import pagify
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
def test_deduplicate_iterables():
|
def test_deduplicate_iterables():
|
||||||
@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel():
|
|||||||
def test_normalize_smartquotes():
|
def test_normalize_smartquotes():
|
||||||
assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize"
|
assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize"
|
||||||
assert common_filters.normalize_smartquotes("Same String") == "Same String"
|
assert common_filters.normalize_smartquotes("Same String") == "Same String"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"text,pages,page_length",
|
||||||
|
(
|
||||||
|
# base case
|
||||||
|
(
|
||||||
|
"Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
|
||||||
|
[
|
||||||
|
"Line 1\nA",
|
||||||
|
" longer line 2",
|
||||||
|
"\n'tis a",
|
||||||
|
" veeeeery long",
|
||||||
|
" line numero",
|
||||||
|
" tres\nand the",
|
||||||
|
" last line",
|
||||||
|
],
|
||||||
|
15,
|
||||||
|
),
|
||||||
|
# mid-word split
|
||||||
|
(
|
||||||
|
"Interdisciplinary collaboration improves the quality\nof care.",
|
||||||
|
["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."],
|
||||||
|
16,
|
||||||
|
),
|
||||||
|
# off-by-one errors
|
||||||
|
("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6),
|
||||||
|
(
|
||||||
|
"Lorem ipsum dolor sit amet.",
|
||||||
|
# TODO: "r" and " sit" can fit together but current logic doesn't support it properly
|
||||||
|
["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."],
|
||||||
|
5,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Lorem ipsum dolor sit amet.",
|
||||||
|
["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."],
|
||||||
|
4,
|
||||||
|
),
|
||||||
|
# mass mentions
|
||||||
|
(
|
||||||
|
"@everyone listen to me!",
|
||||||
|
# TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!"
|
||||||
|
["@\u200beveryone", " listen", " to me!"],
|
||||||
|
10,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"@everyone listen to me!",
|
||||||
|
["@everyon", "e listen", " to me!"],
|
||||||
|
9,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"@everyone listen to me!",
|
||||||
|
["@everyon", "e", " listen", " to me!"],
|
||||||
|
8,
|
||||||
|
),
|
||||||
|
("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10),
|
||||||
|
# whitespace-only page skipping (`\n` skipped)
|
||||||
|
("Split:\n Long-word", ["Split:", " Long-", "word"], 6),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_pagify(text: str, pages: List[str], page_length: int):
|
||||||
|
result = []
|
||||||
|
for page in pagify(text, ("\n", " "), shorten_by=0, page_length=page_length):
|
||||||
|
# sanity check
|
||||||
|
assert len(page) <= page_length
|
||||||
|
result.append(page)
|
||||||
|
|
||||||
|
assert pages == result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"text,pages,page_length",
|
||||||
|
(
|
||||||
|
# base case
|
||||||
|
(
|
||||||
|
"Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
|
||||||
|
[
|
||||||
|
"Line 1",
|
||||||
|
"\nA longer line",
|
||||||
|
" 2",
|
||||||
|
"\n'tis a",
|
||||||
|
" veeeeery long",
|
||||||
|
" line numero",
|
||||||
|
" tres",
|
||||||
|
"\nand the last",
|
||||||
|
" line",
|
||||||
|
],
|
||||||
|
15,
|
||||||
|
),
|
||||||
|
# mid-word split
|
||||||
|
(
|
||||||
|
"Interdisciplinary collaboration improves the quality\nof care.",
|
||||||
|
["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."],
|
||||||
|
16,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_pagify_priority(text: str, pages: List[str], page_length: int):
|
||||||
|
result = []
|
||||||
|
for page in pagify(text, ("\n", " "), priority=True, shorten_by=0, page_length=page_length):
|
||||||
|
# sanity check
|
||||||
|
assert len(page) <= page_length
|
||||||
|
result.append(page)
|
||||||
|
|
||||||
|
assert pages == result
|
||||||
|
|
||||||
|
|
||||||
|
def test_pagify_length_hint():
|
||||||
|
it = pagify("A" * 100, shorten_by=0, page_length=10)
|
||||||
|
remaining = 100 // 10
|
||||||
|
|
||||||
|
assert operator.length_hint(it) == remaining
|
||||||
|
|
||||||
|
for page in it:
|
||||||
|
remaining -= 1
|
||||||
|
assert operator.length_hint(it) == remaining
|
||||||
|
|
||||||
|
assert operator.length_hint(it) == 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user