mirror of
https://github.com/Cog-Creators/Red-DiscordBot.git
synced 2025-11-06 03:08:55 -05:00
Improve performance of pagify() (#5698)
Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
This commit is contained in:
parent
79d11e947c
commit
533f036ed2
@ -27,6 +27,10 @@ Chat Formatting
|
||||
|
||||
.. automodule:: redbot.core.utils.chat_formatting
|
||||
:members:
|
||||
:exclude-members: pagify
|
||||
|
||||
.. autofunction:: pagify(text, delims=('\n',), *, priority=False, escape_mass_mentions=True, shorten_by=8, page_length=2000)
|
||||
:for:
|
||||
|
||||
Embed Helpers
|
||||
=============
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import itertools
|
||||
import math
|
||||
import textwrap
|
||||
from io import BytesIO
|
||||
from typing import Iterator, List, Optional, Sequence, SupportsInt, Union
|
||||
@ -200,17 +203,11 @@ def spoiler(text: str, escape_formatting: bool = True) -> str:
|
||||
return f"||{escape(text, formatting=escape_formatting)}||"
|
||||
|
||||
|
||||
def pagify(
|
||||
text: str,
|
||||
delims: Sequence[str] = ["\n"],
|
||||
*,
|
||||
priority: bool = False,
|
||||
escape_mass_mentions: bool = True,
|
||||
shorten_by: int = 8,
|
||||
page_length: int = 2000,
|
||||
) -> Iterator[str]:
|
||||
class pagify(Iterator[str]):
|
||||
"""Generate multiple pages from the given text.
|
||||
|
||||
The returned iterator supports length estimation with :func:`operator.length_hint()`.
|
||||
|
||||
Note
|
||||
----
|
||||
This does not respect code blocks or inline code.
|
||||
@ -244,33 +241,82 @@ def pagify(
|
||||
Pages of the given text.
|
||||
|
||||
"""
|
||||
in_text = text
|
||||
page_length -= shorten_by
|
||||
while len(in_text) > page_length:
|
||||
this_page_len = page_length
|
||||
if escape_mass_mentions:
|
||||
this_page_len -= in_text.count("@here", 0, page_length) + in_text.count(
|
||||
"@everyone", 0, page_length
|
||||
)
|
||||
closest_delim = (in_text.rfind(d, 1, this_page_len) for d in delims)
|
||||
if priority:
|
||||
closest_delim = next((x for x in closest_delim if x > 0), -1)
|
||||
else:
|
||||
closest_delim = max(closest_delim)
|
||||
closest_delim = closest_delim if closest_delim != -1 else this_page_len
|
||||
if escape_mass_mentions:
|
||||
to_send = escape(in_text[:closest_delim], mass_mentions=True)
|
||||
else:
|
||||
to_send = in_text[:closest_delim]
|
||||
if len(to_send.strip()) > 0:
|
||||
yield to_send
|
||||
in_text = in_text[closest_delim:]
|
||||
|
||||
if len(in_text.strip()) > 0:
|
||||
# when changing signature of this method, please update it in docs/framework_utils.rst as well
|
||||
def __init__(
|
||||
self,
|
||||
text: str,
|
||||
delims: Sequence[str] = ("\n",),
|
||||
*,
|
||||
priority: bool = False,
|
||||
escape_mass_mentions: bool = True,
|
||||
shorten_by: int = 8,
|
||||
page_length: int = 2000,
|
||||
) -> None:
|
||||
self._text = text
|
||||
self._delims = delims
|
||||
self._priority = priority
|
||||
self._escape_mass_mentions = escape_mass_mentions
|
||||
self._shorten_by = shorten_by
|
||||
self._page_length = page_length - shorten_by
|
||||
|
||||
self._start = 0
|
||||
self._end = len(text)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
text = self._text
|
||||
if len(text) > 20:
|
||||
text = f"{text[:19]}\N{HORIZONTAL ELLIPSIS}"
|
||||
return (
|
||||
"pagify("
|
||||
f"{text!r},"
|
||||
f" {self._delims!r},"
|
||||
f" priority={self._priority!r},"
|
||||
f" escape_mass_mentions={self._escape_mass_mentions!r},"
|
||||
f" shorten_by={self._shorten_by!r},"
|
||||
f" page_length={self._page_length + self._shorten_by!r}"
|
||||
")"
|
||||
)
|
||||
|
||||
def __length_hint__(self) -> int:
|
||||
return math.ceil((self._end - self._start) / self._page_length)
|
||||
|
||||
def __iter__(self) -> pagify:
|
||||
return self
|
||||
|
||||
def __next__(self) -> str:
|
||||
text = self._text
|
||||
escape_mass_mentions = self._escape_mass_mentions
|
||||
page_length = self._page_length
|
||||
start = self._start
|
||||
end = self._end
|
||||
|
||||
while (end - start) > page_length:
|
||||
stop = start + page_length
|
||||
if escape_mass_mentions:
|
||||
yield escape(in_text, mass_mentions=True)
|
||||
stop -= text.count("@here", start, stop) + text.count("@everyone", start, stop)
|
||||
closest_delim_it = (text.rfind(d, start + 1, stop) for d in self._delims)
|
||||
if self._priority:
|
||||
closest_delim = next((x for x in closest_delim_it if x > 0), -1)
|
||||
else:
|
||||
yield in_text
|
||||
closest_delim = max(closest_delim_it)
|
||||
stop = closest_delim if closest_delim != -1 else stop
|
||||
if escape_mass_mentions:
|
||||
to_send = escape(text[start:stop], mass_mentions=True)
|
||||
else:
|
||||
to_send = text[start:stop]
|
||||
start = self._start = stop
|
||||
if len(to_send.strip()) > 0:
|
||||
return to_send
|
||||
|
||||
if len(text[start:end].strip()) > 0:
|
||||
self._start = end
|
||||
if escape_mass_mentions:
|
||||
return escape(text[start:end], mass_mentions=True)
|
||||
else:
|
||||
return text[start:end]
|
||||
|
||||
raise StopIteration
|
||||
|
||||
|
||||
def strikethrough(text: str, escape_formatting: bool = True) -> str:
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import pytest
|
||||
import operator
|
||||
import random
|
||||
from redbot.core.utils import (
|
||||
bounded_gather,
|
||||
@ -7,6 +8,8 @@ from redbot.core.utils import (
|
||||
deduplicate_iterables,
|
||||
common_filters,
|
||||
)
|
||||
from redbot.core.utils.chat_formatting import pagify
|
||||
from typing import List
|
||||
|
||||
|
||||
def test_deduplicate_iterables():
|
||||
@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel():
|
||||
def test_normalize_smartquotes():
|
||||
assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize"
|
||||
assert common_filters.normalize_smartquotes("Same String") == "Same String"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,pages,page_length",
|
||||
(
|
||||
# base case
|
||||
(
|
||||
"Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
|
||||
[
|
||||
"Line 1\nA",
|
||||
" longer line 2",
|
||||
"\n'tis a",
|
||||
" veeeeery long",
|
||||
" line numero",
|
||||
" tres\nand the",
|
||||
" last line",
|
||||
],
|
||||
15,
|
||||
),
|
||||
# mid-word split
|
||||
(
|
||||
"Interdisciplinary collaboration improves the quality\nof care.",
|
||||
["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."],
|
||||
16,
|
||||
),
|
||||
# off-by-one errors
|
||||
("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6),
|
||||
(
|
||||
"Lorem ipsum dolor sit amet.",
|
||||
# TODO: "r" and " sit" can fit together but current logic doesn't support it properly
|
||||
["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."],
|
||||
5,
|
||||
),
|
||||
(
|
||||
"Lorem ipsum dolor sit amet.",
|
||||
["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."],
|
||||
4,
|
||||
),
|
||||
# mass mentions
|
||||
(
|
||||
"@everyone listen to me!",
|
||||
# TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!"
|
||||
["@\u200beveryone", " listen", " to me!"],
|
||||
10,
|
||||
),
|
||||
(
|
||||
"@everyone listen to me!",
|
||||
["@everyon", "e listen", " to me!"],
|
||||
9,
|
||||
),
|
||||
(
|
||||
"@everyone listen to me!",
|
||||
["@everyon", "e", " listen", " to me!"],
|
||||
8,
|
||||
),
|
||||
("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10),
|
||||
# whitespace-only page skipping (`\n` skipped)
|
||||
("Split:\n Long-word", ["Split:", " Long-", "word"], 6),
|
||||
),
|
||||
)
|
||||
def test_pagify(text: str, pages: List[str], page_length: int):
|
||||
result = []
|
||||
for page in pagify(text, ("\n", " "), shorten_by=0, page_length=page_length):
|
||||
# sanity check
|
||||
assert len(page) <= page_length
|
||||
result.append(page)
|
||||
|
||||
assert pages == result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,pages,page_length",
|
||||
(
|
||||
# base case
|
||||
(
|
||||
"Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
|
||||
[
|
||||
"Line 1",
|
||||
"\nA longer line",
|
||||
" 2",
|
||||
"\n'tis a",
|
||||
" veeeeery long",
|
||||
" line numero",
|
||||
" tres",
|
||||
"\nand the last",
|
||||
" line",
|
||||
],
|
||||
15,
|
||||
),
|
||||
# mid-word split
|
||||
(
|
||||
"Interdisciplinary collaboration improves the quality\nof care.",
|
||||
["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."],
|
||||
16,
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_pagify_priority(text: str, pages: List[str], page_length: int):
|
||||
result = []
|
||||
for page in pagify(text, ("\n", " "), priority=True, shorten_by=0, page_length=page_length):
|
||||
# sanity check
|
||||
assert len(page) <= page_length
|
||||
result.append(page)
|
||||
|
||||
assert pages == result
|
||||
|
||||
|
||||
def test_pagify_length_hint():
|
||||
it = pagify("A" * 100, shorten_by=0, page_length=10)
|
||||
remaining = 100 // 10
|
||||
|
||||
assert operator.length_hint(it) == remaining
|
||||
|
||||
for page in it:
|
||||
remaining -= 1
|
||||
assert operator.length_hint(it) == remaining
|
||||
|
||||
assert operator.length_hint(it) == 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user