Strip Unicode bidi control characters in cleanup_code (#6649)

This commit is contained in:
cswimr
2026-02-02 11:48:57 -06:00
committed by GitHub
parent fe574189d6
commit 01a3c17f8e

View File

@@ -42,6 +42,18 @@ _ = Translator("Dev", __file__)
# - or "```" and potentially also strip a single "\n" if it follows it immediately # - or "```" and potentially also strip a single "\n" if it follows it immediately
START_CODE_BLOCK_RE = re.compile(r"^((```[\w.+\-]+\n+(?!```))|(```\n*))") START_CODE_BLOCK_RE = re.compile(r"^((```[\w.+\-]+\n+(?!```))|(```\n*))")
# Unicode bidirectional-formatting control characters (isolates, embeddings,
# overrides and their terminators; see Unicode UAX #9). cleanup_code() joins
# these into one string and passes it to str.strip(), so they are removed only
# from the start and end of the content, not from the middle.
REMOVE_CONTROL_CHARS = [
"\u2066",  # LEFT-TO-RIGHT ISOLATE (LRI)
"\u2067",  # RIGHT-TO-LEFT ISOLATE (RLI)
"\u2068",  # FIRST STRONG ISOLATE (FSI)
"\u202A",  # LEFT-TO-RIGHT EMBEDDING (LRE)
"\u202B",  # RIGHT-TO-LEFT EMBEDDING (RLE)
"\u202D",  # LEFT-TO-RIGHT OVERRIDE (LRO)
"\u202E",  # RIGHT-TO-LEFT OVERRIDE (RLO)
"\u2069",  # POP DIRECTIONAL ISOLATE (PDI)
"\u202C",  # POP DIRECTIONAL FORMATTING (PDF)
]
T = TypeVar("T") T = TypeVar("T")
@@ -75,6 +87,8 @@ async def maybe_await(coro: Union[T, Awaitable[T], Awaitable[Awaitable[T]]]) ->
def cleanup_code(content: str) -> str: def cleanup_code(content: str) -> str:
"""Automatically removes code blocks from the code.""" """Automatically removes code blocks from the code."""
content = content.strip("".join(REMOVE_CONTROL_CHARS))
# remove ```py\n``` # remove ```py\n```
if content.startswith("```") and content.endswith("```"): if content.startswith("```") and content.endswith("```"):
return START_CODE_BLOCK_RE.sub("", content)[:-3].rstrip("\n") return START_CODE_BLOCK_RE.sub("", content)[:-3].rstrip("\n")