Update text decorations. Fix 'mention' generator and emoji offsets.

This commit is contained in:
Alex Root Junior 2020-05-02 01:56:01 +03:00
parent e9fef19129
commit a64359a6de
4 changed files with 165 additions and 109 deletions

View file

@ -1,22 +1,5 @@
from .text_decorations import html_decoration, markdown_decoration
# Single-character Markdown markers, indexed below: "*", "_", "`", "[".
LIST_MD_SYMBOLS = "*_`["
# (opening, closing) marker pairs: three single-character Markdown markers,
# the triple-backtick fence with its newlines, then four HTML tag pairs.
MD_SYMBOLS = (
(LIST_MD_SYMBOLS[0], LIST_MD_SYMBOLS[0]),
(LIST_MD_SYMBOLS[1], LIST_MD_SYMBOLS[1]),
(LIST_MD_SYMBOLS[2], LIST_MD_SYMBOLS[2]),
(LIST_MD_SYMBOLS[2] * 3 + "\n", "\n" + LIST_MD_SYMBOLS[2] * 3),
("<b>", "</b>"),
("<i>", "</i>"),
("<code>", "</code>"),
("<pre>", "</pre>"),
)
# HTML special characters mapped to their entity replacements.
HTML_QUOTES_MAP = {"<": "&lt;", ">": "&gt;", "&": "&amp;", '"': "&quot;"}
_HQS = HTML_QUOTES_MAP.keys() # HQS for HTML QUOTES SYMBOLS
def _join(*content, sep=" "):
return sep.join(map(str, content))
@ -41,7 +24,7 @@ def bold(*content, sep=" "):
:param sep:
:return:
"""
return markdown_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep)))
return markdown_decoration.bold(value=html_decoration.quote(_join(*content, sep=sep)))
def hbold(*content, sep=" "):
@ -52,7 +35,7 @@ def hbold(*content, sep=" "):
:param sep:
:return:
"""
return html_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep)))
return html_decoration.bold(value=html_decoration.quote(_join(*content, sep=sep)))
def italic(*content, sep=" "):
@ -63,7 +46,7 @@ def italic(*content, sep=" "):
:param sep:
:return:
"""
return markdown_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep)))
return markdown_decoration.italic(value=html_decoration.quote(_join(*content, sep=sep)))
def hitalic(*content, sep=" "):
@ -74,7 +57,7 @@ def hitalic(*content, sep=" "):
:param sep:
:return:
"""
return html_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep)))
return html_decoration.italic(value=html_decoration.quote(_join(*content, sep=sep)))
def code(*content, sep=" "):
@ -85,7 +68,7 @@ def code(*content, sep=" "):
:param sep:
:return:
"""
return markdown_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep)))
return markdown_decoration.code(value=html_decoration.quote(_join(*content, sep=sep)))
def hcode(*content, sep=" "):
@ -96,7 +79,7 @@ def hcode(*content, sep=" "):
:param sep:
:return:
"""
return html_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep)))
return html_decoration.code(value=html_decoration.quote(_join(*content, sep=sep)))
def pre(*content, sep="\n"):
@ -107,7 +90,7 @@ def pre(*content, sep="\n"):
:param sep:
:return:
"""
return markdown_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep)))
return markdown_decoration.pre(value=html_decoration.quote(_join(*content, sep=sep)))
def hpre(*content, sep="\n"):
@ -118,7 +101,7 @@ def hpre(*content, sep="\n"):
:param sep:
:return:
"""
return html_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep)))
return html_decoration.pre(value=html_decoration.quote(_join(*content, sep=sep)))
def underline(*content, sep=" "):
@ -129,9 +112,7 @@ def underline(*content, sep=" "):
:param sep:
:return:
"""
return markdown_decoration.underline.format(
value=markdown_decoration.quote(_join(*content, sep=sep))
)
return markdown_decoration.underline(value=markdown_decoration.quote(_join(*content, sep=sep)))
def hunderline(*content, sep=" "):
@ -142,7 +123,7 @@ def hunderline(*content, sep=" "):
:param sep:
:return:
"""
return html_decoration.underline.format(value=html_decoration.quote(_join(*content, sep=sep)))
return html_decoration.underline(value=html_decoration.quote(_join(*content, sep=sep)))
def strikethrough(*content, sep=" "):
@ -153,7 +134,7 @@ def strikethrough(*content, sep=" "):
:param sep:
:return:
"""
return markdown_decoration.strikethrough.format(
return markdown_decoration.strikethrough(
value=markdown_decoration.quote(_join(*content, sep=sep))
)
@ -166,9 +147,7 @@ def hstrikethrough(*content, sep=" "):
:param sep:
:return:
"""
return html_decoration.strikethrough.format(
value=html_decoration.quote(_join(*content, sep=sep))
)
return html_decoration.strikethrough(value=html_decoration.quote(_join(*content, sep=sep)))
def link(title: str, url: str) -> str:
@ -179,7 +158,7 @@ def link(title: str, url: str) -> str:
:param url:
:return:
"""
return markdown_decoration.link.format(value=html_decoration.quote(title), link=url)
return markdown_decoration.link(value=html_decoration.quote(title), link=url)
def hlink(title: str, url: str) -> str:
@ -190,7 +169,7 @@ def hlink(title: str, url: str) -> str:
:param url:
:return:
"""
return html_decoration.link.format(value=html_decoration.quote(title), link=url)
return html_decoration.link(value=html_decoration.quote(title), link=url)
def hide_link(url: str) -> str:

View file

@ -1,32 +1,23 @@
from __future__ import annotations
import html
import re
import struct
from dataclasses import dataclass
from typing import AnyStr, Callable, Generator, Iterable, List, Optional
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Generator, List, Optional, Pattern, cast
from aiogram.api.types import MessageEntity
if TYPE_CHECKING: # pragma: no cover
from aiogram.api.types import MessageEntity
__all__ = (
"TextDecoration",
"HtmlDecoration",
"MarkdownDecoration",
"html_decoration",
"markdown_decoration",
"add_surrogate",
"remove_surrogate",
)
@dataclass
class TextDecoration:
link: str
bold: str
italic: str
code: str
pre: str
pre_language: str
underline: str
strikethrough: str
quote: Callable[[AnyStr], AnyStr]
class TextDecoration(ABC):
def apply_entity(self, entity: MessageEntity, text: str) -> str:
"""
Apply single entity to text
@ -36,20 +27,27 @@ class TextDecoration:
:return:
"""
if entity.type in ("bold", "italic", "code", "underline", "strikethrough"):
return getattr(self, entity.type).format(value=text)
return cast(str, getattr(self, entity.type)(value=text))
if entity.type == "pre":
return (self.pre_language if entity.language else self.pre).format(
value=text, language=entity.language
return (
self.pre_language(value=text, language=entity.language)
if entity.language
else self.pre(value=text)
)
elif entity.type == "text_mention":
return self.link.format(value=text, link=f"tg://user?id={entity.user.id}")
from aiogram.api.types import User
user = cast(User, entity.user)
return self.link(value=text, link=f"tg://user?id={user.id}")
elif entity.type == "mention":
return text
elif entity.type == "text_link":
return self.link.format(value=text, link=entity.url)
return self.link(value=text, link=cast(str, entity.url))
elif entity.type == "url":
return text
return self.quote(text)
def unparse(self, text, entities: Optional[List[MessageEntity]] = None) -> str:
def unparse(self, text: str, entities: Optional[List[MessageEntity]] = None) -> str:
"""
Unparse message entities
@ -57,22 +55,22 @@ class TextDecoration:
:param entities: Array of MessageEntities
:return:
"""
text = add_surrogate(text)
result = "".join(
self._unparse_entities(
text, sorted(entities, key=lambda item: item.offset) if entities else []
)
)
return remove_surrogate(result)
return result
def _unparse_entities(
self,
text: str,
entities: Iterable[MessageEntity],
entities: List[MessageEntity],
offset: Optional[int] = None,
length: Optional[int] = None,
) -> Generator[str, None, None]:
offset = offset or 0
if offset is None:
offset = 0
length = length or len(text)
for index, entity in enumerate(entities):
@ -83,7 +81,7 @@ class TextDecoration:
start = entity.offset
offset = entity.offset + entity.length
sub_entities = list(filter(lambda e: e.offset < offset, entities[index + 1 :]))
sub_entities = list(filter(lambda e: e.offset < (offset or 0), entities[index + 1 :]))
yield self.apply_entity(
entity,
"".join(self._unparse_entities(text, sub_entities, offset=start, length=offset)),
@ -92,42 +90,102 @@ class TextDecoration:
if offset < length:
yield self.quote(text[offset:length])
@abstractmethod
def link(self, value: str, link: str) -> str: # pragma: no cover
pass
html_decoration = TextDecoration(
link='<a href="{link}">{value}</a>',
bold="<b>{value}</b>",
italic="<i>{value}</i>",
code="<code>{value}</code>",
pre="<pre>{value}</pre>",
pre_language='<pre><code class="language-{language}">{value}</code></pre>',
underline="<u>{value}</u>",
strikethrough="<s>{value}</s>",
quote=html.escape,
)
@abstractmethod
def bold(self, value: str) -> str: # pragma: no cover
pass
MARKDOWN_QUOTE_PATTERN = re.compile(r"([_*\[\]()~`>#+\-|{}.!])")
@abstractmethod
def italic(self, value: str) -> str: # pragma: no cover
pass
markdown_decoration = TextDecoration(
link="[{value}]({link})",
bold="*{value}*",
italic="_{value}_\r",
code="`{value}`",
pre="```{value}```",
pre_language="```{language}\n{value}\n```",
underline="__{value}__",
strikethrough="~{value}~",
quote=lambda text: re.sub(pattern=MARKDOWN_QUOTE_PATTERN, repl=r"\\\1", string=text),
)
@abstractmethod
def code(self, value: str) -> str: # pragma: no cover
pass
@abstractmethod
def pre(self, value: str) -> str: # pragma: no cover
pass
@abstractmethod
def pre_language(self, value: str, language: str) -> str: # pragma: no cover
pass
@abstractmethod
def underline(self, value: str) -> str: # pragma: no cover
pass
@abstractmethod
def strikethrough(self, value: str) -> str: # pragma: no cover
pass
@abstractmethod
def quote(self, value: str) -> str: # pragma: no cover
pass
def add_surrogate(text: str) -> str:
    """
    Expand every character outside the BMP into its UTF-16 surrogate pair.

    Each code point in the range U+10000..U+10FFFF is replaced by two characters
    holding its high and low surrogate code units, so that string indices and
    lengths count UTF-16 code units. All other characters are kept unchanged.

    :param text: source text
    :return: text in which every astral character is represented by two surrogates
    """
    return "".join(
        # "<HH" reads the 4-byte UTF-16-LE encoding as two 16-bit code units.
        "".join(chr(unit) for unit in struct.unpack("<HH", char.encode("utf-16-le")))
        if 0x10000 <= ord(char) <= 0x10FFFF
        else char
        for char in text
    )
class HtmlDecoration(TextDecoration):
    """
    Text decoration that renders entities as HTML tags.

    Every formatting method inserts ``value`` into the markup as-is; only
    :meth:`quote` escapes its argument.
    """

    def link(self, value: str, link: str) -> str:
        """Wrap ``value`` in an ``<a>`` tag whose ``href`` is ``link``."""
        return f'<a href="{link}">{value}</a>'

    def bold(self, value: str) -> str:
        """Wrap ``value`` in ``<b>`` tags."""
        return f"<b>{value}</b>"

    def italic(self, value: str) -> str:
        """Wrap ``value`` in ``<i>`` tags."""
        return f"<i>{value}</i>"

    def code(self, value: str) -> str:
        """Wrap ``value`` in ``<code>`` tags."""
        return f"<code>{value}</code>"

    def pre(self, value: str) -> str:
        """Wrap ``value`` in ``<pre>`` tags."""
        return f"<pre>{value}</pre>"

    def pre_language(self, value: str, language: str) -> str:
        """Wrap ``value`` in ``<pre><code>`` with a ``language-<language>`` class."""
        return f'<pre><code class="language-{language}">{value}</code></pre>'

    def underline(self, value: str) -> str:
        """Wrap ``value`` in ``<u>`` tags."""
        return f"<u>{value}</u>"

    def strikethrough(self, value: str) -> str:
        """Wrap ``value`` in ``<s>`` tags."""
        return f"<s>{value}</s>"

    def quote(self, value: str) -> str:
        """Escape HTML special characters in ``value`` using ``html.escape``."""
        return html.escape(value)
def remove_surrogate(text: str) -> str:
    """
    Merge UTF-16 surrogate pairs back into the characters they encode.

    :param text: text that may contain surrogate code units
    :return: text in which each surrogate pair is combined into a single character
    """
    # "surrogatepass" lets the encoder emit surrogate code units instead of raising;
    # decoding the resulting UTF-16 bytes then combines each pair.
    return text.encode("utf-16", "surrogatepass").decode("utf-16")
class MarkdownDecoration(TextDecoration):
    """
    Text decoration that renders entities with Markdown markers.

    Every formatting method inserts ``value`` into the markup as-is; only
    :meth:`quote` escapes its argument.
    """

    # Characters that quote() prefixes with a backslash. "=" and "\" are included
    # because Telegram's MarkdownV2 rules list "=" among the characters that must be
    # escaped and use "\" itself as the escape character.
    MARKDOWN_QUOTE_PATTERN: Pattern[str] = re.compile(r"([_*\[\]()~`>#+\-=|{}.!\\])")

    def link(self, value: str, link: str) -> str:
        """Render an inline link: ``[value](link)``."""
        return f"[{value}]({link})"

    def bold(self, value: str) -> str:
        """Wrap ``value`` in ``*`` markers."""
        return f"*{value}*"

    def italic(self, value: str) -> str:
        """Wrap ``value`` in ``_`` markers, followed by a carriage return."""
        # The trailing "\r" keeps the closing "_" from merging with an adjacent
        # underline marker ("__"), as suggested in Telegram's MarkdownV2 notes.
        return f"_{value}_\r"

    def code(self, value: str) -> str:
        """Wrap ``value`` in single backticks."""
        return f"`{value}`"

    def pre(self, value: str) -> str:
        """Wrap ``value`` in a triple-backtick fence."""
        return f"```{value}```"

    def pre_language(self, value: str, language: str) -> str:
        """Wrap ``value`` in a triple-backtick fence tagged with ``language``."""
        return f"```{language}\n{value}\n```"

    def underline(self, value: str) -> str:
        """Wrap ``value`` in ``__`` markers."""
        return f"__{value}__"

    def strikethrough(self, value: str) -> str:
        """Wrap ``value`` in ``~`` markers."""
        return f"~{value}~"

    def quote(self, value: str) -> str:
        """
        Escape Markdown special characters in ``value``.

        :param value: plain text
        :return: text with every character matched by ``MARKDOWN_QUOTE_PATTERN``
            preceded by a backslash
        """
        return self.MARKDOWN_QUOTE_PATTERN.sub(r"\\\1", value)
# Module-level instances of the two decoration classes.
html_decoration = HtmlDecoration()
markdown_decoration = MarkdownDecoration()

14
poetry.lock generated
View file

@ -85,7 +85,7 @@ marker = "python_version >= \"3.5\" and sys_platform == \"win32\" or sys_platfor
name = "atomicwrites"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "1.3.0"
version = "1.4.0"
[[package]]
category = "main"
@ -257,7 +257,7 @@ description = "IPython: Productive Interactive Computing"
name = "ipython"
optional = false
python-versions = ">=3.6"
version = "7.13.0"
version = "7.14.0"
[package.dependencies]
appnope = "*"
@ -273,7 +273,7 @@ setuptools = ">=18.5"
traitlets = ">=4.2"
[package.extras]
all = ["numpy (>=1.14)", "testpath", "notebook", "nose (>=0.10.1)", "nbconvert", "requests", "ipywidgets", "qtconsole", "ipyparallel", "Sphinx (>=1.3)", "pygments", "nbformat", "ipykernel"]
all = ["nose (>=0.10.1)", "Sphinx (>=1.3)", "testpath", "nbformat", "ipywidgets", "qtconsole", "numpy (>=1.14)", "notebook", "ipyparallel", "ipykernel", "pygments", "requests", "nbconvert"]
doc = ["Sphinx (>=1.3)"]
kernel = ["ipykernel"]
nbconvert = ["nbconvert"]
@ -979,8 +979,8 @@ asynctest = [
{file = "asynctest-0.13.0.tar.gz", hash = "sha256:c27862842d15d83e6a34eb0b2866c323880eb3a75e4485b079ea11748fd77fac"},
]
atomicwrites = [
{file = "atomicwrites-1.3.0-py2.py3-none-any.whl", hash = "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4"},
{file = "atomicwrites-1.3.0.tar.gz", hash = "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"},
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
]
attrs = [
{file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"},
@ -1071,8 +1071,8 @@ importlib-metadata = [
{file = "importlib_metadata-1.6.0.tar.gz", hash = "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e"},
]
ipython = [
{file = "ipython-7.13.0-py3-none-any.whl", hash = "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333"},
{file = "ipython-7.13.0.tar.gz", hash = "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a"},
{file = "ipython-7.14.0-py3-none-any.whl", hash = "sha256:5b241b84bbf0eb085d43ae9d46adf38a13b45929ca7774a740990c2c242534bb"},
{file = "ipython-7.14.0.tar.gz", hash = "sha256:f0126781d0f959da852fb3089e170ed807388e986a8dd4e6ac44855845b0fb1c"},
]
ipython-genutils = [
{file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"},

View file

@ -180,12 +180,31 @@ class TestTextDecoration:
],
"<s>strike<b>bold<u>under</u></b></s>",
],
# [
# html,
# "test te👍🏿st test",
# [MessageEntity(type="bold", offset=5, length=6, url=None, user=None),],
# "test <b>te👍🏿st</b> test",
# ],
[
html_decoration,
"@username",
[
MessageEntity(
type="mention", offset=0, length=9, url=None, user=None, language=None
),
MessageEntity(
type="bold", offset=0, length=9, url=None, user=None, language=None
),
],
"<b>@username</b>",
],
[
html_decoration,
"test te👍🏿st test",
[MessageEntity(type="bold", offset=5, length=6, url=None, user=None)],
"test <b>te👍🏿st</b> test",
],
[
html_decoration,
"👋🏾 Hi!",
[MessageEntity(type="bold", offset=0, length=8, url=None, user=None)],
"<b>👋🏾 Hi!</b>",
],
],
)
def test_unparse(