def extract_entity(text: str, e: "types.MessageEntity") -> str:
    """
    Returns the content of the entity.

    The entity's ``offset`` and ``length`` are interpreted as counts of
    UTF-16 code units (characters outside the Basic Multilingual Plane,
    such as most emoji, count as two units), so the text is sliced in its
    UTF-16 form rather than by Python code points.

    A zero-length entity, or one whose offset lies at or past the end of
    the text, yields an empty string. An entity that runs past the end of
    the text is truncated at the end of the text.

    :param text: The text of the message the entity belongs to
    :type text: :obj:`str`

    :param e: The entity to extract
    :type e: :obj:`MessageEntity`

    :return: The content of the entity
    :rtype: :obj:`str`
    """
    # In UTF-16-LE every code unit is exactly two bytes, so a code-unit
    # offset maps to a byte offset by doubling. "surrogatepass" keeps the
    # round trip from raising if the text or the slice holds a lone surrogate.
    utf16_text = text.encode("utf-16-le", "surrogatepass")

    # Clamp so a negative offset cannot wrap around to the end of the buffer.
    start = max(e.offset, 0) * 2
    end = start + e.length * 2

    # Slicing clamps to the buffer, which covers entities reaching the end.
    return utf16_text[start:end].decode("utf-16-le", "surrogatepass")
text", + "[谷歌](http://www.google.com/)": "谷歌", + '`std::cout<<"test"<() { + Ok(number @ 0..=2) => break number, + _ => { + println!("invalid input!"); + option = String::new(); + continue; + } + }; +};```''': '''let number = loop { + println!("Pick a pattern from 0-2:"); + stdin.read_line(&mut option).unwrap(); + match option.lines().next().unwrap().parse::() { + Ok(number @ 0..=2) => break number, + _ => { + println!("invalid input!"); + option = String::new(); + continue; + } + }; +};''', + "@username": "@username", + "#hashtag索引标签": "#hashtag索引标签", + "do-not-reply@telegram.org": "do-not-reply@telegram.org", + "+12125550123": "+12125550123"} + entites = list(entities_map.keys()) + contents = list(entities_map.values()) + contents.sort() + text = '\n'.join(entites) + + bot = telebot.TeleBot(TOKEN) + message = bot.send_message(CHAT_ID, text, parse_mode="Markdown") + extracted_contents = [util.extract_entity( + message.text, e) for e in message.entities] + extracted_contents.sort() + assert contents == extracted_contents @staticmethod def create_text_message(text):