Add a function to extract contents of entities from messages

This commit is contained in:
orocane 2023-02-15 15:12:33 +08:00
parent fb7d60f09d
commit 4f2c89c4a8
2 changed files with 80 additions and 0 deletions

View File

@ -274,6 +274,39 @@ def extract_arguments(text: str) -> str or None:
result = regexp.match(text)
return result.group(2) if is_command(text) else None
def extract_entity(text: str, e: types.MessageEntity) -> str:
    """
    Returns the content of the entity.

    The entity's ``offset`` and ``length`` are interpreted as counts of
    UTF-16 code units (a character outside the Basic Multilingual Plane
    counts as two), not as Python string indices.

    A zero-length entity, or one that lies entirely past the end of the
    text, yields an empty string. A span running past the end of the text
    is truncated at the end of the text.

    :param text: The text of the message the entity belongs to
    :type text: :obj:`str`

    :param e: The entity to extract
    :type e: :obj:`MessageEntity`

    :raises UnicodeDecodeError: If the span starts or ends in the middle of
        a surrogate pair

    :return: The content of the entity
    :rtype: :obj:`str`
    """
    # In UTF-16-LE every code unit is exactly two bytes and no BOM is
    # emitted, so a code-unit offset maps to a byte offset by doubling it.
    utf16_text = text.encode("utf-16-le")
    start = e.offset * 2
    end = start + e.length * 2
    return utf16_text[start:end].decode("utf-16-le")
def split_string(text: str, chars_per_string: int) -> List[str]:
"""

View File

@ -470,6 +470,53 @@ class TestTeleBot:
for i in range(0,200):
util.antiflood(tb.send_message, CHAT_ID, text)
assert i == 199
def test_extract_entity(self):
    """
    Sends a single Markdown message made of several snippets and checks
    that util.extract_entity returns the expected content for every entity
    attached to the message that comes back.
    """
    # Markdown snippet -> content expected to be extracted for its entity.
    expected_by_markup = {"https://core.telegram.org/api/entities": "https://core.telegram.org/api/entities",
                          "https://github.com/eternnoir/pyTelegramBotAPI": "https://github.com/eternnoir/pyTelegramBotAPI",
                          "*粗 bold text体*": "粗 bold text体",
                          "_斜体 italic text_": "斜体 italic text",
                          "[谷歌](http://www.google.com/)": "谷歌",
                          '`std::cout<<"test"<<std::endl;`': 'std::cout<<"test"<<std::endl;',
                          '''```rust
let number = loop {
println!("Pick a pattern from 0-2:");
stdin.read_line(&mut option).unwrap();
match option.lines().next().unwrap().parse::<usize>() {
Ok(number @ 0..=2) => break number,
_ => {
println!("invalid input!");
option = String::new();
continue;
}
};
};```''': '''let number = loop {
println!("Pick a pattern from 0-2:");
stdin.read_line(&mut option).unwrap();
match option.lines().next().unwrap().parse::<usize>() {
Ok(number @ 0..=2) => break number,
_ => {
println!("invalid input!");
option = String::new();
continue;
}
};
};''',
                          "@username": "@username",
                          "#hashtag索引标签": "#hashtag索引标签",
                          "do-not-reply@telegram.org": "do-not-reply@telegram.org",
                          "+12125550123": "+12125550123"}
    # One snippet per line, in the order the snippets were declared.
    message_text = '\n'.join(expected_by_markup)
    bot = telebot.TeleBot(TOKEN)
    message = bot.send_message(CHAT_ID, message_text, parse_mode="Markdown")
    # Both sides are sorted so the comparison ignores entity order.
    found = sorted(util.extract_entity(message.text, entity)
                   for entity in message.entities)
    assert sorted(expected_by_markup.values()) == found
@staticmethod
def create_text_message(text):