mirror of
https://github.com/eternnoir/pyTelegramBotAPI.git
synced 2023-08-10 21:12:57 +03:00
Add a function to extract contents of entities from messages
This commit is contained in:
parent
fb7d60f09d
commit
4f2c89c4a8
@ -274,6 +274,39 @@ def extract_arguments(text: str) -> str or None:
|
||||
result = regexp.match(text)
|
||||
return result.group(2) if is_command(text) else None
|
||||
|
||||
def extract_entity(text: str, e: types.MessageEntity) -> str:
    """
    Returns the content of the entity.

    Entity offsets and lengths are measured in UTF-16 code units, not in
    Python string indices, so the text is sliced in its UTF-16 encoded form.
    A zero-length entity, or an entity lying entirely past the end of the
    text, yields an empty string; an entity running past the end of the text
    is truncated at the end of the text.

    :param text: The text of the message the entity belongs to
    :type text: :obj:`str`

    :param e: The entity to extract
    :type e: :obj:`MessageEntity`

    :return: The content of the entity
    :rtype: :obj:`str`
    """
    # Little-endian UTF-16 without a BOM: exactly two bytes per code unit,
    # so a code-unit offset maps to a byte offset by doubling.
    encoded_text = text.encode("utf-16-le")

    # Clamp negatives so they cannot be interpreted as "count from the end".
    start = max(e.offset, 0) * 2
    end = start + max(e.length, 0) * 2

    # Slicing clamps out-of-range bounds to the buffer. "replace" keeps a
    # malformed entity that cuts a surrogate pair in half from raising.
    return encoded_text[start:end].decode("utf-16-le", errors="replace")
|
||||
|
||||
def split_string(text: str, chars_per_string: int) -> List[str]:
|
||||
"""
|
||||
|
@ -470,6 +470,53 @@ class TestTeleBot:
|
||||
for i in range(0,200):
|
||||
util.antiflood(tb.send_message, CHAT_ID, text)
|
||||
assert i == 199
|
||||
|
||||
def test_extract_entity(self):
"""Check that util.extract_entity returns the expected content for each entity of a Markdown message."""
|
||||
# Maps each Markdown-formatted snippet (key) to the entity content expected back for it (value).
entities_map = {"https://core.telegram.org/api/entities": "https://core.telegram.org/api/entities",
|
||||
"https://github.com/eternnoir/pyTelegramBotAPI": "https://github.com/eternnoir/pyTelegramBotAPI",
|
||||
"*粗 bold text体*": "粗 bold text体",
|
||||
"_斜体 italic text_": "斜体 italic text",
|
||||
"[谷歌](http://www.google.com/)": "谷歌",
|
||||
'`std::cout<<"test"<<std::endl;`': 'std::cout<<"test"<<std::endl;',
|
||||
'''```rust
|
||||
let number = loop {
|
||||
println!("Pick a pattern from 0-2:");
|
||||
stdin.read_line(&mut option).unwrap();
|
||||
match option.lines().next().unwrap().parse::<usize>() {
|
||||
Ok(number @ 0..=2) => break number,
|
||||
_ => {
|
||||
println!("invalid input!");
|
||||
option = String::new();
|
||||
continue;
|
||||
}
|
||||
};
|
||||
};```''': '''let number = loop {
|
||||
println!("Pick a pattern from 0-2:");
|
||||
stdin.read_line(&mut option).unwrap();
|
||||
match option.lines().next().unwrap().parse::<usize>() {
|
||||
Ok(number @ 0..=2) => break number,
|
||||
_ => {
|
||||
println!("invalid input!");
|
||||
option = String::new();
|
||||
continue;
|
||||
}
|
||||
};
|
||||
};''',
|
||||
"@username": "@username",
|
||||
"#hashtag索引标签": "#hashtag索引标签",
|
||||
"do-not-reply@telegram.org": "do-not-reply@telegram.org",
|
||||
"+12125550123": "+12125550123"}
|
||||
# The keys become the lines of the outgoing message; the expected values are
# sorted so that the final comparison does not depend on entity order.
entites = list(entities_map.keys())
|
||||
contents = list(entities_map.values())
|
||||
contents.sort()
|
||||
text = '\n'.join(entites)
|
||||
|
||||
# NOTE(review): this sends a real message through bot.send_message, so it
# presumably needs a valid TOKEN / CHAT_ID and network access — confirm.
bot = telebot.TeleBot(TOKEN)
|
||||
message = bot.send_message(CHAT_ID, text, parse_mode="Markdown")
|
||||
# Extract every entity of the returned message from the returned message text.
extracted_contents = [util.extract_entity(
|
||||
message.text, e) for e in message.entities]
|
||||
extracted_contents.sort()
|
||||
assert contents == extracted_contents
|
||||
|
||||
@staticmethod
|
||||
def create_text_message(text):
|
||||
|
Loading…
Reference in New Issue
Block a user