mirror of
https://github.com/eternnoir/pyTelegramBotAPI.git
synced 2023-08-10 21:12:57 +03:00
Add a function to extract contents of entities from messages
This commit is contained in:
parent
fb7d60f09d
commit
4f2c89c4a8
@ -274,6 +274,39 @@ def extract_arguments(text: str) -> str or None:
|
|||||||
result = regexp.match(text)
|
result = regexp.match(text)
|
||||||
return result.group(2) if is_command(text) else None
|
return result.group(2) if is_command(text) else None
|
||||||
|
|
||||||
|
def extract_entity(text: str, e: types.MessageEntity) -> str:
    """
    Returns the content of the entity.

    The entity's ``offset`` and ``length`` are interpreted as counts of
    UTF-16 code units: a character outside the Basic Multilingual Plane
    (most emoji, for example) counts as two units, every other character
    as one. The text is therefore sliced in its UTF-16 form instead of by
    Python string index.

    :param text: The text of the message the entity belongs to
    :type text: :obj:`str`

    :param e: The entity to extract
    :type e: :obj:`MessageEntity`

    :return: The content of the entity. An empty string is returned when
        the entity has zero length or lies entirely past the end of the text;
        an entity that runs past the end is clipped to the text.
    :rtype: :obj:`str`
    """
    # In UTF-16-LE every code unit is exactly two bytes, so unit offsets map
    # directly to byte offsets. "surrogatepass" keeps unpaired surrogates from
    # raising on either the encode or the decode side.
    utf16 = text.encode("utf-16-le", "surrogatepass")

    # Clamp at zero so a malformed negative value cannot wrap around and
    # slice from the end of the buffer.
    first = max(e.offset, 0) * 2
    last = max(e.offset + e.length, 0) * 2

    return utf16[first:last].decode("utf-16-le", "surrogatepass")
|
||||||
|
|
||||||
def split_string(text: str, chars_per_string: int) -> List[str]:
|
def split_string(text: str, chars_per_string: int) -> List[str]:
|
||||||
"""
|
"""
|
||||||
|
@ -471,6 +471,53 @@ class TestTeleBot:
|
|||||||
util.antiflood(tb.send_message, CHAT_ID, text)
|
util.antiflood(tb.send_message, CHAT_ID, text)
|
||||||
assert i == 199
|
assert i == 199
|
||||||
|
|
||||||
|
def test_extract_entity(self):
# Round-trip check for util.extract_entity: sends one Markdown message made
# of the sample fragments below, then verifies that the text extracted for
# every entity of the returned message matches the expected content.
# NOTE(review): relies on telebot, util, TOKEN and CHAT_ID defined outside
# this view and performs a real bot.send_message call.
|
||||||
|
# Keys are the Markdown source fragments placed in the outgoing message;
# values are the entity contents expected back from util.extract_entity.
entities_map = {"https://core.telegram.org/api/entities": "https://core.telegram.org/api/entities",
|
||||||
|
"https://github.com/eternnoir/pyTelegramBotAPI": "https://github.com/eternnoir/pyTelegramBotAPI",
|
||||||
|
"*粗 bold text体*": "粗 bold text体",
|
||||||
|
"_斜体 italic text_": "斜体 italic text",
|
||||||
|
"[谷歌](http://www.google.com/)": "谷歌",
|
||||||
|
'`std::cout<<"test"<<std::endl;`': 'std::cout<<"test"<<std::endl;',
|
||||||
|
'''```rust
|
||||||
|
let number = loop {
|
||||||
|
println!("Pick a pattern from 0-2:");
|
||||||
|
stdin.read_line(&mut option).unwrap();
|
||||||
|
match option.lines().next().unwrap().parse::<usize>() {
|
||||||
|
Ok(number @ 0..=2) => break number,
|
||||||
|
_ => {
|
||||||
|
println!("invalid input!");
|
||||||
|
option = String::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};```''': '''let number = loop {
|
||||||
|
println!("Pick a pattern from 0-2:");
|
||||||
|
stdin.read_line(&mut option).unwrap();
|
||||||
|
match option.lines().next().unwrap().parse::<usize>() {
|
||||||
|
Ok(number @ 0..=2) => break number,
|
||||||
|
_ => {
|
||||||
|
println!("invalid input!");
|
||||||
|
option = String::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};''',
|
||||||
|
"@username": "@username",
|
||||||
|
"#hashtag索引标签": "#hashtag索引标签",
|
||||||
|
"do-not-reply@telegram.org": "do-not-reply@telegram.org",
|
||||||
|
"+12125550123": "+12125550123"}
|
||||||
|
# NOTE(review): "entites" is a misspelling of "entities"; left unchanged here.
entites = list(entities_map.keys())
|
||||||
|
contents = list(entities_map.values())
|
||||||
|
# Expected and extracted lists are both sorted before the comparison, so the
# check does not depend on the order of message.entities.
contents.sort()
|
||||||
|
# One Markdown fragment per line of the outgoing message.
text = '\n'.join(entites)
|
||||||
|
|
||||||
|
bot = telebot.TeleBot(TOKEN)
|
||||||
|
message = bot.send_message(CHAT_ID, text, parse_mode="Markdown")
|
||||||
|
extracted_contents = [util.extract_entity(
|
||||||
|
message.text, e) for e in message.entities]
|
||||||
|
extracted_contents.sort()
|
||||||
|
assert contents == extracted_contents
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_text_message(text):
|
def create_text_message(text):
|
||||||
params = {'text': text}
|
params = {'text': text}
|
||||||
|
Loading…
Reference in New Issue
Block a user