Compare commits

...

2 Commits

Author SHA1 Message Date
Badiboy 5d9a76b0dd
Merge pull request #1917 from S1RANN/master
Add a function to extract contents of entities from messages
2023-03-03 11:00:03 +03:00
orocane 4f2c89c4a8 Add a function to extract contents of entities from messages 2023-02-15 17:24:39 +08:00
2 changed files with 80 additions and 0 deletions

View File

@ -273,6 +273,39 @@ def extract_arguments(text: str) -> str or None:
result = regexp.match(text)
return result.group(2) if is_command(text) else None
def extract_entity(text: str, e: types.MessageEntity) -> str:
    """
    Returns the content of the entity.

    The entity's ``offset`` and ``length`` are counted in UTF-16 code units:
    a character outside the Basic Multilingual Plane (e.g. most emoji) counts
    as two units, every other character as one. The text is therefore sliced
    in its UTF-16 form rather than by Python string index.

    A zero-length entity, or one that lies entirely past the end of the text,
    yields an empty string. An entity that runs past the end of the text is
    truncated to the text that is available.

    :param text: The text of the message the entity belongs to
    :type text: :obj:`str`

    :param e: The entity to extract
    :type e: :obj:`MessageEntity`

    :return: The content of the entity
    :rtype: :obj:`str`
    """
    # In UTF-16-LE every code unit is exactly two bytes, so a code-unit
    # offset maps to a byte offset by simple doubling.
    utf16_units = text.encode("utf-16-le")
    first = e.offset * 2
    last = first + e.length * 2
    # errors="ignore" drops a dangling half of a surrogate pair if a boundary
    # happens to fall inside one, instead of raising from a lookup helper.
    return utf16_units[first:last].decode("utf-16-le", errors="ignore")
def split_string(text: str, chars_per_string: int) -> List[str]:
"""

View File

@ -470,6 +470,53 @@ class TestTeleBot:
for i in range(0,200):
util.antiflood(tb.send_message, CHAT_ID, text)
assert i == 199
def test_extract_entity(self):
    """Send a message made of several entity kinds and check that
    ``util.extract_entity`` recovers the content of every entity returned."""
    # Each key is one line (or block) of the Markdown message that is sent;
    # each value is the entity content expected back for it.
    markup_to_content = {
        "https://core.telegram.org/api/entities": "https://core.telegram.org/api/entities",
        "https://github.com/eternnoir/pyTelegramBotAPI": "https://github.com/eternnoir/pyTelegramBotAPI",
        "*粗 bold text体*": "粗 bold text体",
        "_斜体 italic text_": "斜体 italic text",
        "[谷歌](http://www.google.com/)": "谷歌",
        '`std::cout<<"test"<<std::endl;`': 'std::cout<<"test"<<std::endl;',
        '''```rust
let number = loop {
println!("Pick a pattern from 0-2:");
stdin.read_line(&mut option).unwrap();
match option.lines().next().unwrap().parse::<usize>() {
Ok(number @ 0..=2) => break number,
_ => {
println!("invalid input!");
option = String::new();
continue;
}
};
};```''': '''let number = loop {
println!("Pick a pattern from 0-2:");
stdin.read_line(&mut option).unwrap();
match option.lines().next().unwrap().parse::<usize>() {
Ok(number @ 0..=2) => break number,
_ => {
println!("invalid input!");
option = String::new();
continue;
}
};
};''',
        "@username": "@username",
        "#hashtag索引标签": "#hashtag索引标签",
        "do-not-reply@telegram.org": "do-not-reply@telegram.org",
        "+12125550123": "+12125550123",
    }

    # Both sides are compared in sorted order, so the order in which the
    # entities come back does not matter.
    expected = sorted(markup_to_content.values())

    # Iterating the dict yields its keys in insertion order.
    outgoing_text = '\n'.join(markup_to_content)

    bot = telebot.TeleBot(TOKEN)
    message = bot.send_message(CHAT_ID, outgoing_text, parse_mode="Markdown")

    extracted = sorted(
        util.extract_entity(message.text, entity)
        for entity in message.entities
    )
    assert expected == extracted
@staticmethod
def create_text_message(text):