mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
net.html: fix panic in html.parse() called with empty string, remove replacement of \n
in the original content (#17206)
This commit is contained in:
parent
a8102f14be
commit
8cdc554c63
@ -96,6 +96,12 @@ fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
|
||||
|
||||
fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
|
||||
dom.constructed = true
|
||||
|
||||
// If there are no tags, indexing `tag_list` below would panic.
|
||||
if tag_list.len == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
mut temp_map := map[string]int{}
|
||||
mut temp_int := null_element
|
||||
mut temp_string := ''
|
||||
@ -106,6 +112,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
|
||||
temp_map['0'] = dom.btree.add_children(tag_list[0])
|
||||
stack.push(0)
|
||||
root_index := 0
|
||||
|
||||
for index := 1; index < tag_list.len; index++ {
|
||||
mut tag := tag_list[index]
|
||||
dom.print_debug(tag.str())
|
||||
|
@ -13,6 +13,7 @@ mut:
|
||||
is_attribute bool
|
||||
opened_code_type string
|
||||
line_count int
|
||||
outside_tag bool
|
||||
lexeme_builder strings.Builder = strings.new_builder(100)
|
||||
code_tags map[string]bool = {
|
||||
'script': true
|
||||
@ -90,6 +91,7 @@ fn (mut parser Parser) init() {
|
||||
parser.tags = []&Tag{}
|
||||
parser.dom.close_tags['/!document'] = true
|
||||
parser.lexical_attributes.current_tag = &Tag{}
|
||||
parser.lexical_attributes.outside_tag = true
|
||||
parser.initialized = true
|
||||
}
|
||||
|
||||
@ -231,19 +233,40 @@ pub fn (mut parser Parser) split_parse(data string) {
|
||||
parser.lexical_attributes.lexeme_builder.go_back_to(0)
|
||||
parser.generate_tag()
|
||||
parser.lexical_attributes.open_tag = true
|
||||
parser.lexical_attributes.outside_tag = false
|
||||
} else {
|
||||
parser.lexical_attributes.lexeme_builder.write_u8(chr)
|
||||
}
|
||||
}
|
||||
|
||||
// If `data` has no tags and contains only text.
|
||||
if parser.lexical_attributes.outside_tag {
|
||||
temp_string := parser.lexical_attributes.lexeme_builder.str()
|
||||
|
||||
if parser.tags.len == 0 {
|
||||
parser.tags << &Tag{
|
||||
name: 'text'
|
||||
content: temp_string
|
||||
}
|
||||
} else if parser.tags.len == 1 {
|
||||
mut tag := parser.tags.first()
|
||||
|
||||
if tag.name == 'text' {
|
||||
tag.content += temp_string
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parse_html parses the given HTML string
|
||||
pub fn (mut parser Parser) parse_html(data string) {
|
||||
parser.init()
|
||||
mut lines := data.split_into_lines()
|
||||
for line in lines {
|
||||
for index, line in lines {
|
||||
parser.lexical_attributes.line_count++
|
||||
parser.split_parse(line)
|
||||
// The parser must not strip `\n`: doing so may break JS code or glue separate pieces of text together.
|
||||
// After `split_into_lines()` we need to add `\n` again.
|
||||
parser.split_parse(if index < lines.len - 1 { '${line}\n' } else { line })
|
||||
}
|
||||
parser.generate_tag()
|
||||
parser.dom.debug_file = parser.debug_file
|
||||
|
@ -2,6 +2,34 @@ module html
|
||||
|
||||
import strings
|
||||
|
||||
// test_parse_empty_string feeds an empty string to `parse_html` and checks
// that the parser ends up with no tags at all.
fn test_parse_empty_string() {
|
||||
mut parser := Parser{}
|
||||
|
||||
parser.parse_html('')
|
||||
|
||||
assert parser.tags.len == 0
|
||||
}
|
||||
|
||||
// test_parse_text parses multi-line input that contains no markup and checks
// that exactly one tag is produced whose `text()` equals the input, with the
// embedded `\n` characters still present.
fn test_parse_text() {
|
||||
mut parser := Parser{}
|
||||
text_content := 'test\nparse\ntext'
|
||||
|
||||
parser.parse_html(text_content)
|
||||
|
||||
assert parser.tags.len == 1
|
||||
assert parser.tags.first().text() == text_content
|
||||
}
|
||||
|
||||
// test_parse_one_tag_with_text wraps multi-line text in a single `<p>` element
// and checks that `text()` of the first parsed tag returns the text unchanged,
// including its `\n` characters.
fn test_parse_one_tag_with_text() {
|
||||
mut parser := Parser{}
|
||||
text_content := 'tag\nwith\ntext'
|
||||
p_tag := '<p>${text_content}</p>'
|
||||
|
||||
parser.parse_html(p_tag)
|
||||
|
||||
assert parser.tags.first().text() == text_content
|
||||
}
|
||||
|
||||
fn test_split_parse() {
|
||||
mut parser := Parser{}
|
||||
parser.init()
|
||||
@ -37,5 +65,5 @@ fn test_script_tag() {
|
||||
script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n"
|
||||
temp_html := '<html><body><script>${script_content}</script></body></html>'
|
||||
parser.parse_html(temp_html)
|
||||
assert parser.tags[2].content.len == script_content.replace('\n', '').len
|
||||
assert parser.tags[2].content.len == script_content.len
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ pub fn (tag Tag) text() string {
|
||||
return '\n'
|
||||
}
|
||||
mut text_str := strings.new_builder(200)
|
||||
text_str.write_string(tag.content.replace('\n', ''))
|
||||
text_str.write_string(tag.content)
|
||||
for child in tag.children {
|
||||
text_str.write_string(child.text())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user