diff --git a/vlib/net/html/dom.v b/vlib/net/html/dom.v index 7571f3f462..fee530e9a8 100644 --- a/vlib/net/html/dom.v +++ b/vlib/net/html/dom.v @@ -185,3 +185,8 @@ pub fn (dom DocumentObjectModel) get_root() &Tag { pub fn (dom DocumentObjectModel) get_tags() []&Tag { return dom.all_tags } + +// get_tags_by_class_name retrieves all the tags recursively in the document that have all of the given class name(s). +pub fn (dom DocumentObjectModel) get_tags_by_class_name(names ...string) []&Tag { + return dom.root.get_tags_by_class_name(...names) +} diff --git a/vlib/net/html/dom_test.v b/vlib/net/html/dom_test.v index 18dd613a1a..cbd8b12340 100644 --- a/vlib/net/html/dom_test.v +++ b/vlib/net/html/dom_test.v @@ -54,3 +54,33 @@ fn test_access_tag_fields() { assert id_tags[0].name == 'div' assert id_tags[1].attributes['class'] == 'several-1' } + +fn generate_temp_html_with_classes() string { + mut temp_html := strings.new_builder(400) + temp_html.write_string('Giant String') + temp_html.write_string("
Single
") + for counter := 0; counter < 4; counter++ { + temp_html.write_string("
Common No. ${counter}
") + } + temp_html.write_string("
Complex
") + temp_html.write_string("
Partial
") + temp_html.write_string('') + return temp_html.str() +} + +fn test_search_by_class() { + dom := parse(generate_temp_html_with_classes()) + single_class_tags := dom.get_tags_by_class_name('single') + common_class_tags := dom.get_tags_by_class_name('common') + complex_class_tags := dom.get_tags_by_class_name('complex-0', 'complex-1', 'complex-2') + partial_class_tags := dom.get_tags_by_class_name('complex-0', 'complex-2') + shuffled_class_tags := dom.get_tags_by_class_name('complex-2', 'complex-0', 'complex-1') + assert single_class_tags.len == 1 + assert common_class_tags.len == 4 + assert complex_class_tags.len == 1 + assert complex_class_tags[0].attributes['class'] == 'complex-0 complex-1 complex-2' + assert partial_class_tags.len == 2 + assert shuffled_class_tags.len == 1 + assert shuffled_class_tags[0].attributes['class'] == 'complex-0 complex-1 complex-2' +} diff --git a/vlib/net/html/parser.v b/vlib/net/html/parser.v index 2dd17adee4..283f3edd7d 100644 --- a/vlib/net/html/parser.v +++ b/vlib/net/html/parser.v @@ -151,6 +151,13 @@ pub fn (mut parser Parser) split_parse(data string) { nval := temp_lexeme.substr(1, temp_lexeme.len - 1) // parser.print_debug(lattr + " = " + temp_lexeme) parser.lexical_attributes.current_tag.attributes[lattr] = nval + if lattr == 'class' { + for class_name in nval.split_any('\t\r\n \x0C') { + if class_name != '' { + parser.lexical_attributes.current_tag.class_set.add(class_name) + } + } + } parser.lexical_attributes.current_tag.last_attribute = '' } else { parser.lexical_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' diff --git a/vlib/net/html/tag.v b/vlib/net/html/tag.v index dde3a46efd..d300e39f06 100644 --- a/vlib/net/html/tag.v +++ b/vlib/net/html/tag.v @@ -1,6 +1,7 @@ module html import strings +import datatypes enum CloseTagType { in_name @@ -16,6 +17,7 @@ pub mut: children []&Tag attributes map[string]string // attributes will be like map[name]value last_attribute string + class_set datatypes.Set[string] 
parent &Tag = unsafe { nil } position_in_parent int closed bool @@ -102,3 +104,22 @@ pub fn (tag &Tag) get_tags_by_attribute_value(name string, value string) []&Tag } return res } + +// get_tags_by_class_name recursively retrieves all the descendant tags of the tag that have every one of the given class name(s); when no names are given, every descendant tag is returned. +pub fn (tag &Tag) get_tags_by_class_name(names ...string) []&Tag { + mut res := []&Tag{} + for child in tag.children { + mut matched := true + for name in names { + matched = child.class_set.exists(name) + if !matched { + break + } + } + if matched { + res << child + } + res << child.get_tags_by_class_name(...names) + } + return res +} diff --git a/vlib/net/html/tag_test.v b/vlib/net/html/tag_test.v index dbf79561f5..6e18b76914 100644 --- a/vlib/net/html/tag_test.v +++ b/vlib/net/html/tag_test.v @@ -1,5 +1,7 @@ module html +import strings + const ( html = ' @@ -34,4 +36,36 @@ fn test_search_by_tag_type() { assert tag.get_tags('div').len == 5 assert tag.get_tags_by_attribute('href')[2].content == 'vpm' assert tag.get_tags_by_attribute_value('class', 'bar').len == 3 + assert tag.get_tags_by_class_name('bar').len == 3 +} + +fn generate_temp_html_with_classes() string { + mut temp_html := strings.new_builder(400) + temp_html.write_string('Giant String') + temp_html.write_string("
Single
") + for counter := 0; counter < 4; counter++ { + temp_html.write_string("
Common No. ${counter}
") + } + temp_html.write_string("
Complex
") + temp_html.write_string("
Partial
") + temp_html.write_string('') + return temp_html.str() +} + +fn test_search_by_class() { + mut dom := parse(generate_temp_html_with_classes()) + tag := dom.get_tag('body')[0] + single_class_tags := tag.get_tags_by_class_name('single') + common_class_tags := tag.get_tags_by_class_name('common') + complex_class_tags := tag.get_tags_by_class_name('complex-0', 'complex-1', 'complex-2') + partial_class_tags := tag.get_tags_by_class_name('complex-0', 'complex-2') + shuffled_class_tags := tag.get_tags_by_class_name('complex-2', 'complex-0', 'complex-1') + assert single_class_tags.len == 1 + assert common_class_tags.len == 4 + assert complex_class_tags.len == 1 + assert complex_class_tags[0].attributes['class'] == 'complex-0 complex-1 complex-2' + assert partial_class_tags.len == 2 + assert shuffled_class_tags.len == 1 + assert shuffled_class_tags[0].attributes['class'] == 'complex-0 complex-1 complex-2' }