net.html: polish module, update docs (#7193)

2023-08-10 21:13:21 +03:00 · 2020-12-10 03:08:15 +08:00
parent 5fa1e403ec
commit b952bf2e6b
9 changed files with 302 additions and 446 deletions
--- a/vlib/net/html/README.md
+++ b/vlib/net/html/README.md
@@ -1,118 +1,16 @@
-# V HTML
-
-A HTML parser made in V.
+net/html is an HTML parser written in pure V.

 ## Usage
+```v oksyntax
+import net.html

-If the description below isn't enought, please look at the test files.
-
-### Parser
-
-Responsible for read HTML in full strings or splited string and returns all Tag objets of
-it HTML or return a DocumentObjectModel, that will try to find how the HTML Tree is.
-
-#### split_parse(data string)
-This functions is the main function called by parse method to fragment parse your HTML.
-
-#### parse_html(data string, is_file bool)
-This function is called passing a filename or a complete html data string to it.
-
-#### add_code_tag(name string)
-This function is used to add a tag for the parser ignore it's content. 
-For example, if you have an html or XML with a custom tag, like `<script>`, using this function, 
-like `add_code_tag('script')` will make all `script` tags content be jumped, 
-so you still have its content, but will not confuse the parser with it's `>` or `<`.
-
-#### finalize()
-When using **split_parse** method, you must call this function to ends the parse completely.
-
-#### get_tags() []Tag_ptr
-This functions returns a array with all tags and it's content.
-
-#### get_dom() DocumentObjectModel
-Returns the DocumentObjectModel for current parsed tags.
-
-### WARNING
-If you want to reuse parser object to parse another HTML, call `initialize_all()` function first.
-
-### DocumentObjectModel
-
-A DOM object that will make easier to access some tags and search it.
-
-#### get_by_attribute_value(name string, value string) []Tag_ptr
-This function retuns a Tag array with all tags in document 
-that have a attribute with given name and given value.
-
-#### get_by_tag(name string) []Tag_ptr
-This function retuns a Tag array with all tags in document that have a name with the given value.
-
-#### get_by_attribute(name string) []Tag_ptr
-This function retuns a Tag array with all tags in document that have a attribute with given name.
-
-#### get_root() Tag_ptr
-This function returns the root Tag.
-
-#### get_all_tags() []Tag_ptr
-This function returns all important tags, removing close tags.
-
-### Tag
-
-An object that holds tags information, such as `name`, `attributes`, `children`.
-
-#### get_children() []Tag_ptr
-Returns all children as an array.
-
-#### get_parent() &Tag
-Returns the parent of current tag.
-
-#### get_name() string
-Returns tag name.
-
-#### get_content() string
-Returns tag content.
-
-#### get_attributes() map[string]string
-Returns all attributes and it value.
-
-#### text() string
-Returns the content of the tag and all tags inside it. 
-Also, any `<br>` tag will be converted into `\n`.
-
-## Some questions that can appear
-
-### Q: Why in parser have a `builder_str() string` method that returns only the lexeme string?
-    
-A: Because in early stages of the project, `strings.Builder` are used, 
-but for some bug existing somewhere, it was necessary to use `string` directly. 
-Later, it's planned to use `strings.Builder` again.
-
-### Q: Why have a `compare_string(a string, b string) bool` method?
-
-A: For some reason when using != and == in strings directly, it is not working. 
-So this method is a workaround.
-
-### Q: Will be something like `XPath`?
-
-A: Like XPath yes. Exactly equal to it, no.
-
-## Roadmap
- [x] Parser
-  - [x] `<!-- Comments -->` detection
-  - [x] `Open Generic tags` detection
-  - [x] `Close Generic tags` detection
-  - [x] `verify string` detection
-  - [x] `tag attributes` detection
-  - [x] `attributes values` detection
-  - [x] `tag text` (on tag it is declared as content, maybe change for text in the future)
-  - [x] `text file for parse` support (open local files for parsing)
-  - [x] `open_code` verification
- [x] DocumentObjectModel
-  - [x] push elements that have a close tag into stack
-  - [x] remove elements from stack
-  - [x] ~~create a new document root if have some syntax error (deleted)~~
-  - [x] search tags in `DOM` by attributes
-  - [x] search tags in `DOM` by tag type
-  - [x] finish dom test
-
-## License
-[MIT](../../../LICENSE)
+fn main() {
+	doc := html.parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
+	tag := doc.get_tag('h1')[0] // <h1 class="title">Hello world!</h1>
+	println(tag.name) // h1
+	println(tag.content) // Hello world!
+	println(tag.attributes) // {'class':'title'}
+	println(tag.str()) // <h1 class="title">Hello world!</h1>
+}
+```
+More examples can be found in [`parser_test.v`](parser_test.v) and [`html_test.v`](html_test.v).
--- a/vlib/net/html/data_structures.v
+++ b/vlib/net/html/data_structures.v
@@ -1,30 +1,35 @@
 module html

-#include <limits.h>
+const (
+	null_element = int(0x80000000)
+)
+
 struct Stack {
-	null_element int = C.INT_MIN
 mut:
-	elements     []int
-	size         int
+	elements []int
+	size     int
 }

-fn (stack Stack) is_null(data int) bool {
-	return data == stack.null_element
+[inline]
+fn is_null(data int) bool {
+	return data == null_element
 }

+[inline]
 fn (stack Stack) is_empty() bool {
 	return stack.size <= 0
 }

 fn (stack Stack) peek() int {
-	if !stack.is_empty() {
-		return stack.elements[stack.size - 1]
+	return if !stack.is_empty() {
+		stack.elements[stack.size - 1]
+	} else {
+		null_element
 	}
-	return stack.null_element
 }

 fn (mut stack Stack) pop() int {
-	mut to_return := stack.null_element
+	mut to_return := null_element
 	if !stack.is_empty() {
 		to_return = stack.elements[stack.size - 1]
 		stack.size--
@@ -53,7 +58,6 @@ fn (mut btree BTree) add_children(tag Tag) int {
 	btree.all_tags << tag
 	if btree.all_tags.len > 1 {
 		for btree.childrens.len <= btree.node_pointer {
-			// println("${btree.childrens.len} <= ${btree.node_pointer}")
 			mut temp_array := btree.childrens
 			temp_array << []int{}
 			btree.childrens = temp_array
@@ -69,14 +73,17 @@ fn (mut btree BTree) add_children(tag Tag) int {
 	return btree.all_tags.len - 1
 }

+[inline]
 fn (btree BTree) get_children() []int {
 	return btree.childrens[btree.node_pointer]
 }

+[inline]
 fn (btree BTree) get_parent() int {
 	return btree.parents[btree.node_pointer]
 }

+[inline]
 fn (btree BTree) get_stored() Tag {
 	return btree.all_tags[btree.node_pointer]
 }
--- a/vlib/net/html/dom.v
+++ b/vlib/net/html/dom.v
@@ -2,6 +2,11 @@ module html

 import os

+// The W3C Document Object Model (DOM) is a platform and language-neutral
+// interface that allows programs and scripts to dynamically access and
+// update the content, structure, and style of a document.
+//
+// https://www.w3.org/TR/WD-DOM/introduction.html
 pub struct DocumentObjectModel {
 mut:
 	root           &Tag
@@ -25,24 +30,14 @@ fn (mut dom DocumentObjectModel) print_debug(data string) {
 	}
 }

-/*
-fn (dom mut DocumentObjectModel) new_root(tag &Tag) {
-	mut new_tag := &Tag{} new_tag.name = "div"
-	new_tag.add_child(dom.root) new_tag.add_child(tag)
-	dom.root = new_tag
-}
-*/
+[inline]
 fn is_close_tag(tag &Tag) bool {
-	if tag.name.len > 0 {
-		return tag.name[0] == 47 // return if equals to /
-	}
-	return false
+	return tag.name.len > 0 && tag.name[0] == `/`
 }

 fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name string) int {
-	if !(attribute_name in dom.attributes) {
-		temp_array := []string{}
-		dom.attributes[attribute_name] = temp_array
+	if attribute_name !in dom.attributes {
+		dom.attributes[attribute_name] = []string{}
 	}
 	mut string_array := dom.attributes[attribute_name]
 	mut counter := 0
@@ -58,10 +53,10 @@ fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name strin
 }

 fn (mut dom DocumentObjectModel) add_tag_attribute(tag &Tag) {
-	for attribute_name in tag.attributes.keys() {
+	for attribute_name, _ in tag.attributes {
 		attribute_value := tag.attributes[attribute_name]
 		location := dom.where_is(attribute_value, attribute_name)
-		if !(attribute_name in dom.tag_attributes) {
+		if attribute_name !in dom.tag_attributes {
 			dom.tag_attributes[attribute_name] = []
 		}
 		for {
@@ -91,7 +86,7 @@ fn (mut dom DocumentObjectModel) add_tag_by_type(tag &Tag) {

 fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
 	for attribute_name in tag.attributes.keys() {
-		if !(attribute_name in dom.all_attributes) {
+		if attribute_name !in dom.all_attributes {
 			dom.all_attributes[attribute_name] = [tag]
 		} else {
 			mut temp_array := dom.all_attributes[attribute_name]
@@ -101,22 +96,10 @@ fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
 	}
 }

-fn compare_string(a string, b string) bool { // for some reason == doesn't work
-	if a.len != b.len {
-		return false
-	}
-	for i := 0; i < a.len; i++ {
-		if a[i] != b[i] {
-			return false
-		}
-	}
-	return true
-}
-
 fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 	dom.constructed = true
 	mut temp_map := map[string]int{}
-	mut temp_int := C.INT_MIN
+	mut temp_int := null_element
 	mut temp_string := ''
 	mut stack := Stack{}
 	dom.btree = BTree{}
@@ -130,21 +113,16 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 		dom.print_debug(tag.str())
 		if is_close_tag(tag) {
 			temp_int = stack.peek()
-			temp_string = tag.name[1..tag.name.len] // print(temp_string + " != " + tag_list[temp_int].name + " >> ") // println(temp_string != tag_list[temp_int].name)
-			for !stack.is_null(temp_int) &&
-				!compare_string(temp_string, tag_list[temp_int].name) && !tag_list[temp_int].closed {
-				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' +
-					compare_string(temp_string, tag_list[temp_int].name).str())
+			temp_string = tag.name[1..]
+			for !is_null(temp_int) && temp_string != tag_list[temp_int].name && !tag_list[temp_int].closed {
+				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' + (temp_string ==
+					tag_list[temp_int].name).str())
 				stack.pop()
 				temp_int = stack.peek()
 			}
 			temp_int = stack.peek()
-			if !stack.is_null(temp_int) {
-				temp_int = stack.pop()
-			} else {
-				temp_int = root_index
-			}
-			if stack.is_null(temp_int) {
+			temp_int = if !is_null(temp_int) { stack.pop() } else { root_index }
+			if is_null(temp_int) {
 				stack.push(root_index)
 			}
 			dom.print_debug('Removed ' + temp_string + ' -- ' + tag_list[temp_int].name)
@@ -154,7 +132,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 			dom.add_tag_by_type(tag)
 			dom.all_tags << tag
 			temp_int = stack.peek()
-			if !stack.is_null(temp_int) {
+			if !is_null(temp_int) {
 				dom.btree.move_pointer(temp_map[temp_int.str()])
 				temp_map[index.str()] = dom.btree.add_children(tag)
 				mut temp_tag := tag_list[temp_int]
@@ -164,8 +142,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 				dom.print_debug("Added ${tag.name} as child of '" + tag_list[temp_int].name +
 					"' which now has ${dom.btree.get_children().len} childrens")
 				*/
-				dom.print_debug("Added $tag.name as child of '" + temp_tag.name +
-					"' which now has $temp_tag.get_children().len childrens")
+				dom.print_debug("Added $tag.name as child of '" + temp_tag.name + "' which now has $temp_tag.children.len childrens")
 			} else { // dom.new_root(tag)
 				stack.push(root_index)
 			}
@@ -179,40 +156,40 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 	dom.root = tag_list[0]
 }

-pub fn (mut dom DocumentObjectModel) get_by_attribute_value(name string, value string) []&Tag {
+// get_tag_by_attribute_value retrieves all the tags in the document that have the given attribute name and value.
+pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag {
 	location := dom.where_is(value, name)
-	if dom.tag_attributes[name].len > location {
-		return dom.tag_attributes[name][location]
+	return if dom.tag_attributes[name].len > location {
+		dom.tag_attributes[name][location]
+	} else {
+		[]&Tag{}
 	}
-	return []&Tag{}
 }

-pub fn (dom DocumentObjectModel) get_by_tag(name string) []&Tag {
-	if name in dom.tag_type {
-		return dom.tag_type[name]
+// get_tag retrieves all the tags in the document that have the given tag name.
+pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag {
+	return if name in dom.tag_type {
+		dom.tag_type[name]
+	} else {
+		[]&Tag{}
 	}
-	return []&Tag{}
 }

-pub fn (dom DocumentObjectModel) get_by_attribute(name string) []&Tag {
-	if name in dom.all_attributes {
-		return dom.all_attributes[name]
+// get_tag_by_attribute retrieves all the tags in the document that have the given attribute name.
+pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag {
+	return if name in dom.all_attributes {
+		dom.all_attributes[name]
+	} else {
+		[]&Tag{}
 	}
-	return []&Tag{}
 }

+// get_root returns the root of the document.
 pub fn (dom DocumentObjectModel) get_root() &Tag {
 	return dom.root
 }

-pub fn (dom DocumentObjectModel) get_all_tags() []&Tag {
+// get_tags returns all of the tags stored in the document.
+pub fn (dom DocumentObjectModel) get_tags() []&Tag {
 	return dom.all_tags
 }
-
-/*
-pub fn (dom DocumentObjectModel) get_xpath() XPath {
-	return XPath{
-		dom: dom
-	}
-}
-*/
--- a/vlib/net/html/dom_test.v
+++ b/vlib/net/html/dom_test.v
@@ -1,63 +1,56 @@
 module html

-fn generate_temp_html() string {
-	mut temp_html := '<!doctype html><html><head><title>Giant String</title></head><body>'
-	for counter := 0; counter < 4; counter++ {
-		temp_html += "<div id='name_$counter' "
-		temp_html += "class='several-$counter'>Look at $counter</div>"
-	}
-	temp_html += '</body></html>'
-	return temp_html
-}
+import strings

-fn generate_dom(temp_html string) DocumentObjectModel {
-	mut parser := Parser{}
-	parser.parse_html(temp_html, false)
-	dom := parser.get_dom()
-	return dom
+fn generate_temp_html() string {
+	mut temp_html := strings.new_builder(200)
+	temp_html.write('<!doctype html><html><head><title>Giant String</title></head><body>')
+	for counter := 0; counter < 4; counter++ {
+		temp_html.write("<div id='name_$counter' ")
+		temp_html.write("class='several-$counter'>Look at $counter</div>")
+	}
+	temp_html.write('</body></html>')
+	return temp_html.str()
 }

 fn test_search_by_tag_type() {
-	dom := generate_dom(generate_temp_html())
-	assert dom.get_by_tag('div').len == 4
-	assert dom.get_by_tag('head').len == 1
-	assert dom.get_by_tag('body').len == 1
+	dom := parse(generate_temp_html())
+	assert dom.get_tag('div').len == 4
+	assert dom.get_tag('head').len == 1
+	assert dom.get_tag('body').len == 1
 }

 fn test_search_by_attribute_value() {
-	mut dom := generate_dom(generate_temp_html())
+	mut dom := parse(generate_temp_html())
 	// println(temp_html)
 	print('Amount ')
-	println(dom.get_by_attribute_value('id', 'name_0'))
-	assert dom.get_by_attribute_value('id', 'name_0').len == 1
+	println(dom.get_tag_by_attribute_value('id', 'name_0'))
+	assert dom.get_tag_by_attribute_value('id', 'name_0').len == 1
 }

 fn test_access_parent() {
-	mut dom := generate_dom(generate_temp_html())
-	div_tags := dom.get_by_tag('div')
-	assert div_tags[0].get_parent() != C.NULL
-	/*
-	parent := div_tags[0].get_parent()
-	assert parent != C.NULL
+	mut dom := parse(generate_temp_html())
+	div_tags := dom.get_tag('div')
+	parent := div_tags[0].parent
+	assert parent != 0
 	for div_tag in div_tags {
-		assert div_tag.get_parent() == parent
+		assert div_tag.parent == parent
 	}
-	*/
 }

 fn test_search_by_attributes() {
-	dom := generate_dom(generate_temp_html())
-	assert dom.get_by_attribute('id').len == 4
+	dom := parse(generate_temp_html())
+	assert dom.get_tag_by_attribute('id').len == 4
 }

 fn test_tags_used() {
-	dom := generate_dom(generate_temp_html())
-	assert dom.get_all_tags().len == 9
+	dom := parse(generate_temp_html())
+	assert dom.get_tags().len == 9
 }

 fn test_access_tag_fields() {
-	dom := generate_dom(generate_temp_html())
-	id_tags := dom.get_by_attribute('id')
-	assert id_tags[0].get_name() == "div"
-	assert id_tags[1].get_attributes()['class'] == "several-1"
+	dom := parse(generate_temp_html())
+	id_tags := dom.get_tag_by_attribute('id')
+	assert id_tags[0].name == 'div'
+	assert id_tags[1].attributes['class'] == 'several-1'
 }
--- a/vlib/net/html/html.v
+++ b/vlib/net/html/html.v
@@ -0,0 +1,18 @@
+module html
+
+import os
+
+// parse parses and returns the DOM from the given text.
+pub fn parse(text string) DocumentObjectModel {
+	mut parser := Parser{}
+	parser.parse_html(text)
+	return parser.get_dom()
+}
+
+// parse_file parses and returns the DOM from the contents of a file.
+pub fn parse_file(filename string) DocumentObjectModel {
+	content := os.read_file(filename) or { return DocumentObjectModel{
+		root: &Tag{}
+	} }
+	return parse(content)
+}
--- a/vlib/net/html/html_test.v
+++ b/vlib/net/html/html_test.v
@@ -0,0 +1,15 @@
+module html
+
+fn test_parse() {
+	doc := parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
+	tags := doc.get_tag('h1')
+	assert tags.len == 1
+	h1_tag := tags[0] // <h1>Hello world!</h1>
+	assert h1_tag.name == 'h1'
+	assert h1_tag.content == 'Hello world!'
+	assert h1_tag.attributes.len == 2
+	// TODO: do not remove. Attributes must not have an empty attr.
+	// assert h1_tag.attributes.len == 1
+	assert h1_tag.str() == '<h1 class="title" >Hello world!</h1>'
+	// assert h1_tag.str() == '<h1 class="title">Hello world!</h1>'
+}
--- a/vlib/net/html/parser.v
+++ b/vlib/net/html/parser.v
@@ -1,8 +1,9 @@
 module html

 import os
+import strings

-struct LexycalAttributes {
+struct LexicalAttributes {
 mut:
 	current_tag      &Tag
 	open_tag         bool
@@ -12,44 +13,40 @@ mut:
 	is_attribute     bool
 	opened_code_type string
 	line_count       int
-	lexeme_builder   string
+	lexeme_builder   strings.Builder = strings.Builder{}
 	code_tags        map[string]bool = {
-		'script': true
-		'style': true
-	}
-}
-
-fn (mut lxa LexycalAttributes) write_lexeme(data byte) {
-	mut temp := lxa.lexeme_builder
-	temp += data.str()
-	lxa.lexeme_builder = temp
+	'script': true
+	'style':  true
+}
 }

+// Parser is responsible for reading the HTML strings and converting them into a `DocumentObjectModel`.
 pub struct Parser {
 mut:
 	dom                DocumentObjectModel
-	lexycal_attributes LexycalAttributes = LexycalAttributes{
-		current_tag: &Tag{}
-	}
+	lexical_attributes LexicalAttributes = LexicalAttributes{
+	current_tag: &Tag{}
+}
 	filename           string = 'direct-parse'
 	initialized        bool
 	tags               []&Tag
 	debug_file         os.File
 }

+// add_code_tag registers a tag whose content the parser must not interpret as markup.
+// For example, if you have HTML or XML with a code-like tag such as `<script>`,
+// calling `add_code_tag('script')` makes the parser skip over the content of every `script` tag,
+// so the content is still kept, but its `>` or `<` characters will not confuse the parser.
 pub fn (mut parser Parser) add_code_tag(name string) {
-	if parser.lexycal_attributes.code_tags.keys().len <= 0 {
-		parser.lexycal_attributes.code_tags = map[string]bool{}
-		parser.lexycal_attributes.code_tags['script'] = true
-		parser.lexycal_attributes.code_tags['style'] = true
-	}
-	if name.len > 0 {
-		parser.lexycal_attributes.code_tags[name] = true
+	if name.len <= 0 {
+		return
 	}
+	parser.lexical_attributes.code_tags[name] = true
 }

+[inline]
 fn (parser Parser) builder_str() string {
-	return parser.lexycal_attributes.lexeme_builder
+	return parser.lexical_attributes.lexeme_builder.after(0)
 }

 [if debug]
@@ -65,28 +62,28 @@ fn (mut parser Parser) verify_end_comment(remove bool) bool {
 	lexeme := parser.builder_str()
 	last := lexeme[lexeme.len - 1]
 	penultimate := lexeme[lexeme.len - 2]
-	mut is_end_comment := false
-	if last.str() == '-' && penultimate.str() == '-' {
-		is_end_comment = true
-	}
+	is_end_comment := last == `-` && penultimate == `-`
 	if is_end_comment && remove {
-		temp := parser.lexycal_attributes.lexeme_builder
-		parser.lexycal_attributes.lexeme_builder = temp[0..temp.len - 2]
+		parser.lexical_attributes.lexeme_builder.go_back(2)
 	}
 	return is_end_comment
 }

 fn blank_string(data string) bool {
 	mut count := 0
-	for word in data {
-		if word == 9 || word == 32 {
+	for chr in data {
+		if chr == 9 || chr == 32 {
 			count++
 		}
 	}
 	return count == data.len
 }

-fn (mut parser Parser) initialize_all() {
+// init initializes the parser.
+fn (mut parser Parser) init() {
+	if parser.initialized {
+		return
+	}
 	parser.dom = DocumentObjectModel{
 		debug_file: parser.debug_file
 		root: &Tag{}
@@ -94,181 +91,165 @@ fn (mut parser Parser) initialize_all() {
 	parser.add_code_tag('')
 	parser.tags = []&Tag{}
 	parser.dom.close_tags['/!document'] = true
-	parser.lexycal_attributes.current_tag = &Tag{}
+	parser.lexical_attributes.current_tag = &Tag{}
 	parser.initialized = true
 }

 fn (mut parser Parser) generate_tag() {
-	if !parser.lexycal_attributes.open_tag {
-		if parser.lexycal_attributes.current_tag.name.len > 0 ||
-			parser.lexycal_attributes.current_tag.content.len > 0 {
-			parser.tags << parser.lexycal_attributes.current_tag
-		}
-		parser.lexycal_attributes.current_tag = &Tag{}
+	if parser.lexical_attributes.open_tag {
+		return
 	}
+	if parser.lexical_attributes.current_tag.name.len > 0 ||
+		parser.lexical_attributes.current_tag.content.len > 0 {
+		parser.tags << parser.lexical_attributes.current_tag
+	}
+	parser.lexical_attributes.current_tag = &Tag{}
 }

+// split_parse parses the HTML fragment
 pub fn (mut parser Parser) split_parse(data string) {
-	if !parser.initialized {
-		parser.initialize_all()
-	}
-	for word in data {
-		mut is_quotation := false // " or '
-		if word == 34 || word == 39 {
-			is_quotation = true
-		}
-		string_code := match word {
-			34 { 1 } // "
-			39 { 2 } // '
+	parser.init()
+	for chr in data {
+		// returns true if byte is a " or '
+		is_quote := chr == `"` || chr == `\'`
+		string_code := match chr {
+			`"` { 1 } // "
+			`\'` { 2 } // '
 			else { 0 }
 		}
-		if parser.lexycal_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code
-			parser.lexycal_attributes.write_lexeme(word)
-			if parser.lexycal_attributes.open_string > 0 {
-				if parser.lexycal_attributes.open_string == string_code {
-					parser.lexycal_attributes.open_string = 0
-				}
-			} else if is_quotation {
-				parser.lexycal_attributes.open_string = string_code
-			} else if word == 62 { // only execute verification if is a > // here will verify < to know if code tag is finished
-				name_close_tag := '</' + parser.lexycal_attributes.opened_code_type + '>'
-				temp_string := parser.builder_str()
-				if temp_string.to_lower().ends_with(name_close_tag) {
-					parser.lexycal_attributes.open_code = false
+		if parser.lexical_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code
+			parser.lexical_attributes.lexeme_builder.write_b(chr)
+			if parser.lexical_attributes.open_string > 0 &&
+				parser.lexical_attributes.open_string == string_code {
+				parser.lexical_attributes.open_string = 0
+			} else if is_quote {
+				parser.lexical_attributes.open_string = string_code
+			} else if chr == `>` { // only execute verification if is a > // here will verify < to know if code tag is finished
+				name_close_tag := '</$parser.lexical_attributes.opened_code_type>'
+				if parser.builder_str().to_lower().ends_with(name_close_tag) {
+					parser.lexical_attributes.open_code = false
 					// need to modify lexeme_builder to add script text as a content in next loop (not gave error in dom)
-					parser.lexycal_attributes.lexeme_builder = temp_string[0..temp_string.len -
-						name_close_tag.len]
-					parser.lexycal_attributes.current_tag.closed = true
-					parser.lexycal_attributes.current_tag.close_type = .new_tag
+					parser.lexical_attributes.lexeme_builder.go_back(name_close_tag.len)
+					parser.lexical_attributes.current_tag.closed = true
+					parser.lexical_attributes.current_tag.close_type = .new_tag
 				}
 			}
-		} else if parser.lexycal_attributes.open_comment {
-			if word == 62 && parser.verify_end_comment(false) { // close tag '>'
-				// parser.print_debug(parser.builder_str() + " >> " + parser.lexycal_attributes.line_count.str())
-				parser.lexycal_attributes.lexeme_builder = '' // strings.Builder{}
-				parser.lexycal_attributes.open_comment = false
-				parser.lexycal_attributes.open_tag = false
+		} else if parser.lexical_attributes.open_comment {
+			if chr == `>` && parser.verify_end_comment(false) { // close tag '>'
+				// parser.print_debug(parser.builder_str() + " >> " + parser.lexical_attributes.line_count.str())
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
+				parser.lexical_attributes.open_comment = false
+				parser.lexical_attributes.open_tag = false
 			} else {
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 			}
-		} else if parser.lexycal_attributes.open_string > 0 {
-			if parser.lexycal_attributes.open_string == string_code {
-				parser.lexycal_attributes.open_string = 0
-				parser.lexycal_attributes.write_lexeme(word)
+		} else if parser.lexical_attributes.open_string > 0 {
+			if parser.lexical_attributes.open_string == string_code {
+				parser.lexical_attributes.open_string = 0
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 				temp_lexeme := parser.builder_str()
-				if parser.lexycal_attributes.current_tag.last_attribute != '' {
-					lattr := parser.lexycal_attributes.current_tag.last_attribute
+				if parser.lexical_attributes.current_tag.last_attribute != '' {
+					lattr := parser.lexical_attributes.current_tag.last_attribute
 					nval := temp_lexeme.substr(1, temp_lexeme.len - 1)
 					// parser.print_debug(lattr + " = " + temp_lexeme)
-					parser.lexycal_attributes.current_tag.attributes[lattr] = nval
-					parser.lexycal_attributes.current_tag.last_attribute = ''
+					parser.lexical_attributes.current_tag.attributes[lattr] = nval
+					parser.lexical_attributes.current_tag.last_attribute = ''
 				} else {
-					parser.lexycal_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' // parser.print_debug(temp_lexeme)
+					parser.lexical_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' // parser.print_debug(temp_lexeme)
 				}
-				parser.lexycal_attributes.lexeme_builder = ''
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			} else {
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 			}
-		} else if parser.lexycal_attributes.open_tag {
-			if parser.lexycal_attributes.lexeme_builder.len == 0 && is_quotation {
-				parser.lexycal_attributes.open_string = string_code
-				parser.lexycal_attributes.write_lexeme(word)
-			} else if word == 62 { // close tag >
+		} else if parser.lexical_attributes.open_tag {
+			if parser.lexical_attributes.lexeme_builder.len == 0 && is_quote {
+				parser.lexical_attributes.open_string = string_code
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
+			} else if chr == `>` { // close tag >
 				complete_lexeme := parser.builder_str().to_lower()
-				parser.lexycal_attributes.current_tag.closed = (complete_lexeme.len > 0 &&
-					complete_lexeme[complete_lexeme.len - 1] == 47) // if equals to /
-				if complete_lexeme.len > 0 && complete_lexeme[0] == 47 {
+				parser.lexical_attributes.current_tag.closed = (complete_lexeme.len > 0 &&
+					complete_lexeme[complete_lexeme.len - 1] == `/`) // if equals to /
+				if complete_lexeme.len > 0 && complete_lexeme[0] == `/` {
 					parser.dom.close_tags[complete_lexeme] = true
 				}
 				/*
 				else if complete_lexeme.len > 0 && complete_lexeme[complete_lexeme.len - 1] == 47 { // if end tag like "/>"
-					parser.lexycal_attributes.current_tag.closed = true
+					parser.lexical_attributes.current_tag.closed = true
 				}
 				*/
-				if parser.lexycal_attributes.current_tag.name == '' {
-					parser.lexycal_attributes.current_tag.name = complete_lexeme
+				if parser.lexical_attributes.current_tag.name == '' {
+					parser.lexical_attributes.current_tag.name = complete_lexeme
 				} else if complete_lexeme != '/' {
-					parser.lexycal_attributes.current_tag.attributes[complete_lexeme] = ''
+					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
 				}
-				parser.lexycal_attributes.open_tag = false
-				parser.lexycal_attributes.lexeme_builder = '' // if tag name is code
-				if parser.lexycal_attributes.current_tag.name in parser.lexycal_attributes.code_tags {
-					parser.lexycal_attributes.open_code = true
-					parser.lexycal_attributes.opened_code_type = parser.lexycal_attributes.current_tag.name
+				parser.lexical_attributes.open_tag = false
+				parser.lexical_attributes.lexeme_builder.go_back_to(0) // if tag name is code
+				if parser.lexical_attributes.current_tag.name in parser.lexical_attributes.code_tags {
+					parser.lexical_attributes.open_code = true
+					parser.lexical_attributes.opened_code_type = parser.lexical_attributes.current_tag.name
 				}
-				// parser.print_debug(parser.lexycal_attributes.current_tag.name)
-			} else if word != 9 && word != 32 && word != 61 && word != 10 { // Tab, space, = and \n
-				parser.lexycal_attributes.write_lexeme(word)
-			} else if word != 10 {
+				// parser.print_debug(parser.lexical_attributes.current_tag.name)
+			} else if chr !in [byte(9), ` `, `=`, `\n`] { // Tab, space, = and \n
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
+			} else if chr != 10 {
 				complete_lexeme := parser.builder_str().to_lower()
-				if parser.lexycal_attributes.current_tag.name == '' {
-					parser.lexycal_attributes.current_tag.name = complete_lexeme
+				if parser.lexical_attributes.current_tag.name == '' {
+					parser.lexical_attributes.current_tag.name = complete_lexeme
 				} else {
-					parser.lexycal_attributes.current_tag.attributes[complete_lexeme] = ''
-					parser.lexycal_attributes.current_tag.last_attribute = ''
-					if word == 61 { // if was a =
-						parser.lexycal_attributes.current_tag.last_attribute = complete_lexeme
+					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
+					parser.lexical_attributes.current_tag.last_attribute = ''
+					if chr == `=` { // if was a =
+						parser.lexical_attributes.current_tag.last_attribute = complete_lexeme
 					}
 				}
-				parser.lexycal_attributes.lexeme_builder = '' // strings.Builder{}
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			}
 			if parser.builder_str() == '!--' {
-				parser.lexycal_attributes.open_comment = true
+				parser.lexical_attributes.open_comment = true
 			}
-		} else if word == 60 { // open tag '<'
+		} else if chr == `<` { // open tag '<'
 			temp_string := parser.builder_str()
-			if parser.lexycal_attributes.lexeme_builder.len >= 1 {
-				if parser.lexycal_attributes.current_tag.name.len > 1 &&
-					parser.lexycal_attributes.current_tag.name[0] == 47 && !blank_string(temp_string) {
+			if parser.lexical_attributes.lexeme_builder.len >= 1 {
+				if parser.lexical_attributes.current_tag.name.len > 1 &&
+					parser.lexical_attributes.current_tag.name[0] == 47 && !blank_string(temp_string) {
 					parser.tags << &Tag{
 						name: 'text'
 						content: temp_string
 					}
 				} else {
-					parser.lexycal_attributes.current_tag.content = temp_string // verify later who has this content
+					parser.lexical_attributes.current_tag.content = temp_string // verify later who has this content
 				}
 			}
-			// parser.print_debug(parser.lexycal_attributes.current_tag.str())
-			parser.lexycal_attributes.lexeme_builder = ''
+			// parser.print_debug(parser.lexical_attributes.current_tag.str())
+			parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			parser.generate_tag()
-			parser.lexycal_attributes.open_tag = true
+			parser.lexical_attributes.open_tag = true
 		} else {
-			parser.lexycal_attributes.write_lexeme(word)
+			parser.lexical_attributes.lexeme_builder.write_b(chr)
 		}
 	}
 }

-pub fn (mut parser Parser) parse_html(data string, is_file bool) {
-	if !parser.initialized {
-		parser.initialize_all()
-	}
-	mut lines := []string{}
-	if is_file {
-		file_lines := os.read_lines(data) or {
-			eprintln('failed to read the file $data')
-			return
-		}
-		lines = file_lines
-	} else {
-		lines = data.split_into_lines()
-	}
+// parse_html parses the given HTML string
+pub fn (mut parser Parser) parse_html(data string) {
+	parser.init()
+	mut lines := data.split_into_lines()
 	for line in lines {
-		parser.lexycal_attributes.line_count++
+		parser.lexical_attributes.line_count++
 		parser.split_parse(line)
 	}
 	parser.generate_tag()
 	parser.dom.debug_file = parser.debug_file
-	parser.dom.construct(parser.tags) // println(parser.close_tags.keys())
+	parser.dom.construct(parser.tags)
 }

+// finalize finishes the parsing stage.
+[inline]
 pub fn (mut parser Parser) finalize() {
 	parser.generate_tag()
 }

-pub fn (parser Parser) get_tags() []&Tag {
-	return parser.tags
-}
-
+// get_dom returns the parser's current DOM representation.
 pub fn (mut parser Parser) get_dom() DocumentObjectModel {
 	if !parser.dom.constructed {
 		parser.generate_tag()
@@ -276,10 +257,3 @@ pub fn (mut parser Parser) get_dom() DocumentObjectModel {
 	}
 	return parser.dom
 }
-
-/*pub fn (mut parser Parser) get_xpath() XPath {
-	dom := parser.get_dom()
-	return XPath{
-		dom: dom
-	}
-}*/
--- a/vlib/net/html/parser_test.v
+++ b/vlib/net/html/parser_test.v
@@ -1,10 +1,10 @@
 module html

-//import net.http
+import strings

 fn test_split_parse() {
 	mut parser := Parser{}
-	parser.initialize_all()
+	parser.init()
 	parser.split_parse('<!doctype htm')
 	parser.split_parse('l public')
 	parser.split_parse('><html><he')
@@ -16,37 +16,26 @@ fn test_split_parse() {
 	parser.split_parse('Nice Test!</h3>')
 	parser.split_parse('</bo\n\n\ndy></html>')
 	parser.finalize()
-	assert parser.get_tags().len == 11
-	assert parser.get_tags()[3].get_content() == ' Hum... A Tit\nle'
+	assert parser.tags.len == 11
+	assert parser.tags[3].content == ' Hum... A Tit\nle'
 }

 fn test_giant_string() {
-	mut temp_html := '<!doctype html><html><head><title>Giant String</title></head><body>'
-	for counter := 0; counter < 2000; counter++ {
-		temp_html += "<div id='name_$counter' class='several-$counter'>Look at $counter</div>"
-	}
-	temp_html += '</body></html>'
+	mut temp_html := strings.new_builder(200)
 	mut parser := Parser{}
-	parser.parse_html(temp_html, false)
-	assert parser.get_tags().len == 4009
+	temp_html.write('<!doctype html><html><head><title>Giant String</title></head><body>')
+	for counter := 0; counter < 2000; counter++ {
+		temp_html.write("<div id='name_$counter' class='several-$counter'>Look at $counter</div>")
+	}
+	temp_html.write('</body></html>')
+	parser.parse_html(temp_html.str())
+	assert parser.tags.len == 4009
 }

 fn test_script_tag() {
-	temp_html := "<html><body><script>\nvar googletag = googletag || {};\n
-	googletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n</script></body></html>"
 	mut parser := Parser{}
-	parser.parse_html(temp_html, false)
-	assert parser.get_tags()[2].get_content().len == 101
+	script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log(\'Birl\');}\n"
+	temp_html := '<html><body><script>$script_content</script></body></html>'
+	parser.parse_html(temp_html)
+	assert parser.tags[2].content.len == script_content.replace('\n', '').len
 }
-
-/*fn test_download_source() {
-	println('Fetching github data in pastebin')
-	resp := http.get('https://pastebin.com/raw/5snUQgqN') or {
-		println('failed to fetch data from the server')
-		return
-	}
-	println('Finalized fetching, start parsing')
-	mut parser := Parser{}
-	parser.parse_html(resp.text, false)
-	assert parser.get_tags().len == 2244
-}*/
--- a/vlib/net/html/tag.v
+++ b/vlib/net/html/tag.v
@@ -1,20 +1,22 @@
 module html

+import strings
+
 enum CloseTagType {
 	in_name
 	new_tag
 }

+// Tag holds the information of an HTML tag.
 [ref_only]
 pub struct Tag {
 pub mut:
 	name               string
 	content            string
 	children           []&Tag
-mut:
 	attributes         map[string]string // attributes will be like map[name]value
 	last_attribute     string
-	parent             &Tag = C.NULL
+	parent             &Tag = 0
 	position_in_parent int
 	closed             bool
 	close_type         CloseTagType = .in_name
@@ -26,62 +28,45 @@ fn (mut tag Tag) add_parent(t &Tag, position int) {
 }

 fn (mut tag Tag) add_child(t &Tag) int {
-	mut children := tag.children
-	children << t
-	tag.children = children
+	tag.children << t
 	return tag.children.len
 }

-pub fn (tag Tag) get_children() []&Tag {
-	return tag.children
-}
-
-pub fn (tag Tag) get_parent() &Tag {
-	return tag.parent
-}
-
-pub fn (tag Tag) get_name() string {
-	return tag.name
-}
-
-pub fn (tag Tag) get_content() string {
-	return tag.content
-}
-
-pub fn (tag Tag) get_attributes() map[string]string {
-	return tag.attributes
-}
-
+// text returns the text contents of the tag.
 pub fn (tag Tag) text() string {
-	if tag.name.len >= 2 && tag.name[0..2] == 'br' {
+	if tag.name.len >= 2 && tag.name[..2] == 'br' {
 		return '\n'
 	}
-	mut to_return := tag.content.replace('\n', '')
-	for index := 0; index < tag.children.len; index++ {
-		to_return += tag.children[index].text()
+	mut text_str := strings.new_builder(200)
+	text_str.write(tag.content.replace('\n', ''))
+	for child in tag.children {
+		text_str.write(child.text())
 	}
-	return to_return
+	return text_str.str()
 }

 pub fn (tag &Tag) str() string {
-	mut to_return := '<$tag.name'
-	for key in tag.attributes.keys() {
-		to_return += ' $key'
-		value := tag.attributes[key]
+	mut html_str := strings.new_builder(200)
+	html_str.write('<$tag.name')
+	for key, value in tag.attributes {
+		html_str.write(' $key')
 		if value.len > 0 {
-			to_return += '=' + '"${tag.attributes[key]}"'
+			html_str.write('="$value"')
 		}
 	}
-	to_return += if tag.closed && tag.close_type == .in_name { '/>' } else { '>' }
-	to_return += '$tag.content'
+	html_str.write(if tag.closed && tag.close_type == .in_name {
+		'/>'
+	} else {
+		'>'
+	})
+	html_str.write(tag.content)
 	if tag.children.len > 0 {
-		// println('${tag.name} have ${tag.children.len} childrens')
-		for index := 0; index < tag.children.len; index++ {
-			to_return += tag.get_children()[index].str()
+		for child in tag.children {
+			html_str.write(child.str())
 		}
 	}
 	if !tag.closed || tag.close_type == .new_tag {
-		to_return += '</$tag.name>'
+		html_str.write('</$tag.name>')
 	}
-	return to_return
+	return html_str.str()
 }