mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
examples: add web_crawler and get_weather (#10084)
This commit is contained in:
22
examples/web_crawler/README.md
Normal file
22
examples/web_crawler/README.md
Normal file
@ -0,0 +1,22 @@
|
||||
# web_crawler
|
||||
web_crawler is a very simple web crawler.
|
||||
This web crawler fetches news from tuicool.com,
|
||||
(a Chinese site similar to hacker-news.firebaseio.com).
|
||||
|
||||
# Compile and Run
|
||||
|
||||
Use this to generate an executable, and then launch the web crawler:
|
||||
```bash
|
||||
v web_crawler.v
|
||||
./web_crawler
|
||||
```
|
||||
|
||||
And this to compile and launch the web crawler directly:
|
||||
```bash
|
||||
v run web_crawler.v
|
||||
```
|
||||
|
||||
This project shows how to use http.fetch() to get an http.Response,
|
||||
and then html.parse() to parse the returned HTML.
|
||||
|
||||
It's easy, isn't it?
|
31
examples/web_crawler/web_crawler.v
Normal file
31
examples/web_crawler/web_crawler.v
Normal file
@ -0,0 +1,31 @@
|
||||
import net.http
|
||||
import net.html
|
||||
|
||||
// main downloads the tuicool.com front page, parses it as HTML and prints the
// `href` and `title` attributes of the first child of every element whose
// `class` attribute is 'list_article_item'.
fn main() {
	/*
	Disabled alternative using http.get():
	user_agent = 'v.http'
	resp := http.get('https://tuicool.com') or {
		println('failed to fetch data from the server')
		return
	}
	*/
	// http.fetch() sends an HTTP request to the URL with the given method and configurations.
	// A browser-like user agent is sent instead of the library default.
	config := http.FetchConfig{
		user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
	}
	resp := http.fetch('https://tuicool.com', config) or {
		println('failed to fetch data from the server')
		return
	}
	// html.parse() parses and returns the DOM from the given text.
	mut doc := html.parse(resp.text)
	// html.DocumentObjectModel.get_tag_by_attribute_value() retrieves all the tags
	// in the document that have the given attribute name and value.
	tags := doc.get_tag_by_attribute_value('class', 'list_article_item')
	for tag in tags {
		// A matching element without children would make `children[0]` panic
		// with an out-of-range index; skip it so one odd item does not abort the run.
		if tag.children.len == 0 {
			continue
		}
		link := tag.children[0]
		// Items whose first child lacks either attribute are skipped rather than
		// crashing the whole crawler.
		href := link.attributes['href'] or { continue }
		title := link.attributes['title'] or { continue }
		println('href: $href')
		println('title: $title')
		println('')
	}
}
|
Reference in New Issue
Block a user