根据前面学习的爬虫方面的知识,自己对中国国旅网页进行了爬取,现将代码分享如下:
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"strconv"
"strings"
)
//http://www.szcits.cn/line/346/
// main runs PaseGuoLv and reports the outcome on stdout: a failure message
// when no entries came back, otherwise a success message followed by the
// number of entries.
func main() {
	count := len(PaseGuoLv())
	if count != 0 {
		fmt.Println("获取旅游城市成功" + strconv.Itoa(count))
		return
	}
	fmt.Println("获取旅游城市失败")
}
// TravelModel holds the fields collected for a single travel listing.
type TravelModel struct {
title string // listing title
sub_tiltle string // subtitle
set_out_city string // departure city
price int // price
per_good int // satisfaction rating
look_count int // number of views
detail_url string // URL of the detail page
dateString string // departure time
price_type string // currency symbol, e.g. $ or ¥
}
// PaseGuoLv crawls the listing page http://www.szcits.cn/line/346/ and returns
// one TravelModel per ".big_box section" element found on it.
//
// For every section the scraped values are printed to stdout and stored in a
// TravelModel. The numeric fields (price, per_good, look_count) are parsed with
// strconv.Atoi after trimming surrounding whitespace; a value that does not
// parse is left at 0. The returned slice is empty when no section matched, so
// callers can treat len(result) == 0 as "nothing scraped".
//
// If the document cannot be fetched or parsed, the error is logged and the
// process exits through log.Fatal.
func PaseGuoLv() []TravelModel {
	const baseURL = "http://www.szcits.cn"

	// Start empty. make([]TravelModel, 1) would seed the result with a
	// zero-value entry, so the slice would never be empty and the count would
	// always be one too high.
	list := make([]TravelModel, 0)

	fmt.Println("开始爬虫中国国旅的数据...")
	// NOTE(review): goquery documents NewDocument as deprecated in favor of
	// net/http + NewDocumentFromReader; consider migrating so the HTTP status
	// can be checked as well.
	js, err := goquery.NewDocument(baseURL + "/line/346/")
	if err != nil {
		fmt.Println("NewDocument Error")
		log.Fatal(err) // terminates the process; nothing below runs
	}
	fmt.Println("NewDocument right")

	// toInt is deliberately best-effort: scraped text that is not a plain
	// integer yields 0 instead of aborting the whole crawl.
	toInt := func(s string) int {
		n, _ := strconv.Atoi(strings.TrimSpace(s))
		return n
	}

	js.Find(".big_box section").Each(func(_ int, contentionSection *goquery.Selection) {
		intro := contentionSection.Find(".intro .sub_title").Text()
		city := contentionSection.Find(".city_title span").Text()
		title := contentionSection.Find(".intro h3 a").Text()
		price := contentionSection.Find(".right .price strong").Text()
		priceType := contentionSection.Find(".right .price span").Text()
		likeLook := contentionSection.Find(".right .txt span").Text()

		// Only build a detail URL when the link really carries an href;
		// otherwise the bare site root would be stored as a bogus detail link.
		var detailURL string
		if href, ok := contentionSection.Find(".right .buttom a").Attr("href"); ok {
			detailURL = baseURL + href
		}

		// The combined span text is split on "%": the part before it is used
		// as the satisfaction value, the part after it as the view count.
		var like, look string
		if parts := strings.Split(likeLook, "%"); len(parts) > 1 {
			like = parts[0]
			look = parts[1]
		}

		fmt.Println("---------------------")
		fmt.Println("\n获取的旅游介绍是" + intro + "\n\t")
		// Print the scraped city; the label announces the departure city, not
		// the section index.
		fmt.Println("出发的城市是:" + city + "\n\t")
		fmt.Println("title is:", title)
		fmt.Println("价钱是 is", priceType+price)
		fmt.Println("喜欢的数量是", like+"--"+look)
		fmt.Println("html的url是", detailURL)
		fmt.Println("---------------------")

		model := TravelModel{}
		model.sub_tiltle = intro
		model.set_out_city = city
		model.title = title
		model.price_type = priceType
		model.price = toInt(price)
		model.per_good = toInt(like)
		model.look_count = toInt(look)
		model.detail_url = detailURL
		list = append(list, model)
	})
	return list
}
未完待续