一.贴吧
1.1 普通版
package main
import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
)
// main prompts for an inclusive page range on standard input and crawls it
// sequentially with Dowork1.
//
// The range is validated before any request is made: both values must parse
// as integers, the start page must be at least 1 and the end page must not be
// smaller than the start page. On invalid input a message is printed and the
// program returns without crawling anything.
func main() {
	var start, end int

	fmt.Printf("请输入起始页(>=1):")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("读取起始页失败:", err)
		return
	}

	fmt.Printf("请输入终止页(>=起始页):")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("读取终止页失败:", err)
		return
	}

	// Enforce what the prompts ask for; page 0 or a negative page would
	// otherwise be requested with a negative pn offset.
	if start < 1 || end < start {
		fmt.Printf("页码范围无效: 起始页=%d, 终止页=%d\n", start, end)
		return
	}

	Dowork1(start, end)
}
// Dowork1 crawls Tieba list pages start through end (inclusive), one after
// another, and stores each page as "<page>.html" in the working directory.
//
// Page i is requested with the query parameter pn=(i-1)*50. A page whose
// download or save fails is reported on standard output and skipped; the
// remaining pages are still processed.
func Dowork1(start, end int) {
	fmt.Printf("正在爬取%d到%d的页面\n", start, end)
	for i := start; i <= end; i++ {
		url := "https://tieba.baidu.com/f?kw=galgame&ie=utf-8&pn=" + strconv.Itoa((i-1)*50)
		fmt.Println(url)

		resp, err := HttpGet1(url)
		if err != nil {
			fmt.Println("spide err=", err)
			continue
		}

		fileName := strconv.Itoa(i) + ".html"
		file, err := os.Create(fileName)
		if err != nil {
			fmt.Println("os creat err", err)
			continue
		}

		// A failed write or close leaves a truncated file on disk, so both
		// results are checked instead of being discarded. The file is always
		// closed, even when the write failed.
		_, writeErr := file.WriteString(resp)
		closeErr := file.Close()
		if writeErr != nil {
			fmt.Println("os write err", writeErr)
			continue
		}
		if closeErr != nil {
			fmt.Println("os close err", closeErr)
		}
	}
}
// HttpGet1 fetches url with an HTTP GET and returns the complete response
// body as a string.
//
// A non-nil error is returned when the request cannot be performed, when the
// server answers with a status outside the 2xx range, or when reading the
// body fails part-way. In every error case resp is the empty string, so a
// caller never mistakes a truncated or error page for a successful download.
func HttpGet1(url string) (resp string, err error) {
	response, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, response.Status)
	}

	// io.ReadAll reads until EOF and reports any other read error, unlike a
	// hand-written loop that stops on the first zero-length read.
	body, err := io.ReadAll(response.Body)
	if err != nil {
		return "", fmt.Errorf("GET %s: reading body: %w", url, err)
	}
	return string(body), nil
}
1.2 并发版
package main
import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
)
// main prompts for an inclusive page range on standard input and crawls it
// concurrently with Dowork.
//
// The range is validated before any request is made: both values must parse
// as integers, the start page must be at least 1 and the end page must not be
// smaller than the start page. On invalid input a message is printed and the
// program returns without crawling anything.
func main() {
	var start, end int

	fmt.Printf("请输入起始页(>=1):")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("读取起始页失败:", err)
		return
	}

	fmt.Printf("请输入终止页(>=起始页):")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("读取终止页失败:", err)
		return
	}

	// Enforce what the prompts ask for; page 0 or a negative page would
	// otherwise be requested with a negative pn offset.
	if start < 1 || end < start {
		fmt.Printf("页码范围无效: 起始页=%d, 终止页=%d\n", start, end)
		return
	}

	Dowork(start, end)
}
// Dowork crawls Tieba list pages start through end (inclusive) concurrently.
//
// One SpideTieba goroutine is started per page, all sharing a single
// unbuffered channel. Dowork then receives one value per started goroutine
// and prints a completion line for each, so it returns only after as many
// values have arrived as pages were requested.
func Dowork(start, end int) {
	fmt.Printf("正在爬取%d到%d的页面\n", start, end)

	done := make(chan int)

	// Fan out: launch every page and remember how many results to expect.
	pending := 0
	for pageNo := start; pageNo <= end; pageNo++ {
		go SpideTieba(pageNo, done)
		pending++
	}

	// Fan in: one receive per launched goroutine, in completion order.
	for ; pending > 0; pending-- {
		fmt.Printf("第%d个页面爬取完成\n", <-done)
	}
}
// SpideTieba downloads Tieba list page i (requested with pn=(i-1)*50), saves
// it as "<i>.html" in the working directory and then reports on page that it
// has finished.
//
// Exactly one value, i, is sent on page per call, whether or not the page was
// saved. Failures are printed to standard output before the send. Sending on
// every exit path matters because a receiver that waits for one value per
// started goroutine would otherwise block forever after a single failed page.
func SpideTieba(i int, page chan<- int) {
	defer func() { page <- i }()

	url := "https://tieba.baidu.com/f?kw=galgame&ie=utf-8&pn=" + strconv.Itoa((i-1)*50)
	fmt.Println(url)

	resp, err := HttpGet(url)
	if err != nil {
		fmt.Printf("正在爬第%d页:%s\n", i, err)
		return
	}

	fileName := strconv.Itoa(i) + ".html"
	file, err := os.Create(fileName)
	if err != nil {
		fmt.Println("os creat err", err)
		return
	}

	// A failed write or close leaves a truncated file on disk, so both
	// results are checked instead of being discarded. The file is always
	// closed, even when the write failed.
	_, writeErr := file.WriteString(resp)
	closeErr := file.Close()
	if writeErr != nil {
		fmt.Println("os write err", writeErr)
		return
	}
	if closeErr != nil {
		fmt.Println("os close err", closeErr)
	}
}
// HttpGet fetches url with an HTTP GET and returns the complete response body
// as a string.
//
// A non-nil error is returned when the request cannot be performed, when the
// server answers with a status outside the 2xx range, or when reading the
// body fails part-way. In every error case resp is the empty string, so a
// caller never mistakes a truncated or error page for a successful download.
func HttpGet(url string) (resp string, err error) {
	response, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, response.Status)
	}

	// io.ReadAll reads until EOF and reports any other read error, unlike a
	// hand-written loop that stops on the first zero-length read.
	body, err := io.ReadAll(response.Body)
	if err != nil {
		return "", fmt.Errorf("GET %s: reading body: %w", url, err)
	}
	return string(body), nil
}