1
0
mirror of https://github.com/chai2010/advanced-go-programming-book.git synced 2025-05-24 12:32:21 +00:00

add pub demo

This commit is contained in:
Xargin 2018-09-01 16:39:44 +08:00
parent cf95d2b714
commit d3d008ee1c

View File

@ -154,4 +154,77 @@ for {
#### 结合 colly 的消息生产 #### 结合 colly 的消息生产
```go
package main
import (
"fmt"
"net/url"
"github.com/gocolly/colly"
)
var domain2Collector = map[string]*colly.Collector{}
var nc *nats.Conn
var maxDepth = 10
var natsURL = "nats://localhost:4222"
func factory(urlStr string) *colly.Collector {
u, _ := url.Parse(urlStr)
return domain2Collector[u.Host]
}
func initV2exCollector() *colly.Collector {
c := colly.NewCollector(
colly.AllowedDomains("www.v2ex.com"),
colly.MaxDepth(maxDepth),
)
c.OnResponse(func(resp *colly.Response) {
// 做一些爬完之后的善后工作
// 比如页面已爬完的确认存进 MySQL
})
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
// 基本的反爬虫策略
time.Sleep(time.Second * 2)
c.Visit(e.Request.AbsoluteURL(link))
})
return c
}
func initV2fxCollector() *colly.Collector {
c := colly.NewCollector(
colly.AllowedDomains("www.v2fx.com"),
colly.MaxDepth(maxDepth),
)
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
})
return c
}
func init() {
domain2Collector["www.v2ex.com"] = initV2exCollector()
domain2Collector["www.v2fx.com"] = initV2fxCollector()
var err error
nc, err = nats.Connect(natsURL)
if err != nil {
// log fatal
os.Exit(1)
}
}
func main() {
urls := []string{"https://www.v2ex.com", "https://www.v2fx.com"}
for _, url := range urls {
instance := factory(url)
instance.Visit(url)
}
}
```
#### 结合 colly 的消息消费 #### 结合 colly 的消息消费