Go 爬虫框架 Colly 的使用

package main
import ( "fmt" "github.com/gocolly/colly")
// main starts a crawl at https://docs.dbsgw.com/ and follows the links found
// on every fetched page, printing each URL just before it is requested.
func main() {
	c := colly.NewCollector()

	// Follow only anchors that actually carry an href attribute.
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		// The error is discarded on purpose: a link that cannot be queued is
		// simply skipped and the crawl carries on.
		_ = e.Request.Visit(e.Attr("href"))
	})

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL)
	})

	// Report a failed start instead of exiting silently.
	if err := c.Visit("https://docs.dbsgw.com/"); err != nil {
		fmt.Println(err.Error())
	}
}

package main
import ( "fmt" "github.com/gocolly/colly" "strings" "time")
func main() {
 c := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1))
 c1 := c.Clone() c1.Async = true //限速 c1.Limit(&colly.LimitRule{ DomainRegexp: "", DomainGlob: "*.jianshu.com/p/*", Delay: 10 * time.Second, RandomDelay: 0, Parallelism: 1, }) // 首页提取链接 c.OnHTML("body > section > div.sidebar > div.widget.widget_ui_tags.wow.article-categories.fadeInUp > div", func(e *colly.HTMLElement) {
 e.ForEach("a", func(i int, element *colly.HTMLElement) { href := element.Attr("href") if strings.Contains(href, "http") { fmt.Println(href,">>>>>包含",i) c1.Request("GET",href,nil,nil,nil)
 }else { fmt.Println(element.Text,"<<<<<<不包含") } })
 })
 // 从链接里面 异步提取详情 c1.OnHTML("body > section > div.content-wrap > div > article.excerpt.excerpt-1.wow.fadeInUp > header > h2 > a", func(e *colly.HTMLElement) { fmt.Println(e.Text)
 })
 err := c.Visit("https://blog.dbsgw.com/") if err != nil { fmt.Println(err.Error()) } c1.Wait()}

package main
import ( "fmt" "github.com/gocolly/colly" "github.com/gocolly/colly/debug" "github.com/gocolly/colly/extensions" _ "github.com/gocolly/colly/extensions" "net/http")
func main() { url := "https://blog.dbsgw.com/admin/"
 c := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"), colly.MaxDepth(1), colly.Debugger(&debug.LogDebugger{})) c.OnHTML("#admindex_servinfo > ul", func(e *colly.HTMLElement) { fmt.Println(e.Text,"---------------") })
 //设置随机useragent extensions.RandomUserAgent(c) //设置登录cookie c.SetCookies(url, []*http.Cookie{ &http.Cookie{ Name: "PHPSESSID", Value: "3ur579rq6lindrkolomq24br17", Path: "/", Domain: "blog.dbsgw.com", Secure: true, HttpOnly: true, }, }) c.SetCookies(url, []*http.Cookie{ &http.Cookie{ Name: "EM_AUTHCOOKIE_95yUDyuGoTu7WYejbLXmH8m6O630lMsd", Value: "admin%7C%7Ca7e91e464d86db572e6e588ef9dd5815", Path: "/", Domain: "blog.dbsgw.com", Secure: true, HttpOnly: true, }, })

 c.OnRequest(func(r *colly.Request) { fmt.Println("爬取页面：", r.URL) })
 c.OnError(func(r *colly.Response, err error) { fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err) }) err := c.Visit(url) if err != nil { fmt.Println(err.Error()) }}