mirror of
https://github.com/gopl-zh/gopl-zh.github.com.git
synced 2025-09-11 06:11:33 +00:00
good good study, day day up!
This commit is contained in:
64
vendor/gopl.io/ch8/crawl2/findlinks.go
generated
vendored
Normal file
64
vendor/gopl.io/ch8/crawl2/findlinks.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
|
||||
// See page 241.
|
||||
|
||||
// Crawl2 crawls web links starting with the command-line arguments.
|
||||
//
|
||||
// This version uses a buffered channel as a counting semaphore
|
||||
// to limit the number of concurrent calls to links.Extract.
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"gopl.io/ch5/links"
|
||||
)
|
||||
|
||||
//!+sema
|
||||
// tokens serves as a counting semaphore. Its buffer holds 20 slots, so at
// most 20 goroutines can be between the send that acquires a slot and the
// receive that frees it; crawl brackets its links.Extract call with that pair.
var tokens = make(chan struct{}, 20)
|
||||
|
||||
func crawl(url string) []string {
|
||||
fmt.Println(url)
|
||||
tokens <- struct{}{} // acquire a token
|
||||
list, err := links.Extract(url)
|
||||
<-tokens // release the token
|
||||
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
//!-sema
|
||||
|
||||
//!+
|
||||
func main() {
|
||||
worklist := make(chan []string)
|
||||
var n int // number of pending sends to worklist
|
||||
|
||||
// Start with the command-line arguments.
|
||||
n++
|
||||
go func() { worklist <- os.Args[1:] }()
|
||||
|
||||
// Crawl the web concurrently.
|
||||
seen := make(map[string]bool)
|
||||
for ; n > 0; n-- {
|
||||
list := <-worklist
|
||||
for _, link := range list {
|
||||
if !seen[link] {
|
||||
seen[link] = true
|
||||
n++
|
||||
go func(link string) {
|
||||
worklist <- crawl(link)
|
||||
}(link)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//!-
|
Reference in New Issue
Block a user