mirror of
https://github.com/gopl-zh/gopl-zh.github.com.git
synced 2024-11-05 14:03:45 +00:00
73 lines
1.4 KiB
Go
73 lines
1.4 KiB
Go
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
|
|
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
|
|
|
// See page 240.
|
|
|
|
// Crawl1 crawls web links starting with the command-line arguments.
|
|
//
|
|
// This version quickly exhausts available file descriptors
|
|
// due to excessive concurrent calls to links.Extract.
|
|
//
|
|
// Also, it never terminates because the worklist is never closed.
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
|
|
"gopl.io/ch5/links"
|
|
)
|
|
|
|
//!+crawl
|
|
func crawl(url string) []string {
|
|
fmt.Println(url)
|
|
list, err := links.Extract(url)
|
|
if err != nil {
|
|
log.Print(err)
|
|
}
|
|
return list
|
|
}
|
|
|
|
//!-crawl
|
|
|
|
//!+main
|
|
func main() {
|
|
worklist := make(chan []string)
|
|
|
|
// Start with the command-line arguments.
|
|
go func() { worklist <- os.Args[1:] }()
|
|
|
|
// Crawl the web concurrently.
|
|
seen := make(map[string]bool)
|
|
for list := range worklist {
|
|
for _, link := range list {
|
|
if !seen[link] {
|
|
seen[link] = true
|
|
go func(link string) {
|
|
worklist <- crawl(link)
|
|
}(link)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//!-main
|
|
|
|
/*
|
|
//!+output
|
|
$ go build gopl.io/ch8/crawl1
|
|
$ ./crawl1 http://gopl.io/
|
|
http://gopl.io/
|
|
https://golang.org/help/
|
|
|
|
https://golang.org/doc/
|
|
https://golang.org/blog/
|
|
...
|
|
2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host
|
|
2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket:
|
|
too many open files
|
|
...
|
|
//!-output
|
|
*/
|