gopl-zh.github.com/vendor/gopl.io/ch8/crawl1/findlinks.go

// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/

// See page 240.

// Crawl1 crawls web links starting with the command-line arguments.
//
// This version quickly exhausts available file descriptors
// due to excessive concurrent calls to links.Extract.
//
// Also, it never terminates because the worklist is never closed.
package main

import (
	"fmt"
	"log"
	"os"

	"gopl.io/ch5/links"
)

//!+crawl
// crawl prints url on standard output, asks links.Extract for the links
// on that page, and returns the resulting list. A failure from Extract is
// logged rather than propagated; whatever list Extract produced is still
// returned to the caller.
func crawl(url string) []string {
	fmt.Println(url)

	found, err := links.Extract(url)
	if err == nil {
		return found
	}

	// Report the problem but keep going: the caller only wants links.
	log.Print(err)
	return found
}

//!-crawl

//!+main
// main seeds the crawl with the command-line arguments and then, for every
// batch of links received, starts one goroutine per not-yet-seen link to
// crawl it and feed its links back into the same channel.
//
// Known limitations of this version: the number of concurrent crawl
// goroutines is not bounded, and the channel is never closed, so the
// receive loop below never finishes on its own.
func main() {
	// Unbuffered channel carrying batches of links that still need crawling.
	pending := make(chan []string)

	// The initial send happens in its own goroutine because a send on an
	// unbuffered channel blocks until the loop below is ready to receive.
	go func() {
		pending <- os.Args[1:]
	}()

	// visited is touched only by this goroutine, so it needs no locking.
	visited := make(map[string]bool)
	for batch := range pending {
		for _, candidate := range batch {
			if visited[candidate] {
				continue
			}
			visited[candidate] = true

			// Pass the link as an argument so each goroutine gets its own copy.
			go func(target string) {
				pending <- crawl(target)
			}(candidate)
		}
	}
}

//!-main

/*
//!+output
$ go build gopl.io/ch8/crawl1
$ ./crawl1 http://gopl.io/
http://gopl.io/
https://golang.org/help/

https://golang.org/doc/
https://golang.org/blog/
...
2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host
2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket:
                                                        too many open files
...
//!-output
*/
no msg 2015-12-09 07:57:17 +00:00			`// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.`
			`// License: https://creativecommons.org/licenses/by-nc-sa/4.0/`

			`// See page 240.`

			`// Crawl1 crawls web links starting with the command-line arguments.`
			`//`
			`// This version quickly exhausts available file descriptors`
			`// due to excessive concurrent calls to links.Extract.`
			`//`
			`// Also, it never terminates because the worklist is never closed.`
			`package main`

			`import (`
			`"fmt"`
			`"log"`
			`"os"`

			`"gopl.io/ch5/links"`
			`)`

			`//!+crawl`
			`func crawl(url string) []string {`
			`fmt.Println(url)`
			`list, err := links.Extract(url)`
			`if err != nil {`
			`log.Print(err)`
			`}`
			`return list`
			`}`

			`//!-crawl`

			`//!+main`
			`func main() {`
			`worklist := make(chan []string)`

			`// Start with the command-line arguments.`
			`go func() { worklist <- os.Args[1:] }()`

			`// Crawl the web concurrently.`
			`seen := make(map[string]bool)`
			`for list := range worklist {`
			`for _, link := range list {`
			`if !seen[link] {`
			`seen[link] = true`
			`go func(link string) {`
			`worklist <- crawl(link)`
			`}(link)`
			`}`
			`}`
			`}`
			`}`

			`//!-main`

			`/*`
			`//!+output`
			`$ go build gopl.io/ch8/crawl1`
			`$ ./crawl1 http://gopl.io/`
			`http://gopl.io/`
			`https://golang.org/help/`

			`https://golang.org/doc/`
			`https://golang.org/blog/`
			`...`
			`2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host`
			`2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket:`
			`too many open files`
			`...`
			`//!-output`
			`*/`