From 4e256259055102cf5e7d80196f765c44855591db Mon Sep 17 00:00:00 2001 From: Xargin Date: Thu, 10 Dec 2015 19:53:24 +0800 Subject: [PATCH 1/3] 8.8 done --- ch8/ch8-08.md | 184 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 183 insertions(+), 1 deletion(-) diff --git a/ch8/ch8-08.md b/ch8/ch8-08.md index c72718e..b81cbf9 100644 --- a/ch8/ch8-08.md +++ b/ch8/ch8-08.md @@ -1,3 +1,185 @@ ## 8.8. 示例: 併髮的字典遍歷 -TODO +In this section, we’ll build a program that reports the disk usage of one or more directories specified on the command line, like the Unix du command. Most of its work is done by the walkDir function below, which enumerates the entries of the directory dir using the dirents helper function. + +```go +gopl.io/ch8/du1 +// walkDir recursively walks the file tree rooted at dir +// and sends the size of each found file on fileSizes. +func walkDir(dir string, fileSizes chan<- int64) { + for _, entry := range dirents(dir) { + if entry.IsDir() { + subdir := filepath.Join(dir, entry.Name()) + walkDir(subdir, fileSizes) + } else { + fileSizes <- entry.Size() + } + } +} + +// dirents returns the entries of directory dir. +func dirents(dir string) []os.FileInfo { + entries, err := ioutil.ReadDir(dir) + if err != nil { + fmt.Fprintf(os.Stderr, "du1: %v\n", err) + return nil + } + return entries +} +``` + +he ioutil.ReadDir function returns a slice of os.FileInfo—the same information that a call to os.Stat returns for a single file. For each subdirectory, walkDir recursively calls itself, and for each file, walkDir sends a message on the fileSizes channel. The message is the size of the file in bytes. + +The main function, shown below, uses two goroutines. The background goroutine calls walkDir for each directory specified on the command line and finally closes the fileSizes channel. The main goroutine computes the sum of the file sizes it receives from the channel and finally prints the total. 
+ + +```go +package main + +import ( + "flag" + "fmt" + "io/ioutil" + "os" + "path/filepath" +) + +func main() { + // Determine the initial directories. + flag.Parse() + roots := flag.Args() + if len(roots) == 0 { + roots = []string{"."} + } + + // Traverse the file tree. + fileSizes := make(chan int64) + go func() { + for _, root := range roots { + walkDir(root, fileSizes) + } + close(fileSizes) + }() + + // Print the results. + var nfiles, nbytes int64 + for size := range fileSizes { + nfiles++ + nbytes += size + } + printDiskUsage(nfiles, nbytes) +} + +func printDiskUsage(nfiles, nbytes int64) { + fmt.Printf("%d files %.1f GB\n", nfiles, float64(nbytes)/1e9) +} + +``` +This program pauses for a long while before printing its result: +``` +$ go build gopl.io/ch8/du1 +$ ./du1 $HOME /usr /bin /etc +213201 files 62.7 GB +``` + +The program would be nicer if it kept us informed of its progress. However, simply moving the printDiskUsage call into the loop would cause it to print thousands of lines of output. +The variant of du below prints the totals periodically, but only if the -v flag is specified since not all users will want to see progress messages. The background goroutine that loops over roots remains unchanged. The main goroutine now uses a ticker to generate events every 500ms, and a select statement to wait for either a file size message, in which case it updates the totals, or a tick event, in which case it prints the current totals. If the -v flag is not specified, the tick channel remains nil, and its case in the select is effectively disabled. + +```go +gopl.io/ch8/du2 +var verbose = flag.Bool("v", false, "show verbose progress messages") + +func main() { + // ...start background goroutine... + + // Print the results periodically. 
+ var tick <-chan time.Time + if *verbose { + tick = time.Tick(500 * time.Millisecond) + } + var nfiles, nbytes int64 +loop: + for { + select { + case size, ok := <-fileSizes: + if !ok { + break loop // fileSizes was closed + } + nfiles++ + nbytes += size + case <-tick: + printDiskUsage(nfiles, nbytes) + } + } + printDiskUsage(nfiles, nbytes) // final totals +} +``` +Since the program no longer uses a range loop, the first select case must explicitly test whether the fileSizes channel has been closed, using the two-result form of receive opera- tion. If the channel has been closed, the program breaks out of the loop. The labeled break statement breaks out of both the select and the for loop; an unlabeled break would break out of only the select, causing the loop to begin the next iteration. + +The program now gives us a leisurely stream of updates: + + +$ go build gopl.io/ch8/du2 +$ ./du2 -v $HOME /usr /bin /etc +28608 files 8.3 GB +54147 files 10.3 GB +93591 files 15.1 GB +127169 files 52.9 GB +175931 files 62.2 GB +213201 files 62.7 GB + +However, it still takes too long to finish. There’s no reason why all the calls to walkDir can’t be done concurrently, thereby exploiting parallelism in the disk system. The third version of du, below, creates a new goroutine for each call to walkDir. It uses a sync.WaitGroup (§8.5) to count the number of calls to walkDir that are still active, and a closer goroutine to close the fileSizes channel when the counter drops to zero. + + +```go +gopl.io/ch8/du3 +func main() { + // ...determine roots... + // Traverse each root of the file tree in parallel. + fileSizes := make(chan int64) + var n sync.WaitGroup + for _, root := range roots { + n.Add(1) + go walkDir(root, &n, fileSizes) + } + go func() { + n.Wait() + close(fileSizes) + }() + // ...select loop... 
+} + +func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64) { + defer n.Done() + for _, entry := range dirents(dir) { + if entry.IsDir() { + n.Add(1) + subdir := filepath.Join(dir, entry.Name()) + go walkDir(subdir, n, fileSizes) + } else { + fileSizes <- entry.Size() + } + } +} +``` + +Since this program creates many thousands of goroutines at its peak, we have to change dirents to use a counting semaphore to prevent it from opening too many files at once, just as we did for the web crawler in Section 8.6: + + +```go +// sema is a counting semaphore for limiting concurrency in dirents. +var sema = make(chan struct{}, 20) + +// dirents returns the entries of directory dir. +func dirents(dir string) []os.FileInfo { + sema <- struct{}{} // acquire token + defer func() { <-sema }() // release token + // ... + +``` + +This version runs several times faster than the previous one, though there is a lot of variability from system to system. + +Exercise 8.9: Write a version of du that computes and periodically displays separate totals for each of the root directories. + + From 195e5d450fdbc653906c9ada64f8f575405c64ad Mon Sep 17 00:00:00 2001 From: Xargin Date: Fri, 11 Dec 2015 13:03:04 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E6=BC=8F=E7=BF=BB=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ch8/ch8-06.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ch8/ch8-06.md b/ch8/ch8-06.md index 3fbdc91..d98a573 100644 --- a/ch8/ch8-06.md +++ b/ch8/ch8-06.md @@ -82,7 +82,7 @@ func crawl(url string) []string { } ``` -The second problem is that the program never terminates, even when it has discovered all the links reachable from the initial URLs. (Of course, you’re unlikely to notice this problem unless you choose the initial URLs carefully or implement the depth-limiting feature of Exercise 8.6.) 
For the program to terminate, we need to break out of the main loop when the worklist is empty and no crawl goroutines are active. +第二个问题是这个程序永远都不会终止,即使它已经爬到了所有初始链接衍生出的链接。(当然,除非你慎重地选择了合适的初始化URL或者已经实现了练习8.6中的深度限制,你应该还没有意识到这个问题)。为了使这个程序能够终止,我们需要在worklist为空并且没有crawl的goroutine在运行时退出主循环。 ```go From ee87a6dc8b2801119a36ae2aad0ef1fa2c04ce66 Mon Sep 17 00:00:00 2001 From: Xargin Date: Fri, 11 Dec 2015 14:55:53 +0800 Subject: [PATCH 3/3] Revert "8.8 done" This reverts commit 4e256259055102cf5e7d80196f765c44855591db. --- ch8/ch8-08.md | 184 +------------------------------------------------- 1 file changed, 1 insertion(+), 183 deletions(-) diff --git a/ch8/ch8-08.md b/ch8/ch8-08.md index b81cbf9..c72718e 100644 --- a/ch8/ch8-08.md +++ b/ch8/ch8-08.md @@ -1,185 +1,3 @@ ## 8.8. 示例: 併髮的字典遍歷 -In this section, we’ll build a program that reports the disk usage of one or more directories specified on the command line, like the Unix du command. Most of its work is done by the walkDir function below, which enumerates the entries of the directory dir using the dirents helper function. - -```go -gopl.io/ch8/du1 -// walkDir recursively walks the file tree rooted at dir -// and sends the size of each found file on fileSizes. -func walkDir(dir string, fileSizes chan<- int64) { - for _, entry := range dirents(dir) { - if entry.IsDir() { - subdir := filepath.Join(dir, entry.Name()) - walkDir(subdir, fileSizes) - } else { - fileSizes <- entry.Size() - } - } -} - -// dirents returns the entries of directory dir. -func dirents(dir string) []os.FileInfo { - entries, err := ioutil.ReadDir(dir) - if err != nil { - fmt.Fprintf(os.Stderr, "du1: %v\n", err) - return nil - } - return entries -} -``` - -he ioutil.ReadDir function returns a slice of os.FileInfo—the same information that a call to os.Stat returns for a single file. For each subdirectory, walkDir recursively calls itself, and for each file, walkDir sends a message on the fileSizes channel. The message is the size of the file in bytes. 
- -The main function, shown below, uses two goroutines. The background goroutine calls walkDir for each directory specified on the command line and finally closes the fileSizes channel. The main goroutine computes the sum of the file sizes it receives from the channel and finally prints the total. - - -```go -package main - -import ( - "flag" - "fmt" - "io/ioutil" - "os" - "path/filepath" -) - -func main() { - // Determine the initial directories. - flag.Parse() - roots := flag.Args() - if len(roots) == 0 { - roots = []string{"."} - } - - // Traverse the file tree. - fileSizes := make(chan int64) - go func() { - for _, root := range roots { - walkDir(root, fileSizes) - } - close(fileSizes) - }() - - // Print the results. - var nfiles, nbytes int64 - for size := range fileSizes { - nfiles++ - nbytes += size - } - printDiskUsage(nfiles, nbytes) -} - -func printDiskUsage(nfiles, nbytes int64) { - fmt.Printf("%d files %.1f GB\n", nfiles, float64(nbytes)/1e9) -} - -``` -This program pauses for a long while before printing its result: -``` -$ go build gopl.io/ch8/du1 -$ ./du1 $HOME /usr /bin /etc -213201 files 62.7 GB -``` - -The program would be nicer if it kept us informed of its progress. However, simply moving the printDiskUsage call into the loop would cause it to print thousands of lines of output. -The variant of du below prints the totals periodically, but only if the -v flag is specified since not all users will want to see progress messages. The background goroutine that loops over roots remains unchanged. The main goroutine now uses a ticker to generate events every 500ms, and a select statement to wait for either a file size message, in which case it updates the totals, or a tick event, in which case it prints the current totals. If the -v flag is not specified, the tick channel remains nil, and its case in the select is effectively disabled. 
- -```go -gopl.io/ch8/du2 -var verbose = flag.Bool("v", false, "show verbose progress messages") - -func main() { - // ...start background goroutine... - - // Print the results periodically. - var tick <-chan time.Time - if *verbose { - tick = time.Tick(500 * time.Millisecond) - } - var nfiles, nbytes int64 -loop: - for { - select { - case size, ok := <-fileSizes: - if !ok { - break loop // fileSizes was closed - } - nfiles++ - nbytes += size - case <-tick: - printDiskUsage(nfiles, nbytes) - } - } - printDiskUsage(nfiles, nbytes) // final totals -} -``` -Since the program no longer uses a range loop, the first select case must explicitly test whether the fileSizes channel has been closed, using the two-result form of receive opera- tion. If the channel has been closed, the program breaks out of the loop. The labeled break statement breaks out of both the select and the for loop; an unlabeled break would break out of only the select, causing the loop to begin the next iteration. - -The program now gives us a leisurely stream of updates: - - -$ go build gopl.io/ch8/du2 -$ ./du2 -v $HOME /usr /bin /etc -28608 files 8.3 GB -54147 files 10.3 GB -93591 files 15.1 GB -127169 files 52.9 GB -175931 files 62.2 GB -213201 files 62.7 GB - -However, it still takes too long to finish. There’s no reason why all the calls to walkDir can’t be done concurrently, thereby exploiting parallelism in the disk system. The third version of du, below, creates a new goroutine for each call to walkDir. It uses a sync.WaitGroup (§8.5) to count the number of calls to walkDir that are still active, and a closer goroutine to close the fileSizes channel when the counter drops to zero. - - -```go -gopl.io/ch8/du3 -func main() { - // ...determine roots... - // Traverse each root of the file tree in parallel. 
- fileSizes := make(chan int64) - var n sync.WaitGroup - for _, root := range roots { - n.Add(1) - go walkDir(root, &n, fileSizes) - } - go func() { - n.Wait() - close(fileSizes) - }() - // ...select loop... -} - -func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64) { - defer n.Done() - for _, entry := range dirents(dir) { - if entry.IsDir() { - n.Add(1) - subdir := filepath.Join(dir, entry.Name()) - go walkDir(subdir, n, fileSizes) - } else { - fileSizes <- entry.Size() - } - } -} -``` - -Since this program creates many thousands of goroutines at its peak, we have to change dirents to use a counting semaphore to prevent it from opening too many files at once, just as we did for the web crawler in Section 8.6: - - -```go -// sema is a counting semaphore for limiting concurrency in dirents. -var sema = make(chan struct{}, 20) - -// dirents returns the entries of directory dir. -func dirents(dir string) []os.FileInfo { - sema <- struct{}{} // acquire token - defer func() { <-sema }() // release token - // ... - -``` - -This version runs several times faster than the previous one, though there is a lot of variability from system to system. - -Exercise 8.9: Write a version of du that computes and periodically displays separate totals for each of the root directories. - - +TODO