2017-05-27 05:51:58 +00:00
|
|
|
|
## 8.8. 示例: 并发的目录遍历
|
2015-12-09 07:45:11 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
在本小节中,我们会创建一个程序来生成指定目录的硬盘使用情况报告,这个程序和Unix里的du工具比较相似。大多数工作用下面这个walkDir函数来完成,这个函数使用dirents函数来枚举一个目录下的所有条目。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-01-21 02:39:06 +00:00
|
|
|
|
<u><i>gopl.io/ch8/du1</i></u>
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```go
|
|
|
|
|
// walkDir recursively walks the file tree rooted at dir
|
|
|
|
|
// and sends the size of each found file on fileSizes.
|
|
|
|
|
func walkDir(dir string, fileSizes chan<- int64) {
|
|
|
|
|
for _, entry := range dirents(dir) {
|
|
|
|
|
if entry.IsDir() {
|
|
|
|
|
subdir := filepath.Join(dir, entry.Name())
|
|
|
|
|
walkDir(subdir, fileSizes)
|
|
|
|
|
} else {
|
|
|
|
|
fileSizes <- entry.Size()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dirents returns the entries of directory dir.
// If the directory cannot be read, the error is reported on standard
// error and nil is returned, so callers simply see no entries.
func dirents(dir string) []os.FileInfo {
	infos, readErr := ioutil.ReadDir(dir)
	if readErr == nil {
		return infos
	}
	fmt.Fprintf(os.Stderr, "du1: %v\n", readErr)
	return nil
}
|
|
|
|
|
```
|
|
|
|
|
|
2017-08-24 14:29:40 +00:00
|
|
|
|
ioutil.ReadDir函数会返回一个os.FileInfo类型的slice,os.FileInfo类型也是os.Stat这个函数的返回值。对每一个子目录而言,walkDir会递归地调用其自身;对每一个文件而言,walkDir会向fileSizes这个channel发送一条消息,这条消息的内容就是该文件的字节大小。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
下面的主函数,用了两个goroutine。后台的goroutine调用walkDir来遍历命令行给出的每一个路径并最终关闭fileSizes这个channel。主goroutine会对其从channel中接收到的文件大小进行累加,并输出其和。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"flag"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io/ioutil"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
// Determine the initial directories.
|
|
|
|
|
flag.Parse()
|
|
|
|
|
roots := flag.Args()
|
|
|
|
|
if len(roots) == 0 {
|
|
|
|
|
roots = []string{"."}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Traverse the file tree.
|
|
|
|
|
fileSizes := make(chan int64)
|
|
|
|
|
go func() {
|
|
|
|
|
for _, root := range roots {
|
|
|
|
|
walkDir(root, fileSizes)
|
|
|
|
|
}
|
|
|
|
|
close(fileSizes)
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Print the results.
|
|
|
|
|
var nfiles, nbytes int64
|
|
|
|
|
for size := range fileSizes {
|
|
|
|
|
nfiles++
|
|
|
|
|
nbytes += size
|
|
|
|
|
}
|
|
|
|
|
printDiskUsage(nfiles, nbytes)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// printDiskUsage writes a one-line summary to standard output: the
// number of files followed by their combined size in gigabytes
// (nbytes divided by 1e9), shown to one decimal place.
func printDiskUsage(nfiles, nbytes int64) {
	gigabytes := float64(nbytes) / 1e9
	fmt.Printf("%d files %.1f GB\n", nfiles, gigabytes)
}
|
|
|
|
|
```
|
2016-01-02 13:17:21 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
这个程序会在打印其结果之前卡住很长时间。
|
2016-01-02 13:17:21 +00:00
|
|
|
|
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```
|
|
|
|
|
$ go build gopl.io/ch8/du1
|
|
|
|
|
$ ./du1 $HOME /usr /bin /etc
|
|
|
|
|
213201 files 62.7 GB
|
|
|
|
|
```
|
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
如果在运行的时候能够让我们知道处理进度的话想必更好。但是,如果简单地把printDiskUsage函数调用移动到循环里会导致其打印出成百上千的输出。
|
2015-12-11 07:48:07 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
下面这个du的变种会间歇打印内容,不过只有在调用时提供了-v的flag才会显示程序进度信息。在roots目录上循环的后台goroutine在这里保持不变。主goroutine现在使用了计时器来每500ms生成事件,然后用select语句来等待文件大小的消息来更新总大小数据,或者一个计时器的事件来打印当前的总大小数据。如果-v的flag在运行时没有传入的话,tick这个channel会保持为nil,这样在select里的case也就相当于被禁用了。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-01-21 02:39:06 +00:00
|
|
|
|
<u><i>gopl.io/ch8/du2</i></u>
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```go
|
|
|
|
|
var verbose = flag.Bool("v", false, "show verbose progress messages")
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
// ...start background goroutine...
|
|
|
|
|
|
|
|
|
|
// Print the results periodically.
|
|
|
|
|
var tick <-chan time.Time
|
|
|
|
|
if *verbose {
|
|
|
|
|
tick = time.Tick(500 * time.Millisecond)
|
|
|
|
|
}
|
|
|
|
|
var nfiles, nbytes int64
|
|
|
|
|
loop:
|
|
|
|
|
for {
|
|
|
|
|
select {
|
|
|
|
|
case size, ok := <-fileSizes:
|
|
|
|
|
if !ok {
|
|
|
|
|
break loop // fileSizes was closed
|
|
|
|
|
}
|
|
|
|
|
nfiles++
|
|
|
|
|
nbytes += size
|
|
|
|
|
case <-tick:
|
|
|
|
|
printDiskUsage(nfiles, nbytes)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
printDiskUsage(nfiles, nbytes) // final totals
|
|
|
|
|
}
|
|
|
|
|
```
|
2016-01-21 02:39:06 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
由于我们的程序不再使用range循环,第一个select的case必须显式地判断fileSizes的channel是不是已经被关闭了,这里可以用到channel接收的二值形式。如果channel已经被关闭了的话,程序会直接退出循环。这里的break语句用到了带标签的break,这样可以同时跳出select语句和for循环;如果没有用标签就break的话只会退出内层的select语句,而外层的for循环会使之进入下一轮select。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
现在程序会悠闲地为我们打印更新流:
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-11 07:48:07 +00:00
|
|
|
|
```
|
2015-12-11 07:02:03 +00:00
|
|
|
|
$ go build gopl.io/ch8/du2
|
|
|
|
|
$ ./du2 -v $HOME /usr /bin /etc
|
|
|
|
|
28608 files 8.3 GB
|
|
|
|
|
54147 files 10.3 GB
|
|
|
|
|
93591 files 15.1 GB
|
|
|
|
|
127169 files 52.9 GB
|
|
|
|
|
175931 files 62.2 GB
|
|
|
|
|
213201 files 62.7 GB
|
2015-12-11 07:48:07 +00:00
|
|
|
|
```
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
然而这个程序还是会花上很长时间才会结束。其实完全没有理由不能并发地调用所有的walkDir,这样还可以充分利用磁盘系统的并行能力。下面这个第三个版本的du,会对每一个walkDir的调用创建一个新的goroutine。它使用sync.WaitGroup (§8.5)来对仍旧活跃的walkDir调用进行计数,另一个goroutine会在计数器减为零的时候将fileSizes这个channel关闭。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-01-21 02:39:06 +00:00
|
|
|
|
<u><i>gopl.io/ch8/du3</i></u>
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```go
|
|
|
|
|
func main() {
|
2016-01-21 02:39:06 +00:00
|
|
|
|
// ...determine roots...
|
|
|
|
|
// Traverse each root of the file tree in parallel.
|
|
|
|
|
fileSizes := make(chan int64)
|
|
|
|
|
var n sync.WaitGroup
|
|
|
|
|
for _, root := range roots {
|
|
|
|
|
n.Add(1)
|
|
|
|
|
go walkDir(root, &n, fileSizes)
|
|
|
|
|
}
|
|
|
|
|
go func() {
|
|
|
|
|
n.Wait()
|
|
|
|
|
close(fileSizes)
|
|
|
|
|
}()
|
|
|
|
|
// ...select loop...
|
2015-12-11 07:02:03 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64) {
|
2016-01-21 02:39:06 +00:00
|
|
|
|
defer n.Done()
|
|
|
|
|
for _, entry := range dirents(dir) {
|
|
|
|
|
if entry.IsDir() {
|
|
|
|
|
n.Add(1)
|
|
|
|
|
subdir := filepath.Join(dir, entry.Name())
|
|
|
|
|
go walkDir(subdir, n, fileSizes)
|
|
|
|
|
} else {
|
|
|
|
|
fileSizes <- entry.Size()
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-12-11 07:02:03 +00:00
|
|
|
|
}
|
|
|
|
|
```
|
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
由于这个程序在高峰期会创建成百上千的goroutine,我们需要修改dirents函数,用计数信号量来阻止它同时打开太多的文件,就像我们在8.7节中的并发爬虫一样:
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
// sema is a counting semaphore for limiting concurrency in dirents.
|
|
|
|
|
var sema = make(chan struct{}, 20)
|
|
|
|
|
|
|
|
|
|
// dirents returns the entries of directory dir.
|
|
|
|
|
func dirents(dir string) []os.FileInfo {
|
2016-01-21 02:39:06 +00:00
|
|
|
|
sema <- struct{}{} // acquire token
|
|
|
|
|
defer func() { <-sema }() // release token
|
|
|
|
|
// ...
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```
|
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
这个版本比之前那个快了好几倍,尽管其具体效率还是和你的运行环境,机器配置相关。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2016-02-15 03:06:34 +00:00
|
|
|
|
**练习 8.9:** 编写一个du工具,分别计算每一个root目录各自的总大小,并每隔一段时间显示出来。
|