2015-12-18 02:53:03 +00:00
|
|
|
|
## 8.8. 示例: 並發的目錄遍歷
|
2015-12-09 07:45:11 +00:00
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
在本小節中,我們會創建一個程序來生成指定目錄的硬盤使用情況報告,這個程序和Unix裡的du工具比較相似。大多數工作用下面這個walkDir函數來完成,這個函數使用dirents函數來枚舉一個目錄下的所有條目。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
gopl.io/ch8/du1
|
|
|
|
|
// walkDir recursively walks the file tree rooted at dir
|
|
|
|
|
// and sends the size of each found file on fileSizes.
|
|
|
|
|
func walkDir(dir string, fileSizes chan<- int64) {
|
|
|
|
|
for _, entry := range dirents(dir) {
|
|
|
|
|
if entry.IsDir() {
|
|
|
|
|
subdir := filepath.Join(dir, entry.Name())
|
|
|
|
|
walkDir(subdir, fileSizes)
|
|
|
|
|
} else {
|
|
|
|
|
fileSizes <- entry.Size()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dirents lists the contents of directory dir.
// When the directory cannot be read, the error is written to standard
// error (prefixed with "du1: ") and nil is returned, so callers simply
// see an empty listing.
func dirents(dir string) []os.FileInfo {
	infos, readErr := ioutil.ReadDir(dir)
	if readErr == nil {
		return infos
	}
	fmt.Fprintf(os.Stderr, "du1: %v\n", readErr)
	return nil
}
|
|
|
|
|
```
|
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
ioutil.ReadDir函數會返回一個os.FileInfo類型的slice,os.FileInfo類型也是os.Stat這個函數的返回值。對每一個子目錄而言,walkDir會遞歸地調用其自身;而對每一個文件,walkDir會向fileSizes這個channel發送一條消息,這條消息包含了文件的字節大小。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
下面的主函數,用了兩個goroutine。後臺的goroutine調用walkDir來遍歷命令行給出的每一個路徑並最終關閉fileSizes這個channel。主goroutine會對其從channel中接收到的文件大小進行累加,並輸出其和。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"flag"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io/ioutil"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
// Determine the initial directories.
|
|
|
|
|
flag.Parse()
|
|
|
|
|
roots := flag.Args()
|
|
|
|
|
if len(roots) == 0 {
|
|
|
|
|
roots = []string{"."}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Traverse the file tree.
|
|
|
|
|
fileSizes := make(chan int64)
|
|
|
|
|
go func() {
|
|
|
|
|
for _, root := range roots {
|
|
|
|
|
walkDir(root, fileSizes)
|
|
|
|
|
}
|
|
|
|
|
close(fileSizes)
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Print the results.
|
|
|
|
|
var nfiles, nbytes int64
|
|
|
|
|
for size := range fileSizes {
|
|
|
|
|
nfiles++
|
|
|
|
|
nbytes += size
|
|
|
|
|
}
|
|
|
|
|
printDiskUsage(nfiles, nbytes)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// printDiskUsage writes a one-line summary to standard output: the number
// of files, followed by the byte total expressed in gigabytes (1e9 bytes)
// with one decimal place.
func printDiskUsage(nfiles, nbytes int64) {
	const bytesPerGB = 1e9
	gigabytes := float64(nbytes) / bytesPerGB
	fmt.Printf("%d files %.1f GB\n", nfiles, gigabytes)
}
|
|
|
|
|
|
|
|
|
|
```
|
2015-12-11 09:19:15 +00:00
|
|
|
|
這個程序會在打印其結果之前卡住很長時間。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
```
|
|
|
|
|
$ go build gopl.io/ch8/du1
|
|
|
|
|
$ ./du1 $HOME /usr /bin /etc
|
|
|
|
|
213201 files 62.7 GB
|
|
|
|
|
```
|
|
|
|
|
|
2015-12-11 09:19:15 +00:00
|
|
|
|
如果在運行的時候能夠讓我們知道處理進度的話想必更好。但是,如果簡單地把printDiskUsage函數調用移動到循環裡會導致其打印出成百上千的輸出。
|
2015-12-11 07:48:07 +00:00
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
下面這個du的變種會間歇打印內容,不過只有在調用時提供了-v的flag才會顯示程序進度信息。在roots目錄上循環的後臺goroutine在這裡保持不變。主goroutine現在使用了計時器來每500ms生成事件,然後用select語句來等待文件大小的消息來更新總大小數據,或者一個計時器的事件來打印當前的總大小數據。如果-v的flag在運行時沒有傳入的話,tick這個channel會保持爲nil,這樣在select裡的case也就相當於被禁用了。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
gopl.io/ch8/du2
|
|
|
|
|
var verbose = flag.Bool("v", false, "show verbose progress messages")
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
// ...start background goroutine...
|
|
|
|
|
|
|
|
|
|
// Print the results periodically.
|
|
|
|
|
var tick <-chan time.Time
|
|
|
|
|
if *verbose {
|
|
|
|
|
tick = time.Tick(500 * time.Millisecond)
|
|
|
|
|
}
|
|
|
|
|
var nfiles, nbytes int64
|
|
|
|
|
loop:
|
|
|
|
|
for {
|
|
|
|
|
select {
|
|
|
|
|
case size, ok := <-fileSizes:
|
|
|
|
|
if !ok {
|
|
|
|
|
break loop // fileSizes was closed
|
|
|
|
|
}
|
|
|
|
|
nfiles++
|
|
|
|
|
nbytes += size
|
|
|
|
|
case <-tick:
|
|
|
|
|
printDiskUsage(nfiles, nbytes)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
printDiskUsage(nfiles, nbytes) // final totals
|
|
|
|
|
}
|
|
|
|
|
```
|
2015-12-18 02:53:03 +00:00
|
|
|
|
由於我們的程序不再使用range循環,第一個select的case必須顯式地判斷fileSizes的channel是不是已經被關閉了,這裡可以用到channel接收的二值形式。如果channel已經被關閉了的話,程序會直接退出循環。這裡的break語句用到了標籤break,這樣可以同時終結select和for這兩層語句;如果沒有用標籤就break的話只會退出內層的select語句,而外層的for循環會使之進入下一輪select。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-11 09:19:15 +00:00
|
|
|
|
現在程序會悠閒地爲我們打印更新流:
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-11 07:48:07 +00:00
|
|
|
|
```
|
2015-12-11 07:02:03 +00:00
|
|
|
|
$ go build gopl.io/ch8/du2
|
|
|
|
|
$ ./du2 -v $HOME /usr /bin /etc
|
|
|
|
|
28608 files 8.3 GB
|
|
|
|
|
54147 files 10.3 GB
|
|
|
|
|
93591 files 15.1 GB
|
|
|
|
|
127169 files 52.9 GB
|
|
|
|
|
175931 files 62.2 GB
|
|
|
|
|
213201 files 62.7 GB
|
2015-12-11 07:48:07 +00:00
|
|
|
|
```
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
然而這個程序還是會花上很長時間才會結束。其實沒有理由不能讓所有的walkDir調用並發執行,這樣還可以發揮磁盤系統的並行能力。下面這個第三個版本的du,會對每一個walkDir的調用創建一個新的goroutine。它使用sync.WaitGroup (§8.5)來對仍舊活躍的walkDir調用進行計數,另一個goroutine會在計數器減爲零的時候將fileSizes這個channel關閉。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
gopl.io/ch8/du3
|
|
|
|
|
func main() {
|
|
|
|
|
// ...determine roots...
|
|
|
|
|
// Traverse each root of the file tree in parallel.
|
|
|
|
|
fileSizes := make(chan int64)
|
|
|
|
|
var n sync.WaitGroup
|
|
|
|
|
for _, root := range roots {
|
|
|
|
|
n.Add(1)
|
|
|
|
|
go walkDir(root, &n, fileSizes)
|
|
|
|
|
}
|
|
|
|
|
go func() {
|
|
|
|
|
n.Wait()
|
|
|
|
|
close(fileSizes)
|
|
|
|
|
}()
|
|
|
|
|
// ...select loop...
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64) {
|
|
|
|
|
defer n.Done()
|
|
|
|
|
for _, entry := range dirents(dir) {
|
|
|
|
|
if entry.IsDir() {
|
|
|
|
|
n.Add(1)
|
|
|
|
|
subdir := filepath.Join(dir, entry.Name())
|
|
|
|
|
go walkDir(subdir, n, fileSizes)
|
|
|
|
|
} else {
|
|
|
|
|
fileSizes <- entry.Size()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
```
|
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
由於這個程序在高峯期會創建成百上千的goroutine,我們需要修改dirents函數,用計數信號量來阻止它同時打開太多的文件,就像我們在8.7節中的並發爬蟲一樣:
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```go
|
|
|
|
|
// sema is a counting semaphore for limiting concurrency in dirents.
|
|
|
|
|
var sema = make(chan struct{}, 20)
|
|
|
|
|
|
|
|
|
|
// dirents returns the entries of directory dir.
|
|
|
|
|
func dirents(dir string) []os.FileInfo {
|
|
|
|
|
sema <- struct{}{} // acquire token
|
|
|
|
|
defer func() { <-sema }() // release token
|
|
|
|
|
// ...
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
2015-12-11 09:19:15 +00:00
|
|
|
|
這個版本比之前那個快了好幾倍,儘管其具體效率還是和你的運行環境,機器配置相關。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
2015-12-18 02:53:03 +00:00
|
|
|
|
練習8.9: 編寫一個du工具,分別計算每一個root目錄的總大小,並每隔一段時間將其顯示出來。
|
2015-12-11 07:02:03 +00:00
|
|
|
|
|
|
|
|
|
|