2022-08-04 06:59:33 +00:00
<!DOCTYPE HTML>
< html lang = "zh" class = "sidebar-visible no-js light" >
< head >
2024-08-13 00:15:53 +00:00
<!-- Book generated using https://github.com/wa-lang/wabook -->
2022-08-04 06:59:33 +00:00
< meta charset = "UTF-8" >
< title > 示例: 并发的Web爬虫 - Go语言圣经< / title >
<!-- Custom HTML head -->
< meta content = "text/html; charset=utf-8" http-equiv = "Content-Type" >
2024-08-08 07:47:03 +00:00
< meta name = "description" content = "" >
2022-08-04 06:59:33 +00:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" >
< meta name = "theme-color" content = "#ffffff" / >
2024-08-08 07:47:03 +00:00
< link rel = "icon" href = "../favicon.svg" >
< link rel = "shortcut icon" href = "../favicon.png" >
2024-08-13 00:15:53 +00:00
< link rel = "stylesheet" href = "../static/wabook/css/variables.css" >
< link rel = "stylesheet" href = "../static/wabook/css/general.css" >
< link rel = "stylesheet" href = "../static/wabook/css/chrome.css" >
< link rel = "stylesheet" href = "../static/wabook/css/print.css" media = "print" >
2022-08-04 06:59:33 +00:00
<!-- Fonts -->
2024-08-13 00:15:53 +00:00
< link rel = "stylesheet" href = "../static/wabook/FontAwesome/css/font-awesome.css" >
< link rel = "stylesheet" href = "../static/wabook/fonts/fonts.css" >
2022-08-04 06:59:33 +00:00
<!-- Highlight.js Stylesheets -->
2024-08-13 00:15:53 +00:00
< link rel = "stylesheet" href = "../static/wabook/highlight.css" >
< link rel = "stylesheet" href = "../static/wabook/tomorrow-night.css" >
< link rel = "stylesheet" href = "../static/wabook/ayu-highlight.css" >
2022-08-04 06:59:33 +00:00
<!-- Custom theme stylesheets -->
< / head >
< body >
<!-- Provide site root to javascript -->
< script type = "text/javascript" >
// Relative path from this page up to the site root (the page's own asset
// URLs use the same "../" prefix).
var path_to_root = "../";
// Theme used when nothing is stored: "navy" when the OS/browser reports a
// dark colour-scheme preference, "light" otherwise.
var default_theme;
if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
    default_theme = "navy";
} else {
    default_theme = "light";
}
< / script >
<!-- Work around some values being stored in localStorage wrapped in quotes -->
< script type = "text/javascript" >
// Normalize the persisted theme and sidebar settings: when a stored value is
// wrapped in double quotes, write it back to localStorage without them.
// Each key is checked independently, so a missing entry (getItem returns
// null) for one key does not skip the cleanup of the other.
try {
    var theme = localStorage.getItem('wabook-theme');
    var sidebar = localStorage.getItem('wabook-sidebar');

    if (theme !== null && theme.startsWith('"') && theme.endsWith('"')) {
        localStorage.setItem('wabook-theme', theme.slice(1, theme.length - 1));
    }

    if (sidebar !== null && sidebar.startsWith('"') && sidebar.endsWith('"')) {
        localStorage.setItem('wabook-sidebar', sidebar.slice(1, sidebar.length - 1));
    }
} catch (e) {
    // Best effort only: localStorage access itself may throw (e.g. when
    // storage is blocked), in which case the stored values are left as-is.
}
< / script >
<!-- Set the theme before any content is loaded, prevents flash -->
< script type = "text/javascript" >
// Apply the stored theme (or the default) to <html> before content renders.
var theme;
try {
    theme = localStorage.getItem('wabook-theme');
} catch (e) {
    // Storage unavailable: fall through to the default theme below.
}
// Treat a missing OR empty value as "no theme": classList.add('') throws,
// which would abort this script before the 'js' class is added.
if (!theme) {
    theme = default_theme;
}
var html = document.querySelector('html');
// Swap the static fallback classes for the script-selected ones.
html.classList.remove('no-js');
html.classList.remove('light');
html.classList.add(theme);
html.classList.add('js');
< / script >
<!-- Hide / unhide sidebar before it is displayed -->
< script type = "text/javascript" >
// Choose the initial sidebar state and expose it as a "sidebar-<state>"
// class on <html>, replacing the static "sidebar-visible" class.
var html = document.querySelector('html');
var sidebar = null;
if (document.body.clientWidth >= 1080) {
    // Wide viewport: honour the stored preference, defaulting to visible.
    try {
        sidebar = localStorage.getItem('wabook-sidebar');
    } catch (e) {
        // Storage unavailable: sidebar stays null and takes the default below.
    }
    sidebar = sidebar || 'visible';
} else {
    // Narrow viewport: always start with the sidebar hidden.
    sidebar = 'hidden';
}
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
< / script >
< nav id = "sidebar" class = "sidebar" aria-label = "Table of contents" >
< div class = "sidebar-scrollbox" >
2024-08-08 07:47:03 +00:00
< ol class = "chapter" >
< li class = "chapter-item expanded " >
< a href = "../index.html" > Go语言圣经< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../preface-zh.html" > 译者序< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../preface.html" > 前言< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1.html" > < strong aria-hidden = "true" > 1.< / strong > 入门< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-01.html" > < strong aria-hidden = "true" > 1.1.< / strong > Hello, World< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-02.html" > < strong aria-hidden = "true" > 1.2.< / strong > 命令行参数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-03.html" > < strong aria-hidden = "true" > 1.3.< / strong > 查找重复的行< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-04.html" > < strong aria-hidden = "true" > 1.4.< / strong > GIF动画< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-05.html" > < strong aria-hidden = "true" > 1.5.< / strong > 获取URL< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-06.html" > < strong aria-hidden = "true" > 1.6.< / strong > 并发获取多个URL< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-07.html" > < strong aria-hidden = "true" > 1.7.< / strong > Web服务< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch1/ch1-08.html" > < strong aria-hidden = "true" > 1.8.< / strong > 本章要点< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2.html" > < strong aria-hidden = "true" > 2.< / strong > 程序结构< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-01.html" > < strong aria-hidden = "true" > 2.1.< / strong > 命名< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-02.html" > < strong aria-hidden = "true" > 2.2.< / strong > 声明< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-03.html" > < strong aria-hidden = "true" > 2.3.< / strong > 变量< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-04.html" > < strong aria-hidden = "true" > 2.4.< / strong > 赋值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-05.html" > < strong aria-hidden = "true" > 2.5.< / strong > 类型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-06.html" > < strong aria-hidden = "true" > 2.6.< / strong > 包和文件< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch2/ch2-07.html" > < strong aria-hidden = "true" > 2.7.< / strong > 作用域< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3.html" > < strong aria-hidden = "true" > 3.< / strong > 基础数据类型< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-01.html" > < strong aria-hidden = "true" > 3.1.< / strong > 整型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-02.html" > < strong aria-hidden = "true" > 3.2.< / strong > 浮点数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-03.html" > < strong aria-hidden = "true" > 3.3.< / strong > 复数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-04.html" > < strong aria-hidden = "true" > 3.4.< / strong > 布尔型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-05.html" > < strong aria-hidden = "true" > 3.5.< / strong > 字符串< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch3/ch3-06.html" > < strong aria-hidden = "true" > 3.6.< / strong > 常量< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4.html" > < strong aria-hidden = "true" > 4.< / strong > 复合数据类型< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-01.html" > < strong aria-hidden = "true" > 4.1.< / strong > 数组< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-02.html" > < strong aria-hidden = "true" > 4.2.< / strong > Slice< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-03.html" > < strong aria-hidden = "true" > 4.3.< / strong > Map< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-04.html" > < strong aria-hidden = "true" > 4.4.< / strong > 结构体< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-05.html" > < strong aria-hidden = "true" > 4.5.< / strong > JSON< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch4/ch4-06.html" > < strong aria-hidden = "true" > 4.6.< / strong > 文本和HTML模板< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5.html" > < strong aria-hidden = "true" > 5.< / strong > 函数< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-01.html" > < strong aria-hidden = "true" > 5.1.< / strong > 函数声明< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-02.html" > < strong aria-hidden = "true" > 5.2.< / strong > 递归< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-03.html" > < strong aria-hidden = "true" > 5.3.< / strong > 多返回值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-04.html" > < strong aria-hidden = "true" > 5.4.< / strong > 错误< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-05.html" > < strong aria-hidden = "true" > 5.5.< / strong > 函数值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-06.html" > < strong aria-hidden = "true" > 5.6.< / strong > 匿名函数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-07.html" > < strong aria-hidden = "true" > 5.7.< / strong > 可变参数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-08.html" > < strong aria-hidden = "true" > 5.8.< / strong > Deferred函数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-09.html" > < strong aria-hidden = "true" > 5.9.< / strong > Panic异常< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch5/ch5-10.html" > < strong aria-hidden = "true" > 5.10.< / strong > Recover捕获异常< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6.html" > < strong aria-hidden = "true" > 6.< / strong > 方法< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-01.html" > < strong aria-hidden = "true" > 6.1.< / strong > 方法声明< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-02.html" > < strong aria-hidden = "true" > 6.2.< / strong > 基于指针对象的方法< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-03.html" > < strong aria-hidden = "true" > 6.3.< / strong > 通过嵌入结构体来扩展类型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-04.html" > < strong aria-hidden = "true" > 6.4.< / strong > 方法值和方法表达式< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-05.html" > < strong aria-hidden = "true" > 6.5.< / strong > 示例: Bit数组< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch6/ch6-06.html" > < strong aria-hidden = "true" > 6.6.< / strong > 封装< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7.html" > < strong aria-hidden = "true" > 7.< / strong > 接口< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-01.html" > < strong aria-hidden = "true" > 7.1.< / strong > 接口是合约< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-02.html" > < strong aria-hidden = "true" > 7.2.< / strong > 接口类型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-03.html" > < strong aria-hidden = "true" > 7.3.< / strong > 实现接口的条件< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-04.html" > < strong aria-hidden = "true" > 7.4.< / strong > flag.Value接口< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-05.html" > < strong aria-hidden = "true" > 7.5.< / strong > 接口值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-06.html" > < strong aria-hidden = "true" > 7.6.< / strong > sort.Interface接口< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-07.html" > < strong aria-hidden = "true" > 7.7.< / strong > http.Handler接口< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-08.html" > < strong aria-hidden = "true" > 7.8.< / strong > error接口< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-09.html" > < strong aria-hidden = "true" > 7.9.< / strong > 示例: 表达式求值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-10.html" > < strong aria-hidden = "true" > 7.10.< / strong > 类型断言< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-11.html" > < strong aria-hidden = "true" > 7.11.< / strong > 基于类型断言识别错误类型< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-12.html" > < strong aria-hidden = "true" > 7.12.< / strong > 通过类型断言查询接口< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-13.html" > < strong aria-hidden = "true" > 7.13.< / strong > 类型分支< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-14.html" > < strong aria-hidden = "true" > 7.14.< / strong > 示例: 基于标记的XML解码< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch7/ch7-15.html" > < strong aria-hidden = "true" > 7.15.< / strong > 补充几点< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8.html" > < strong aria-hidden = "true" > 8.< / strong > Goroutines和Channels< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-01.html" > < strong aria-hidden = "true" > 8.1.< / strong > Goroutines< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-02.html" > < strong aria-hidden = "true" > 8.2.< / strong > 示例: 并发的Clock服务< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-03.html" > < strong aria-hidden = "true" > 8.3.< / strong > 示例: 并发的Echo服务< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-04.html" > < strong aria-hidden = "true" > 8.4.< / strong > Channels< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-05.html" > < strong aria-hidden = "true" > 8.5.< / strong > 并发的循环< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-06.html" class = "active" > < strong aria-hidden = "true" > 8.6.< / strong > 示例: 并发的Web爬虫< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-07.html" > < strong aria-hidden = "true" > 8.7.< / strong > 基于select的多路复用< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-08.html" > < strong aria-hidden = "true" > 8.8.< / strong > 示例: 并发的目录遍历< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-09.html" > < strong aria-hidden = "true" > 8.9.< / strong > 并发的退出< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch8/ch8-10.html" > < strong aria-hidden = "true" > 8.10.< / strong > 示例: 聊天服务< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9.html" > < strong aria-hidden = "true" > 9.< / strong > 基于共享变量的并发< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-01.html" > < strong aria-hidden = "true" > 9.1.< / strong > 竞争条件< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-02.html" > < strong aria-hidden = "true" > 9.2.< / strong > sync.Mutex互斥锁< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-03.html" > < strong aria-hidden = "true" > 9.3.< / strong > sync.RWMutex读写锁< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-04.html" > < strong aria-hidden = "true" > 9.4.< / strong > 内存同步< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-05.html" > < strong aria-hidden = "true" > 9.5.< / strong > sync.Once惰性初始化< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-06.html" > < strong aria-hidden = "true" > 9.6.< / strong > 竞争条件检测< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-07.html" > < strong aria-hidden = "true" > 9.7.< / strong > 示例: 并发的非阻塞缓存< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch9/ch9-08.html" > < strong aria-hidden = "true" > 9.8.< / strong > Goroutines和线程< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10.html" > < strong aria-hidden = "true" > 10.< / strong > 包和工具< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-01.html" > < strong aria-hidden = "true" > 10.1.< / strong > 包简介< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-02.html" > < strong aria-hidden = "true" > 10.2.< / strong > 导入路径< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-03.html" > < strong aria-hidden = "true" > 10.3.< / strong > 包声明< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-04.html" > < strong aria-hidden = "true" > 10.4.< / strong > 导入声明< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-05.html" > < strong aria-hidden = "true" > 10.5.< / strong > 包的匿名导入< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-06.html" > < strong aria-hidden = "true" > 10.6.< / strong > 包和命名< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch10/ch10-07.html" > < strong aria-hidden = "true" > 10.7.< / strong > 工具< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11.html" > < strong aria-hidden = "true" > 11.< / strong > 测试< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-01.html" > < strong aria-hidden = "true" > 11.1.< / strong > go test< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-02.html" > < strong aria-hidden = "true" > 11.2.< / strong > 测试函数< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-03.html" > < strong aria-hidden = "true" > 11.3.< / strong > 测试覆盖率< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-04.html" > < strong aria-hidden = "true" > 11.4.< / strong > 基准测试< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-05.html" > < strong aria-hidden = "true" > 11.5.< / strong > 剖析< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch11/ch11-06.html" > < strong aria-hidden = "true" > 11.6.< / strong > 示例函数< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12.html" > < strong aria-hidden = "true" > 12.< / strong > 反射< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-01.html" > < strong aria-hidden = "true" > 12.1.< / strong > 为何需要反射?< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-02.html" > < strong aria-hidden = "true" > 12.2.< / strong > reflect.Type和reflect.Value< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-03.html" > < strong aria-hidden = "true" > 12.3.< / strong > Display递归打印< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-04.html" > < strong aria-hidden = "true" > 12.4.< / strong > 示例: 编码S表达式< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-05.html" > < strong aria-hidden = "true" > 12.5.< / strong > 通过reflect.Value修改值< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-06.html" > < strong aria-hidden = "true" > 12.6.< / strong > 示例: 解码S表达式< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-07.html" > < strong aria-hidden = "true" > 12.7.< / strong > 获取结构体字段标签< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-08.html" > < strong aria-hidden = "true" > 12.8.< / strong > 显示一个类型的方法集< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch12/ch12-09.html" > < strong aria-hidden = "true" > 12.9.< / strong > 几点忠告< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13.html" > < strong aria-hidden = "true" > 13.< / strong > 底层编程< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13-01.html" > < strong aria-hidden = "true" > 13.1.< / strong > unsafe.Sizeof, Alignof 和 Offsetof< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13-02.html" > < strong aria-hidden = "true" > 13.2.< / strong > unsafe.Pointer< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13-03.html" > < strong aria-hidden = "true" > 13.3.< / strong > 示例: 深度相等判断< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13-04.html" > < strong aria-hidden = "true" > 13.4.< / strong > 通过cgo调用C代码< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../ch13/ch13-05.html" > < strong aria-hidden = "true" > 13.5.< / strong > 几点忠告< / a >
< / li >
< / ol >
< li class = "chapter-item expanded " >
< a href = "../appendix/appendix.html" > < strong aria-hidden = "true" > 14.< / strong > 附录< / a >
< / li >
< ol class = "section" >
< li class = "chapter-item expanded " >
< a href = "../appendix/appendix-a-errata.html" > < strong aria-hidden = "true" > 14.1.< / strong > 附录A: 原文勘误< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../appendix/appendix-b-author.html" > < strong aria-hidden = "true" > 14.2.< / strong > 附录B: 作者译者< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../appendix/appendix-c-cpoyright.html" > < strong aria-hidden = "true" > 14.3.< / strong > 附录C: 译文授权< / a >
< / li >
< li class = "chapter-item expanded " >
< a href = "../appendix/appendix-d-translations.html" > < strong aria-hidden = "true" > 14.4.< / strong > 附录D: 其它语言< / a >
< / li >
< / ol >
< / ol >
< / div >
2022-08-04 06:59:33 +00:00
< div id = "sidebar-resize-handle" class = "sidebar-resize-handle" > < / div >
< / nav >
< div id = "page-wrapper" class = "page-wrapper" >
< div class = "page" >
< div id = "menu-bar-hover-placeholder" > < / div >
< div id = "menu-bar" class = "menu-bar sticky bordered" >
< div class = "left-buttons" >
< button id = "sidebar-toggle" class = "icon-button" type = "button" title = "Toggle Table of Contents" aria-label = "Toggle Table of Contents" aria-controls = "sidebar" >
< i class = "fa fa-bars" > < / i >
< / button >
< button id = "theme-toggle" class = "icon-button" type = "button" title = "Change theme" aria-label = "Change theme" aria-haspopup = "true" aria-expanded = "false" aria-controls = "theme-list" >
< i class = "fa fa-paint-brush" > < / i >
< / button >
< ul id = "theme-list" class = "theme-popup" aria-label = "Themes" role = "menu" >
< li role = "none" > < button role = "menuitem" class = "theme" id = "light" > Light (default)< / button > < / li >
< li role = "none" > < button role = "menuitem" class = "theme" id = "coal" > Coal< / button > < / li >
< li role = "none" > < button role = "menuitem" class = "theme" id = "navy" > Navy< / button > < / li >
< li role = "none" > < button role = "menuitem" class = "theme" id = "ayu" > Ayu< / button > < / li >
< / ul >
2024-08-08 07:47:03 +00:00
< / div >
2022-08-04 06:59:33 +00:00
2024-08-08 07:47:03 +00:00
< h1 class = "menu-title" > < a href = "../index.html" > Go语言圣经< / a > < / h1 >
2022-08-04 06:59:33 +00:00
< div class = "right-buttons" >
2024-08-08 07:47:03 +00:00
< a href = "https://github.com/gopl-zh/gopl-zh.github.com" title = "Git repository" aria-label = "Git repository" >
2022-08-04 06:59:33 +00:00
< i id = "git-repository-button" class = "fa fa-github" > < / i >
< / a >
2024-08-08 07:47:03 +00:00
< a href = "https://github.com/gopl-zh/gopl-zh.github.com/edit/master/ch8/ch8-06.md" title = "Suggest an edit" aria-label = "Suggest an edit" >
2022-08-04 06:59:33 +00:00
< i id = "git-edit-button" class = "fa fa-edit" > < / i >
< / a >
< / div >
< / div >
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
< script type = "text/javascript" >
// Mirror the initial sidebar state (the global `sidebar` set by the earlier
// inline script) into the toggle button, the sidebar and its links.
(function () {
    var sidebarShown = sidebar === 'visible';
    var sidebarLinks = document.querySelectorAll('#sidebar a');

    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebarShown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !sidebarShown);

    // Links inside a hidden sidebar are taken out of the tab order.
    for (var i = 0; i < sidebarLinks.length; i++) {
        sidebarLinks[i].setAttribute('tabIndex', sidebarShown ? 0 : -1);
    }
})();
< / script >
< div id = "content" class = "content" >
<!-- Page table of contents -->
< div class = "sidetoc" > < nav class = "pagetoc" > < / nav > < / div >
2024-08-08 07:47:03 +00:00
2022-08-04 06:59:33 +00:00
< main >
2024-08-13 00:15:53 +00:00
< ul dir = "auto" > < li > < em > 凹语言(Go实现, 面向WASM设计): < a href = "https://github.com/wa-lang/wa" > https://github.com/wa-lang/wa< / a > < / em > < / li > < li > < em > WaBook(Go语言实现的MD电子书构建工具): < a href = "https://github.com/wa-lang/wabook" > https://github.com/wa-lang/wabook< / a > < / em > < / li > < / ul > < hr >
2022-08-04 06:59:33 +00:00
2024-08-08 07:47:03 +00:00
< h2 > 8.6. 示例: 并发的Web爬虫< / h2 >
2022-08-04 06:59:33 +00:00
< p > 在5.6节中, 我们做了一个简单的web爬虫, 用bfs(广度优先)算法来抓取整个网站。在本节中, 我们会让这个爬虫并行化, 这样每一个彼此独立的抓取命令可以并行进行IO, 最大化利用网络资源。crawl函数和gopl.io/ch5/findlinks3中的是一样的。< / p >
2024-08-08 07:47:03 +00:00
< p > <!-- raw HTML omitted --> <!-- raw HTML omitted --> gopl.io/ch8/crawl1<!-- raw HTML omitted --> <!-- raw HTML omitted --> < / p >
< pre > < code class = "language-go" > func crawl(url string) []string {
    fmt.Println(url)
    list, err := links.Extract(url)
    if err != nil {
        log.Print(err)
    }
    return list
}
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 主函数和5.6节中的breadthFirst(广度优先)类似。像之前一样, 一个worklist是一个记录了需要处理的元素的队列, 每一个元素都是一个需要抓取的URL列表, 不过这一次我们用channel代替slice来做这个队列。每一个对crawl的调用都会在他们自己的goroutine中进行并且会把他们抓到的链接发送回worklist。< / p >
2024-08-08 07:47:03 +00:00
< pre > < code class = "language-go" > func main() {
    worklist := make(chan []string)
    // Start with the command-line arguments.
    go func() { worklist &lt;- os.Args[1:] }()
    // Crawl the web concurrently.
    seen := make(map[string]bool)
    for list := range worklist {
        for _, link := range list {
            if !seen[link] {
                seen[link] = true
                go func(link string) {
                    worklist &lt;- crawl(link)
                }(link)
            }
        }
    }
}
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 注意这里的crawl所在的goroutine会将link作为一个显式的参数传入, 来避免“循环变量快照”的问题( 在5.6.1中有讲解) 。另外注意这里将命令行参数传入worklist也是在一个另外的goroutine中进行的, 这是为了避免channel两端的main goroutine与crawler goroutine都尝试向对方发送内容, 却没有一端接收内容时发生死锁。当然, 这里我们也可以用buffered channel来解决问题, 这里不再赘述。< / p >
< p > 现在爬虫可以高并发地运行起来, 并且可以产生一大堆的URL了, 不过还是会有两个问题。第一个问题是在运行一段时间后, 日志里可能会出现下面这样的错误信息: < / p >
2024-08-08 07:47:03 +00:00
< pre > < code > $ go build gopl.io/ch8/crawl1
$ ./crawl1 http://gopl.io/
http://gopl.io/
https://golang.org/help/
https://golang.org/doc/
https://golang.org/blog/
...
2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host
2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket: too many open files
...
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 最初的错误信息是一个让人莫名的DNS查找失败, 即使这个域名是完全可靠的。而随后的错误信息揭示了原因: 这个程序一次性创建了太多网络连接, 超过了每一个进程的打开文件数限制, 继而导致DNS查找和net.Dial调用这类操作开始失败。< / p >
< p > 这个程序实在是太并行了。无穷无尽地并行化并不是什么好事情, 因为不管怎么说, 你的系统总是会有一些限制因素, 比如CPU核心数会限制你的计算负载, 比如你的硬盘转轴和磁头数限制了你的本地磁盘IO操作频率, 比如你的网络带宽限制了你的下载速度上限, 或者是你的一个web服务的服务容量上限等等。为了解决这个问题, 我们可以限制并发程序所使用的资源来使之适应自己的运行环境。对于我们的例子来说, 最简单的方法就是限制同一时间对links.Extract的调用不超过n次, 这里的n一般小于文件描述符的上限值, 比如20。这和一个夜店里限制客人数目是一个道理, 只有当有客人离开时, 才会允许新的客人进入店内。< / p >
< p > 我们可以用一个有容量限制的buffered channel来控制并发, 这类似于操作系统里的计数信号量概念。从概念上讲, channel里的n个空槽代表n个可以处理内容的token( 通行证) , 向channel里发送一个值会获取一个token, 从channel里接收一个值会释放其中的一个token, 并且生成一个新的空槽位。这样保证了在没有接收介入时最多有n个发送操作。( 这里可能我们拿channel里填充的槽来做token更直观一些, 不过还是这样吧。) 由于channel里的元素类型并不重要, 我们用一个零值的struct{}来作为其元素。< / p >
< p > 让我们重写crawl函数, 将对links.Extract的调用操作用获取、释放token的操作包裹起来, 来确保同一时间对其只有20个调用。信号量数量和其能操作的IO资源数量应保持接近。< / p >
2024-08-08 07:47:03 +00:00
< p > <!-- raw HTML omitted --> <!-- raw HTML omitted --> gopl.io/ch8/crawl2<!-- raw HTML omitted --> <!-- raw HTML omitted --> < / p >
< pre > < code class = "language-go" > // tokens is a counting semaphore used to
// enforce a limit of 20 concurrent requests.
var tokens = make(chan struct{}, 20)
func crawl(url string) []string {
    fmt.Println(url)
    tokens &lt;- struct{}{} // acquire a token
    list, err := links.Extract(url)
    &lt;-tokens // release the token
    if err != nil {
        log.Print(err)
    }
    return list
}
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 第二个问题是这个程序永远都不会终止, 即使它已经爬到了所有初始链接衍生出的链接。( 当然, 除非你慎重地选择了合适的初始化URL或者已经实现了练习8.6中的深度限制, 你应该还没有意识到这个问题。) 为了使这个程序能够终止, 我们需要在worklist为空并且没有crawl的goroutine在运行时退出主循环。< / p >
2024-08-08 07:47:03 +00:00
< pre > < code class = "language-go" > func main() {
    worklist := make(chan []string)
    var n int // number of pending sends to worklist
    // Start with the command-line arguments.
    n++
    go func() { worklist &lt;- os.Args[1:] }()
    // Crawl the web concurrently.
    seen := make(map[string]bool)
    for ; n > 0; n-- {
        list := &lt;-worklist
        for _, link := range list {
            if !seen[link] {
                seen[link] = true
                n++
                go func(link string) {
                    worklist &lt;- crawl(link)
                }(link)
            }
        }
    }
}
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 这个版本中, 计数器n对worklist的发送操作数量进行了限制。每一次我们发现有元素需要被发送到worklist时, 我们都会对n进行++操作, 在向worklist中发送初始的命令行参数之前, 我们也进行过一次++操作。这里的操作++是在每启动一个crawler的goroutine之前。主循环会在n减为0时终止, 这时候说明没活可干了。< / p >
< p > 现在这个并发爬虫会比5.6节中的广度优先搜索版快上20倍, 而且不会出什么错, 并且在其完成任务时也会正确地终止。< / p >
< p > 下面的程序是避免过度并发的另一种思路。这个版本使用了原来的crawl函数, 但没有使用计数信号量, 取而代之用了20个常驻的crawler goroutine, 这样来保证最多20个HTTP请求在并发。< / p >
2024-08-08 07:47:03 +00:00
< pre > < code class = "language-go" > func main() {
    worklist := make(chan []string) // lists of URLs, may have duplicates
    unseenLinks := make(chan string) // de-duplicated URLs
    // Add command-line arguments to worklist.
    go func() { worklist &lt;- os.Args[1:] }()
    // Create 20 crawler goroutines to fetch each unseen link.
    for i := 0; i &lt; 20; i++ {
        go func() {
            for link := range unseenLinks {
                foundLinks := crawl(link)
                go func() { worklist &lt;- foundLinks }()
            }
        }()
    }
    // The main goroutine de-duplicates worklist items
    // and sends the unseen ones to the crawlers.
    seen := make(map[string]bool)
    for list := range worklist {
        for _, link := range list {
            if !seen[link] {
                seen[link] = true
                unseenLinks &lt;- link
            }
        }
    }
}
2022-08-04 06:59:33 +00:00
< / code > < / pre >
< p > 所有的爬虫goroutine现在都是被同一个channel - unseenLinks喂饱的了。主goroutine负责拆分它从worklist里拿到的元素, 然后把没有抓过的经由unseenLinks channel发送给一个爬虫的goroutine。< / p >
< p > seen这个map被限定在main goroutine中; 也就是说这个map只能在main goroutine中进行访问。类似于其它的信息隐藏方式, 这样的约束可以让我们从一定程度上保证程序的正确性。例如, 内部变量不能够在函数外部被访问到; 变量( §2.3.4)在没有发生变量逃逸(译注:局部变量被全局变量引用地址导致变量被分配在堆上)的情况下是无法在函数外部访问的;一个对象的封装字段无法被该对象的方法以外的方法访问到。在所有的情况下,信息隐藏都可以帮助我们约束我们的程序,使其不发生意料之外的情况。< / p >
< p > crawl函数爬到的链接在一个专有的goroutine中被发送到worklist中来避免死锁。为了节省篇幅, 这个例子的终止问题我们先不进行详细阐述了。< / p >
< p > < strong > 练习 8.6: < / strong > 为并发爬虫增加深度限制。也就是说, 如果用户设置了depth=3, 那么只有从首页跳转三次以内能够跳到的页面才能被抓取到。< / p >
< p > < strong > 练习 8.7: < / strong > 完成一个并发程序来创建一个线上网站的本地镜像, 把该站点的所有可达的页面都抓取到本地硬盘。为了省事, 我们这里可以只取出现在该域下的所有页面( 比如golang.org开头, 译注: 外链的应该就不算了。) 当然了, 出现在页面里的链接你也需要进行一些处理, 使其能够在你的镜像站点上进行跳转, 而不是指向原始的链接。< / p >
< p > < strong > 译注:< / strong >
拓展阅读 < a href = "http://marcio.io/2015/07/handling-1-million-requests-per-minute-with-golang/" > Handling 1 Million Requests per Minute with Go< / a > 。< / p >
2024-08-08 07:47:03 +00:00
< hr > < table > < tr > < td > < img width = "222px" src = "https://chai2010.cn/advanced-go-programming-book/css.png" > < / td > < td > < img width = "222px" src = "https://chai2010.cn/advanced-go-programming-book/cch.png" > < / td > < / tr > < / table >
< div id = "giscus-container" > < / div >
2022-08-04 06:59:33 +00:00
2024-08-08 07:47:03 +00:00
< footer class = "page-footer" >
< span > © 2015-2016 | < a href = "https://github.com/gopl-zh" > Go语言圣经中文版< / a > , 仅学习交流使用< / span >
< / footer >
2022-08-04 06:59:33 +00:00
< / main >
< nav class = "nav-wrapper" aria-label = "Page navigation" >
<!-- Mobile navigation buttons -->
2024-08-08 07:47:03 +00:00
< a rel = "prev" href = "../ch8/ch8-05.html" class = "mobile-nav-chapters previous" title = "Previous chapter" aria-label = "Previous chapter" aria-keyshortcuts = "Left" >
2022-08-04 06:59:33 +00:00
< i class = "fa fa-angle-left" > < / i >
< / a >
2024-08-08 07:47:03 +00:00
<!-- ../ch8/ch8-07.html -->
< a rel = "next" href = "../ch8/ch8-07.html" class = "mobile-nav-chapters next" title = "Next chapter" aria-label = "Next chapter" aria-keyshortcuts = "Right" >
2022-08-04 06:59:33 +00:00
< i class = "fa fa-angle-right" > < / i >
< / a >
< div style = "clear: both" > < / div >
< / nav >
< / div >
< / div >
< nav class = "nav-wide-wrapper" aria-label = "Page navigation" >
2024-08-08 07:47:03 +00:00
< a rel = "prev" href = "../ch8/ch8-05.html" class = "nav-chapters previous" title = "Previous chapter" aria-label = "Previous chapter" aria-keyshortcuts = "Left" >
2022-08-04 06:59:33 +00:00
< i class = "fa fa-angle-left" > < / i >
< / a >
2024-08-08 07:47:03 +00:00
< a rel = "next" href = "../ch8/ch8-07.html" class = "nav-chapters next" title = "Next chapter" aria-label = "Next chapter" aria-keyshortcuts = "Right" >
2022-08-04 06:59:33 +00:00
< i class = "fa fa-angle-right" > < / i >
< / a >
2024-08-08 07:47:03 +00:00
< / nav >
2022-08-04 06:59:33 +00:00
< / div >
2024-08-08 07:47:03 +00:00
< script type = "text/javascript" >
2022-08-04 06:59:33 +00:00
// Global flag set on window before the book scripts below are loaded;
// presumably enables copy buttons on code blocks — TODO confirm against book.js.
window.playground_copyable = true;
< / script >
2024-08-13 00:15:53 +00:00
< script src = "../static/wabook/mark.min.js" type = "text/javascript" charset = "utf-8" > < / script >
< script src = "../static/wabook/clipboard.min.js" type = "text/javascript" charset = "utf-8" > < / script >
< script src = "../static/wabook/highlight.js" type = "text/javascript" charset = "utf-8" > < / script >
< script src = "../static/wabook/book.js" type = "text/javascript" charset = "utf-8" > < / script >
2022-08-04 06:59:33 +00:00
< script type = "text/javascript" charset = "utf-8" >
2024-08-08 07:47:03 +00:00
// Markdown source path of this page (the same path the "Suggest an edit"
// link points at); presumably consumed by giscus.js loaded below — TODO confirm.
var pagePath = "ch8/ch8-06.md"
2022-08-04 06:59:33 +00:00
< / script >
<!-- Custom JS scripts -->
2024-08-13 00:15:53 +00:00
< script src = "../static/wabook/giscus.js" type = "text/javascript" charset = "utf-8" > < / script >
2022-08-04 06:59:33 +00:00
2024-08-08 07:47:03 +00:00
2022-08-04 06:59:33 +00:00
< / body >
< / html >