mirror of
https://github.com/gopl-zh/gopl-zh.github.com.git
synced 2024-11-14 02:13:51 +00:00
371 lines
37 KiB
HTML
371 lines
37 KiB
HTML
<!DOCTYPE HTML>
|
||
<html lang="zh" class="sidebar-visible no-js light">
|
||
<head>
|
||
<!-- Book generated using mdBook -->
|
||
<meta charset="UTF-8">
|
||
<title>示例: 并发的Web爬虫 - Go语言圣经</title>
|
||
|
||
|
||
<!-- Custom HTML head -->
|
||
|
||
|
||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||
<meta name="description" content="<The Go Programming Language>中文版">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
<meta name="theme-color" content="#ffffff" />
|
||
|
||
<link rel="icon" href="../favicon.svg">
|
||
<link rel="shortcut icon" href="../favicon.png">
|
||
<link rel="stylesheet" href="../css/variables.css">
|
||
<link rel="stylesheet" href="../css/general.css">
|
||
<link rel="stylesheet" href="../css/chrome.css">
|
||
<link rel="stylesheet" href="../css/print.css" media="print">
|
||
|
||
<!-- Fonts -->
|
||
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
|
||
<link rel="stylesheet" href="../fonts/fonts.css">
|
||
|
||
<!-- Highlight.js Stylesheets -->
|
||
<link rel="stylesheet" href="../highlight.css">
|
||
<link rel="stylesheet" href="../tomorrow-night.css">
|
||
<link rel="stylesheet" href="../ayu-highlight.css">
|
||
|
||
<!-- Custom theme stylesheets -->
|
||
<link rel="stylesheet" href="../style.css">
|
||
|
||
|
||
</head>
|
||
<body>
|
||
<!-- Provide site root to javascript -->
|
||
<script type="text/javascript">
|
||
var path_to_root = "../";
|
||
var default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "navy" : "light";
|
||
</script>
|
||
|
||
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
||
<script type="text/javascript">
|
||
try {
|
||
var theme = localStorage.getItem('mdbook-theme');
|
||
var sidebar = localStorage.getItem('mdbook-sidebar');
|
||
|
||
if (theme.startsWith('"') && theme.endsWith('"')) {
|
||
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
|
||
}
|
||
|
||
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
|
||
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
|
||
}
|
||
} catch (e) { }
|
||
</script>
|
||
|
||
<!-- Set the theme before any content is loaded, prevents flash -->
|
||
<script type="text/javascript">
|
||
var theme;
|
||
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
|
||
if (theme === null || theme === undefined) { theme = default_theme; }
|
||
var html = document.querySelector('html');
|
||
html.classList.remove('no-js')
|
||
html.classList.remove('light')
|
||
html.classList.add(theme);
|
||
html.classList.add('js');
|
||
</script>
|
||
|
||
<!-- Hide / unhide sidebar before it is displayed -->
|
||
<script type="text/javascript">
|
||
var html = document.querySelector('html');
|
||
var sidebar = 'hidden';
|
||
if (document.body.clientWidth >= 1080) {
|
||
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
|
||
sidebar = sidebar || 'visible';
|
||
}
|
||
html.classList.remove('sidebar-visible');
|
||
html.classList.add("sidebar-" + sidebar);
|
||
</script>
|
||
|
||
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
||
<div class="sidebar-scrollbox">
|
||
<ol class="chapter"><li class="chapter-item expanded affix "><a href="../index.html">Go语言圣经</a></li><li class="chapter-item expanded affix "><a href="../preface.html">前言</a></li><li class="chapter-item expanded "><a href="../ch1/ch1.html"><strong aria-hidden="true">1.</strong> 入门</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch1/ch1-01.html"><strong aria-hidden="true">1.1.</strong> Hello, World</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-02.html"><strong aria-hidden="true">1.2.</strong> 命令行参数</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-03.html"><strong aria-hidden="true">1.3.</strong> 查找重复的行</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-04.html"><strong aria-hidden="true">1.4.</strong> GIF动画</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-05.html"><strong aria-hidden="true">1.5.</strong> 获取URL</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-06.html"><strong aria-hidden="true">1.6.</strong> 并发获取多个URL</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-07.html"><strong aria-hidden="true">1.7.</strong> Web服务</a></li><li class="chapter-item expanded "><a href="../ch1/ch1-08.html"><strong aria-hidden="true">1.8.</strong> 本章要点</a></li></ol></li><li class="chapter-item expanded "><a href="../ch2/ch2.html"><strong aria-hidden="true">2.</strong> 程序结构</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch2/ch2-01.html"><strong aria-hidden="true">2.1.</strong> 命名</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-02.html"><strong aria-hidden="true">2.2.</strong> 声明</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-03.html"><strong aria-hidden="true">2.3.</strong> 变量</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-04.html"><strong aria-hidden="true">2.4.</strong> 赋值</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-05.html"><strong aria-hidden="true">2.5.</strong> 
类型</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-06.html"><strong aria-hidden="true">2.6.</strong> 包和文件</a></li><li class="chapter-item expanded "><a href="../ch2/ch2-07.html"><strong aria-hidden="true">2.7.</strong> 作用域</a></li></ol></li><li class="chapter-item expanded "><a href="../ch3/ch3.html"><strong aria-hidden="true">3.</strong> 基础数据类型</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch3/ch3-01.html"><strong aria-hidden="true">3.1.</strong> 整型</a></li><li class="chapter-item expanded "><a href="../ch3/ch3-02.html"><strong aria-hidden="true">3.2.</strong> 浮点数</a></li><li class="chapter-item expanded "><a href="../ch3/ch3-03.html"><strong aria-hidden="true">3.3.</strong> 复数</a></li><li class="chapter-item expanded "><a href="../ch3/ch3-04.html"><strong aria-hidden="true">3.4.</strong> 布尔型</a></li><li class="chapter-item expanded "><a href="../ch3/ch3-05.html"><strong aria-hidden="true">3.5.</strong> 字符串</a></li><li class="chapter-item expanded "><a href="../ch3/ch3-06.html"><strong aria-hidden="true">3.6.</strong> 常量</a></li></ol></li><li class="chapter-item expanded "><a href="../ch4/ch4.html"><strong aria-hidden="true">4.</strong> 复合数据类型</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch4/ch4-01.html"><strong aria-hidden="true">4.1.</strong> 数组</a></li><li class="chapter-item expanded "><a href="../ch4/ch4-02.html"><strong aria-hidden="true">4.2.</strong> Slice</a></li><li class="chapter-item expanded "><a href="../ch4/ch4-03.html"><strong aria-hidden="true">4.3.</strong> Map</a></li><li class="chapter-item expanded "><a href="../ch4/ch4-04.html"><strong aria-hidden="true">4.4.</strong> 结构体</a></li><li class="chapter-item expanded "><a href="../ch4/ch4-05.html"><strong aria-hidden="true">4.5.</strong> JSON</a></li><li class="chapter-item expanded "><a href="../ch4/ch4-06.html"><strong aria-hidden="true">4.6.</strong> 文本和HTML模板</a></li></ol></li><li class="chapter-item expanded 
"><a href="../ch5/ch5.html"><strong aria-hidden="true">5.</strong> 函数</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch5/ch5-01.html"><strong aria-hidden="true">5.1.</strong> 函数声明</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-02.html"><strong aria-hidden="true">5.2.</strong> 递归</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-03.html"><strong aria-hidden="true">5.3.</strong> 多返回值</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-04.html"><strong aria-hidden="true">5.4.</strong> 错误</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-05.html"><strong aria-hidden="true">5.5.</strong> 函数值</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-06.html"><strong aria-hidden="true">5.6.</strong> 匿名函数</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-07.html"><strong aria-hidden="true">5.7.</strong> 可变参数</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-08.html"><strong aria-hidden="true">5.8.</strong> Deferred函数</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-09.html"><strong aria-hidden="true">5.9.</strong> Panic异常</a></li><li class="chapter-item expanded "><a href="../ch5/ch5-10.html"><strong aria-hidden="true">5.10.</strong> Recover捕获异常</a></li></ol></li><li class="chapter-item expanded "><a href="../ch6/ch6.html"><strong aria-hidden="true">6.</strong> 方法</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch6/ch6-01.html"><strong aria-hidden="true">6.1.</strong> 方法声明</a></li><li class="chapter-item expanded "><a href="../ch6/ch6-02.html"><strong aria-hidden="true">6.2.</strong> 基于指针对象的方法</a></li><li class="chapter-item expanded "><a href="../ch6/ch6-03.html"><strong aria-hidden="true">6.3.</strong> 通过嵌入结构体来扩展类型</a></li><li class="chapter-item expanded "><a href="../ch6/ch6-04.html"><strong aria-hidden="true">6.4.</strong> 方法值和方法表达式</a></li><li class="chapter-item expanded "><a 
href="../ch6/ch6-05.html"><strong aria-hidden="true">6.5.</strong> 示例: Bit数组</a></li><li class="chapter-item expanded "><a href="../ch6/ch6-06.html"><strong aria-hidden="true">6.6.</strong> 封装</a></li></ol></li><li class="chapter-item expanded "><a href="../ch7/ch7.html"><strong aria-hidden="true">7.</strong> 接口</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch7/ch7-01.html"><strong aria-hidden="true">7.1.</strong> 接口是合约</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-02.html"><strong aria-hidden="true">7.2.</strong> 接口类型</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-03.html"><strong aria-hidden="true">7.3.</strong> 实现接口的条件</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-04.html"><strong aria-hidden="true">7.4.</strong> flag.Value接口</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-05.html"><strong aria-hidden="true">7.5.</strong> 接口值</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-06.html"><strong aria-hidden="true">7.6.</strong> sort.Interface接口</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-07.html"><strong aria-hidden="true">7.7.</strong> http.Handler接口</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-08.html"><strong aria-hidden="true">7.8.</strong> error接口</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-09.html"><strong aria-hidden="true">7.9.</strong> 示例: 表达式求值</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-10.html"><strong aria-hidden="true">7.10.</strong> 类型断言</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-11.html"><strong aria-hidden="true">7.11.</strong> 基于类型断言识别错误类型</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-12.html"><strong aria-hidden="true">7.12.</strong> 通过类型断言查询接口</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-13.html"><strong aria-hidden="true">7.13.</strong> 类型分支</a></li><li class="chapter-item expanded "><a 
href="../ch7/ch7-14.html"><strong aria-hidden="true">7.14.</strong> 示例: 基于标记的XML解码</a></li><li class="chapter-item expanded "><a href="../ch7/ch7-15.html"><strong aria-hidden="true">7.15.</strong> 补充几点</a></li></ol></li><li class="chapter-item expanded "><a href="../ch8/ch8.html"><strong aria-hidden="true">8.</strong> Goroutines和Channels</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch8/ch8-01.html"><strong aria-hidden="true">8.1.</strong> Goroutines</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-02.html"><strong aria-hidden="true">8.2.</strong> 示例: 并发的Clock服务</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-03.html"><strong aria-hidden="true">8.3.</strong> 示例: 并发的Echo服务</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-04.html"><strong aria-hidden="true">8.4.</strong> Channels</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-05.html"><strong aria-hidden="true">8.5.</strong> 并发的循环</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-06.html" class="active"><strong aria-hidden="true">8.6.</strong> 示例: 并发的Web爬虫</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-07.html"><strong aria-hidden="true">8.7.</strong> 基于select的多路复用</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-09.html"><strong aria-hidden="true">8.8.</strong> 并发的退出</a></li><li class="chapter-item expanded "><a href="../ch8/ch8-10.html"><strong aria-hidden="true">8.9.</strong> 示例: 聊天服务</a></li></ol></li><li class="chapter-item expanded "><a href="../ch9/ch9.html"><strong aria-hidden="true">9.</strong> 基于共享变量的并发</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch9/ch9-01.html"><strong aria-hidden="true">9.1.</strong> 竞争条件</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-02.html"><strong aria-hidden="true">9.2.</strong> sync.Mutex互斥锁</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-03.html"><strong 
aria-hidden="true">9.3.</strong> sync.RWMutex读写锁</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-04.html"><strong aria-hidden="true">9.4.</strong> 内存同步</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-06.html"><strong aria-hidden="true">9.5.</strong> 竞争条件检测</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-07.html"><strong aria-hidden="true">9.6.</strong> 示例: 并发的非阻塞缓存</a></li><li class="chapter-item expanded "><a href="../ch9/ch9-08.html"><strong aria-hidden="true">9.7.</strong> Goroutines和线程</a></li></ol></li><li class="chapter-item expanded "><a href="../ch10/ch10.html"><strong aria-hidden="true">10.</strong> 包和工具</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch10/ch10-01.html"><strong aria-hidden="true">10.1.</strong> 包简介</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-02.html"><strong aria-hidden="true">10.2.</strong> 导入路径</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-03.html"><strong aria-hidden="true">10.3.</strong> 包声明</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-04.html"><strong aria-hidden="true">10.4.</strong> 导入声明</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-05.html"><strong aria-hidden="true">10.5.</strong> 包的匿名导入</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-06.html"><strong aria-hidden="true">10.6.</strong> 包和命名</a></li><li class="chapter-item expanded "><a href="../ch10/ch10-07.html"><strong aria-hidden="true">10.7.</strong> 工具</a></li></ol></li><li class="chapter-item expanded "><a href="../ch11/ch11.html"><strong aria-hidden="true">11.</strong> 测试</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch11/ch11-01.html"><strong aria-hidden="true">11.1.</strong> go test</a></li><li class="chapter-item expanded "><a href="../ch11/ch11-02.html"><strong aria-hidden="true">11.2.</strong> 测试函数</a></li><li class="chapter-item expanded "><a 
href="../ch11/ch11-03.html"><strong aria-hidden="true">11.3.</strong> 测试覆盖率</a></li><li class="chapter-item expanded "><a href="../ch11/ch11-04.html"><strong aria-hidden="true">11.4.</strong> 基准测试</a></li><li class="chapter-item expanded "><a href="../ch11/ch11-05.html"><strong aria-hidden="true">11.5.</strong> 剖析</a></li><li class="chapter-item expanded "><a href="../ch11/ch11-06.html"><strong aria-hidden="true">11.6.</strong> 示例函数</a></li></ol></li><li class="chapter-item expanded "><a href="../ch12/ch12.html"><strong aria-hidden="true">12.</strong> 反射</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch12/ch12-01.html"><strong aria-hidden="true">12.1.</strong> 为何需要反射?</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-02.html"><strong aria-hidden="true">12.2.</strong> reflect.Type和reflect.Value</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-03.html"><strong aria-hidden="true">12.3.</strong> Display递归打印</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-04.html"><strong aria-hidden="true">12.4.</strong> 示例: 编码S表达式</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-05.html"><strong aria-hidden="true">12.5.</strong> 通过reflect.Value修改值</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-06.html"><strong aria-hidden="true">12.6.</strong> 示例: 解码S表达式</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-08.html"><strong aria-hidden="true">12.7.</strong> 显示一个类型的方法集</a></li><li class="chapter-item expanded "><a href="../ch12/ch12-09.html"><strong aria-hidden="true">12.8.</strong> 几点忠告</a></li></ol></li><li class="chapter-item expanded "><a href="../ch13/ch13.html"><strong aria-hidden="true">13.</strong> 底层编程</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../ch13/ch13-01.html"><strong aria-hidden="true">13.1.</strong> unsafe.Sizeof, Alignof 和 Offsetof</a></li><li class="chapter-item expanded "><a href="../ch13/ch13-02.html"><strong 
aria-hidden="true">13.2.</strong> unsafe.Pointer</a></li><li class="chapter-item expanded "><a href="../ch13/ch13-03.html"><strong aria-hidden="true">13.3.</strong> 示例: 深度相等判断</a></li><li class="chapter-item expanded "><a href="../ch13/ch13-04.html"><strong aria-hidden="true">13.4.</strong> 通过cgo调用C代码</a></li><li class="chapter-item expanded "><a href="../ch13/ch13-05.html"><strong aria-hidden="true">13.5.</strong> 几点忠告</a></li></ol></li><li class="chapter-item expanded "><a href="../appendix/appendix.html"><strong aria-hidden="true">14.</strong> 附录</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="../appendix/appendix-a-errata.html"><strong aria-hidden="true">14.1.</strong> 附录A:原文勘误</a></li><li class="chapter-item expanded "><a href="../appendix/appendix-b-author.html"><strong aria-hidden="true">14.2.</strong> 附录B:作者译者</a></li><li class="chapter-item expanded "><a href="../appendix/appendix-c-cpoyright.html"><strong aria-hidden="true">14.3.</strong> 附录C:译文授权</a></li><li class="chapter-item expanded "><a href="../appendix/appendix-d-translations.html"><strong aria-hidden="true">14.4.</strong> 附录D:其它语言</a></li></ol></li></ol> </div>
|
||
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
|
||
</nav>
|
||
|
||
<div id="page-wrapper" class="page-wrapper">
|
||
|
||
<div class="page">
|
||
|
||
<div id="menu-bar-hover-placeholder"></div>
|
||
<div id="menu-bar" class="menu-bar sticky bordered">
|
||
<div class="left-buttons">
|
||
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
||
<i class="fa fa-bars"></i>
|
||
</button>
|
||
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
||
<i class="fa fa-paint-brush"></i>
|
||
</button>
|
||
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
||
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
||
</ul>
|
||
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
||
<i class="fa fa-search"></i>
|
||
</button>
|
||
</div>
|
||
|
||
<h1 class="menu-title">Go语言圣经</h1>
|
||
|
||
<div class="right-buttons">
|
||
<a href="../print.html" title="Print this book" aria-label="Print this book">
|
||
<i id="print-button" class="fa fa-print"></i>
|
||
</a>
|
||
<a href="https://github.com/gopl-zh/gopl-zh.github.com" title="Git repository" aria-label="Git repository">
|
||
<i id="git-repository-button" class="fa fa-github"></i>
|
||
</a>
|
||
<a href="https://github.com/gopl-zh/gopl-zh.github.com/edit/master/./ch8/ch8-06.md" title="Suggest an edit" aria-label="Suggest an edit">
|
||
<i id="git-edit-button" class="fa fa-edit"></i>
|
||
</a>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
<div id="search-wrapper" class="hidden">
|
||
<form id="searchbar-outer" class="searchbar-outer">
|
||
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
||
</form>
|
||
<div id="searchresults-outer" class="searchresults-outer hidden">
|
||
<div id="searchresults-header" class="searchresults-header"></div>
|
||
<ul id="searchresults">
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
||
<script type="text/javascript">
|
||
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
|
||
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
|
||
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
|
||
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
|
||
});
|
||
</script>
|
||
|
||
<div id="content" class="content">
|
||
<!-- Page table of contents -->
|
||
<div class="sidetoc"><nav class="pagetoc"></nav></div>
|
||
<main>
|
||
<!-- 头部 -->
|
||
<ul dir="auto">
|
||
<li><em>KusionStack一站式可编程配置技术栈(Go): <a href="https://github.com/KusionStack/kusion">https://github.com/KusionStack/kusion</a></em></li>
|
||
<li><em>KCL 配置编程语言(Rust): <a href="https://github.com/KusionStack/KCLVM">https://github.com/KusionStack/KCLVM</a></em></li>
|
||
<li><em>凹语言™: <a href="https://github.com/wa-lang/wa">https://github.com/wa-lang/wa</a></em></li>
|
||
</ul>
|
||
<hr>
|
||
|
||
<h2 id="86-示例-并发的web爬虫"><a class="header" href="#86-示例-并发的web爬虫">8.6. 示例: 并发的Web爬虫</a></h2>
|
||
<p>在5.6节中,我们做了一个简单的web爬虫,用bfs(广度优先)算法来抓取整个网站。在本节中,我们会让这个爬虫并行化,这样每一个彼此独立的抓取命令可以并行进行IO,最大化利用网络资源。crawl函数和gopl.io/ch5/findlinks3中的是一样的。</p>
|
||
<p><u><i>gopl.io/ch8/crawl1</i></u></p>
|
||
<pre><code class="language-go">func crawl(url string) []string {
|
||
fmt.Println(url)
|
||
list, err := links.Extract(url)
|
||
if err != nil {
|
||
log.Print(err)
|
||
}
|
||
return list
|
||
}
|
||
</code></pre>
|
||
<p>主函数和5.6节中的breadthFirst(广度优先)类似。像之前一样,一个worklist是一个记录了需要处理的元素的队列,每一个元素都是一个需要抓取的URL列表,不过这一次我们用channel代替slice来做这个队列。每一个对crawl的调用都会在他们自己的goroutine中进行并且会把他们抓到的链接发送回worklist。</p>
|
||
<pre><code class="language-go">func main() {
|
||
worklist := make(chan []string)
|
||
|
||
// Start with the command-line arguments.
|
||
go func() { worklist &lt;- os.Args[1:] }()
|
||
|
||
// Crawl the web concurrently.
|
||
seen := make(map[string]bool)
|
||
for list := range worklist {
|
||
for _, link := range list {
|
||
if !seen[link] {
|
||
seen[link] = true
|
||
go func(link string) {
|
||
worklist &lt;- crawl(link)
|
||
}(link)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>注意这里的crawl所在的goroutine会将link作为一个显式的参数传入,来避免“循环变量快照”的问题(在5.6.1中有讲解)。另外注意这里将命令行参数传入worklist也是在一个另外的goroutine中进行的,这是为了避免channel两端的main goroutine与crawler goroutine都尝试向对方发送内容,却没有一端接收内容时发生死锁。当然,这里我们也可以用buffered channel来解决问题,这里不再赘述。</p>
|
||
<p>现在爬虫可以高并发地运行起来,并且会输出大量的URL,不过还是有两个问题。第一个问题是:在运行一段时间后,log里会出现下面这样的错误信息:</p>
|
||
<pre><code>$ go build gopl.io/ch8/crawl1
|
||
$ ./crawl1 http://gopl.io/
|
||
http://gopl.io/
|
||
https://golang.org/help/
|
||
https://golang.org/doc/
|
||
https://golang.org/blog/
|
||
...
|
||
2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host
|
||
2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket: too many open files
|
||
...
|
||
</code></pre>
|
||
<p>最初的错误信息是一个让人莫名的DNS查找失败,即使这个域名是完全可靠的。而随后的错误信息揭示了原因:这个程序一次性创建了太多网络连接,超过了每一个进程的打开文件数限制,继而导致DNS查找、net.Dial调用等操作开始失败。</p>
|
||
<p>这个程序实在是太并行了。无穷无尽地并行化并不是什么好事情,因为不管怎么说,你的系统总是会有一些个限制因素,比如CPU核心数会限制你的计算负载,比如你的硬盘转轴和磁头数限制了你的本地磁盘IO操作频率,比如你的网络带宽限制了你的下载速度上限,或者是你的一个web服务的服务容量上限等等。为了解决这个问题,我们可以限制并发程序所使用的资源来使之适应自己的运行环境。对于我们的例子来说,最简单的方法就是限制对links.Extract的并发调用,使其在同一时间最多不超过n次,这里的n一般小于文件描述符的上限值,比如20。这和一个夜店里限制客人数目是一个道理,只有当有客人离开时,才会允许新的客人进入店内。</p>
|
||
<p>我们可以用一个有容量限制的buffered channel来控制并发,这类似于操作系统里的计数信号量概念。从概念上讲,channel里的n个空槽代表n个可以处理内容的token(通行证),向channel里发送一个值会获取其中的一个token,而从channel里接收一个值会释放其中的一个token,并且生成一个新的空槽位。这样保证了在没有接收介入时最多有n个发送操作。(这里可能我们拿channel里填充的槽来做token更直观一些,不过还是这样吧。)由于channel里的元素类型并不重要,我们用一个零值的struct{}来作为其元素。</p>
|
||
<p>让我们重写crawl函数,将对links.Extract的调用操作用获取、释放token的操作包裹起来,来确保同一时间对其只有20个调用。信号量数量和其能操作的IO资源数量应保持接近。</p>
|
||
<p><u><i>gopl.io/ch8/crawl2</i></u></p>
|
||
<pre><code class="language-go">// tokens is a counting semaphore used to
|
||
// enforce a limit of 20 concurrent requests.
|
||
var tokens = make(chan struct{}, 20)
|
||
|
||
func crawl(url string) []string {
|
||
fmt.Println(url)
|
||
tokens &lt;- struct{}{} // acquire a token
|
||
list, err := links.Extract(url)
|
||
&lt;-tokens // release the token
|
||
if err != nil {
|
||
log.Print(err)
|
||
}
|
||
return list
|
||
}
|
||
</code></pre>
|
||
<p>第二个问题是这个程序永远都不会终止,即使它已经爬到了所有初始链接衍生出的链接。(当然,除非你慎重地选择了合适的初始化URL或者已经实现了练习8.6中的深度限制,你应该还没有意识到这个问题。)为了使这个程序能够终止,我们需要在worklist为空并且没有crawl的goroutine在运行时退出主循环。</p>
|
||
<pre><code class="language-go">func main() {
|
||
worklist := make(chan []string)
|
||
var n int // number of pending sends to worklist
|
||
|
||
// Start with the command-line arguments.
|
||
n++
|
||
go func() { worklist &lt;- os.Args[1:] }()
|
||
|
||
// Crawl the web concurrently.
|
||
seen := make(map[string]bool)
|
||
|
||
for ; n > 0; n-- {
|
||
list := &lt;-worklist
|
||
for _, link := range list {
|
||
if !seen[link] {
|
||
seen[link] = true
|
||
n++
|
||
go func(link string) {
|
||
worklist &lt;- crawl(link)
|
||
}(link)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>这个版本中,计数器n跟踪了尚未完成的对worklist的发送操作数量。每一次我们发现有元素需要被发送到worklist时,我们都会对n进行++操作,在向worklist中发送初始的命令行参数之前,我们也进行过一次++操作。这里的操作++是在每启动一个crawler的goroutine之前。主循环会在n减为0时终止,这时候说明没活可干了。</p>
|
||
<p>现在这个并发爬虫会比5.6节中的广度优先搜索版快上20倍,而且不会出什么错,并且在其完成任务时也会正确地终止。</p>
|
||
<p>下面的程序是避免过度并发的另一种思路。这个版本使用了原来的crawl函数,但没有使用计数信号量,取而代之用了20个常驻的crawler goroutine,这样来保证最多20个HTTP请求在并发。</p>
|
||
<pre><code class="language-go">func main() {
|
||
worklist := make(chan []string) // lists of URLs, may have duplicates
|
||
unseenLinks := make(chan string) // de-duplicated URLs
|
||
|
||
// Add command-line arguments to worklist.
|
||
go func() { worklist &lt;- os.Args[1:] }()
|
||
|
||
// Create 20 crawler goroutines to fetch each unseen link.
|
||
for i := 0; i &lt; 20; i++ {
|
||
go func() {
|
||
for link := range unseenLinks {
|
||
foundLinks := crawl(link)
|
||
go func() { worklist &lt;- foundLinks }()
|
||
}
|
||
}()
|
||
}
|
||
|
||
// The main goroutine de-duplicates worklist items
|
||
// and sends the unseen ones to the crawlers.
|
||
seen := make(map[string]bool)
|
||
for list := range worklist {
|
||
for _, link := range list {
|
||
if !seen[link] {
|
||
seen[link] = true
|
||
unseenLinks &lt;- link
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>所有的爬虫goroutine现在都是被同一个channel - unseenLinks喂饱的了。主goroutine负责拆分它从worklist里拿到的元素,然后把没有抓过的经由unseenLinks channel发送给一个爬虫的goroutine。</p>
|
||
<p>seen这个map被限定在main goroutine中;也就是说这个map只能在main goroutine中进行访问。类似于其它的信息隐藏方式,这样的约束可以让我们从一定程度上保证程序的正确性。例如,内部变量不能够在函数外部被访问到;变量(§2.3.4)在没有发生变量逃逸(译注:局部变量被全局变量引用地址导致变量被分配在堆上)的情况下是无法在函数外部访问的;一个对象的封装字段无法被该对象的方法以外的方法访问到。在所有的情况下,信息隐藏都可以帮助我们约束我们的程序,使其不发生意料之外的情况。</p>
|
||
<p>crawl函数爬到的链接在一个专有的goroutine中被发送到worklist中来避免死锁。为了节省篇幅,这个例子的终止问题我们先不进行详细阐述了。</p>
|
||
<p><strong>练习 8.6:</strong> 为并发爬虫增加深度限制。也就是说,如果用户设置了depth=3,那么只有从首页跳转三次以内能够跳到的页面才能被抓取到。</p>
|
||
<p><strong>练习 8.7:</strong> 完成一个并发程序来创建一个线上网站的本地镜像,把该站点的所有可达的页面都抓取到本地硬盘。为了省事,我们这里可以只取出现在该域下的所有页面(比如golang.org开头,译注:外链的应该就不算了。)当然了,出现在页面里的链接你也需要进行一些处理,使其能够在你的镜像站点上进行跳转,而不是指向原始的链接。</p>
|
||
<p><strong>译注:</strong>
|
||
拓展阅读 <a href="http://marcio.io/2015/07/handling-1-million-requests-per-minute-with-golang/">Handling 1 Million Requests per Minute with Go</a>。</p>
|
||
|
||
|
||
<!-- 公众号 -->
|
||
<hr>
|
||
<table>
|
||
<tr>
|
||
<td>
|
||
<img width="222" src="https://chai2010.cn/advanced-go-programming-book/css.png" alt="公众号">
|
||
</td>
|
||
<td>
|
||
<img width="222" src="https://chai2010.cn/advanced-go-programming-book/cch.png" alt="公众号">
|
||
</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<div id="giscus-container"></div>
|
||
<footer class="page-footer">
|
||
<span>© 2015-2016 | <a href="https://github.com/gopl-zh"> Go语言圣经中文版</a>, 仅学习交流使用</span>
|
||
</footer>
|
||
</main>
|
||
|
||
<nav class="nav-wrapper" aria-label="Page navigation">
|
||
<!-- Mobile navigation buttons -->
|
||
<a rel="prev" href="../ch8/ch8-05.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
<i class="fa fa-angle-left"></i>
|
||
</a>
|
||
|
||
<a rel="next" href="../ch8/ch8-07.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
<i class="fa fa-angle-right"></i>
|
||
</a>
|
||
|
||
<div style="clear: both"></div>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
|
||
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
||
<a rel="prev" href="../ch8/ch8-05.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
||
<i class="fa fa-angle-left"></i>
|
||
</a>
|
||
|
||
<a rel="next" href="../ch8/ch8-07.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
||
<i class="fa fa-angle-right"></i>
|
||
</a>
|
||
</nav>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<script type="text/javascript">
|
||
window.playground_copyable = true;
|
||
</script>
|
||
|
||
|
||
<script src="../elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="../mark.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="../searcher.js" type="text/javascript" charset="utf-8"></script>
|
||
|
||
<script src="../clipboard.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="../highlight.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="../book.js" type="text/javascript" charset="utf-8"></script>
|
||
<script type="text/javascript" charset="utf-8">
|
||
var pagePath = "ch8/ch8-06.md"
|
||
</script>
|
||
|
||
|
||
<!-- Custom JS scripts -->
|
||
<script type="text/javascript" src="../js/custom.js"></script>
|
||
<script type="text/javascript" src="../js/bigPicture.js"></script>
|
||
|
||
|
||
</body>
|
||
</html>
|