mirror of
synced 2024-12-27 07:16:22 +00:00
130 lines
5.3 KiB
130 lines
5.3 KiB
## 7.14. 示例: 基於標記的XML解碼
package xml
// Name identifies an XML element or attribute by its local name.
type Name struct {
	Local string // e.g., "Title" or "id"
}

// Attr is a single attribute of a start tag, e.g., name="value".
type Attr struct {
	Name  Name
	Value string
}

// A Token includes StartElement, EndElement, CharData,
// and Comment, plus a few esoteric types (not shown).
type Token interface{}

// StartElement is the token for an opening tag, e.g., <name>.
type StartElement struct {
	Name Name
	Attr []Attr
}

// EndElement is the token for a closing tag, e.g., </name>.
type EndElement struct{ Name Name }

// CharData is the token for text between tags, e.g., <p>CharData</p>.
type CharData []byte

// Comment is the token for an XML comment, e.g., <!-- Comment -->.
type Comment []byte

// Decoder produces a sequence of Tokens; its fields are elided in this summary.
type Decoder struct{ /* ... */ }
// NewDecoder takes an io.Reader and returns a *Decoder.
// The body is omitted: this listing only summarizes the API.
func NewDecoder(io.Reader) *Decoder

// Token yields the next Token of the input together with an error value.
// The body is omitted: this listing only summarizes the API.
func (*Decoder) Token() (Token, error) // returns next Token in sequence
// Xmlselect prints the text of selected elements of an XML document.
package main
import (
	"encoding/xml"
	"fmt"
	"io"
	"os"
	"strings"
)
func main() {
dec := xml.NewDecoder(os.Stdin)
var stack []string // stack of element names
for {
tok, err := dec.Token()
if err == io.EOF {
} else if err != nil {
fmt.Fprintf(os.Stderr, "xmlselect: %v\n", err)
switch tok := tok.(type) {
case xml.StartElement:
stack = append(stack, tok.Name.Local) // push
case xml.EndElement:
stack = stack[:len(stack)-1] // pop
case xml.CharData:
if containsAll(stack, os.Args[1:]) {
fmt.Printf("%s: %s\n", strings.Join(stack, " "), tok)
// containsAll reports whether x contains the elements of y, in order.
// The elements of y need not be adjacent in x, and an empty y always matches.
func containsAll(x, y []string) bool {
	// Once y is longer than what is left of x, no match is possible.
	for len(y) <= len(x) {
		if len(y) == 0 {
			return true
		}
		if x[0] == y[0] {
			y = y[1:] // found the next wanted element
		}
		x = x[1:]
	}
	return false
}
$ go build gopl.io/ch1/fetch
$ ./fetch http://www.w3.org/TR/2006/REC-xml11-20060816 |
./xmlselect div div h2
html body div div h2: 1 Introduction
html body div div h2: 2 Documents
html body div div h2: 3 Logical Structures
html body div div h2: 4 Physical Structures
html body div div h2: 5 Conformance
html body div div h2: 6 Notation
html body div div h2: A References
html body div div h2: B Definitions for Character Normalization
**練習 7.17:** 擴展xmlselect程序,使元素不僅可以通過名稱選擇,也可以像CSS選擇器那樣通過屬性進行選擇;例如,一個像 `<div id="page" class="wide">` 這樣的元素,除了通過名稱之外,還可以通過匹配的id或者class來選擇。
**練習 7.18:** 使用基於標記的解碼API,編寫一個可以讀取任意XML文檔並構造這個文檔所代表的通用節點樹的程序。節點有兩種類型:CharData節點表示文本字符串,Element節點表示被命名的元素和它們的屬性。每一個元素節點有一個子節點的切片。
import "encoding/xml"
// Node is a node of the document tree: either CharData or *Element.
type Node interface{}

// CharData is a text node holding a string.
type CharData string

// Element is a named element node together with its attributes and its
// child nodes.
type Element struct {
	Type     xml.Name
	Attr     []xml.Attr
	Children []Node
}