完善 zh2tw, 减少map的乱序循环对结果产生的影响

This commit is contained in:
chai2010 2015-12-18 14:26:55 +08:00
parent bd113ff8ed
commit 9fde1ff772
2 changed files with 26 additions and 18 deletions

View File

@ -16,3 +16,8 @@ zh2tw:
tw2zh: tw2zh:
go run zh2tw.go . .md$$ tw2zh go run zh2tw.go . .md$$ tw2zh
loop:
go run zh2tw.go . .md$$ tw2zh
go run zh2tw.go . .md$$ zh2tw

View File

@ -25,6 +25,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"sort"
"unicode/utf8" "unicode/utf8"
) )
@ -158,25 +159,24 @@ func tw2zh(s string) string {
} }
func init() { func init() {
// 剔除出现多次的字符 // 作为map键的繁体没有重复
vvMap := make(map[rune]int) // 但一个简体可能对应多个繁体, 需要按照key字典顺序导入
for k, v := range _TSCharactersMap { // 每个简体只保留按key字典顺序最后一个出现的繁体
vvMap[k]++ kkMap := make([]int, 0, len(_TSCharactersMap))
vvMap[v]++ for k, _ := range _TSCharactersMap {
kkMap = append(kkMap, int(k))
} }
for k, v := range _TSCharactersMap { sort.Ints(kkMap)
if vvMap[k] > 1 {
delete(_TSCharactersMap, k) // 导入初始转换表
} for _, k := range kkMap {
if vvMap[v] > 1 { k := rune(k)
delete(_TSCharactersMap, v) v := _TSCharactersMap[k]
} tw2zhMap[k] = v
}
for k, v := range _TSCharactersMap {
zh2twMap[v] = k zh2twMap[v] = k
} }
// 修正错误的转换 // 修正错误的转换(仅简体到繁体)
for k, v := range zh2twMapPatch { for k, v := range zh2twMapPatch {
zh2twMap[k] = v zh2twMap[k] = v
} }
@ -184,7 +184,7 @@ func init() {
var ( var (
zh2twMap = make(map[rune]rune) zh2twMap = make(map[rune]rune)
tw2zhMap = _TSCharactersMap tw2zhMap = make(map[rune]rune)
) )
// 修正错误的转换 // 修正错误的转换
@ -196,8 +196,11 @@ var zh2twMapPatch = map[rune]rune{
'同': '同', '同': '同',
'向': '向', '向': '向',
'合': '合', '合': '合',
'針': '针', '针': '針',
'別': '别', '别': '别',
'个': '個',
'家': '家',
'当': '當',
} }
var _TSCharactersMap = map[rune]rune{ var _TSCharactersMap = map[rune]rune{