完善 zh2tw, 减少map的乱序循环对结果产生的影响

This commit is contained in:
chai2010 2015-12-18 14:26:55 +08:00
parent bd113ff8ed
commit 9fde1ff772
2 changed files with 26 additions and 18 deletions

View File

@ -16,3 +16,8 @@ zh2tw:
tw2zh:
go run zh2tw.go . .md$$ tw2zh
loop:
go run zh2tw.go . .md$$ tw2zh
go run zh2tw.go . .md$$ zh2tw

View File

@ -25,6 +25,7 @@ import (
"os"
"path/filepath"
"regexp"
"sort"
"unicode/utf8"
)
@ -158,25 +159,24 @@ func tw2zh(s string) string {
}
func init() {
// 剔除出现多次的字符
vvMap := make(map[rune]int)
for k, v := range _TSCharactersMap {
vvMap[k]++
vvMap[v]++
// 作为map键的繁体没有重复
// 但多个繁体可能对应同一个简体, 需要按照key的码点升序依次导入
// 对每个简体, 只保留按key顺序最后一个出现的繁体 (后写入的覆盖先写入的)
kkMap := make([]int, 0, len(_TSCharactersMap))
for k, _ := range _TSCharactersMap {
kkMap = append(kkMap, int(k))
}
for k, v := range _TSCharactersMap {
if vvMap[k] > 1 {
delete(_TSCharactersMap, k)
}
if vvMap[v] > 1 {
delete(_TSCharactersMap, v)
}
}
for k, v := range _TSCharactersMap {
sort.Ints(kkMap)
// 导入初始转换表
for _, k := range kkMap {
k := rune(k)
v := _TSCharactersMap[k]
tw2zhMap[k] = v
zh2twMap[v] = k
}
// 修正错误的转换
// 修正错误的转换(仅简体到繁体)
for k, v := range zh2twMapPatch {
zh2twMap[k] = v
}
@ -184,7 +184,7 @@ func init() {
var (
zh2twMap = make(map[rune]rune)
tw2zhMap = _TSCharactersMap
tw2zhMap = make(map[rune]rune)
)
// 修正错误的转换
@ -196,8 +196,11 @@ var zh2twMapPatch = map[rune]rune{
'同': '同',
'向': '向',
'合': '合',
'針': '针',
'別': '别',
'针': '針',
'别': '别',
'个': '個',
'家': '家',
'当': '當',
}
var _TSCharactersMap = map[rune]rune{