You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
181 lines
4.0 KiB
181 lines
4.0 KiB
package chinese |
|
|
|
import ( |
|
"context" |
|
"encoding/json" |
|
"fmt" |
|
"strings" |
|
|
|
"go-common/library/log" |
|
) |
|
|
|
// defaultConversion is the key into conversions that Convert and Converts
// use, and that convert falls back to when it is given an empty name.
var defaultConversion = "s2twp"
|
|
|
// Group holds a sequence of dicts |
|
type Group struct { |
|
Files []string |
|
Dicts []*dict |
|
} |
|
|
|
func (g *Group) String() string { |
|
return fmt.Sprintf("%+v", g.Files) |
|
} |
|
|
|
// OpenCC contains the converter |
|
type openCC struct { |
|
Conversion string |
|
Description string |
|
DictGroup []*Group |
|
} |
|
|
|
var conversions = map[string]*openCC{ |
|
"s2twp": {Conversion: s2twp}, |
|
// "hk2s": {Conversion: hk2s}, "s2hk": {Conversion: s2hk}, "s2t": {Conversion: s2t}, |
|
// "s2tw": {Conversion: s2tw}, "t2hk": {Conversion: t2hk}, |
|
// "t2s": {Conversion: t2s}, "t2tw": {Conversion: t2tw}, |
|
// "tw2s": {Conversion: tw2s}, "tw2sp": {Conversion: tw2sp}, |
|
} |
|
|
|
// Init construct an instance of OpenCC. |
|
func Init() { |
|
for k, v := range conversions { |
|
if err := v.dict(k); err != nil { |
|
panic(err) |
|
} |
|
} |
|
} |
|
|
|
// Converts . |
|
func Converts(ctx context.Context, in ...string) (out map[string]string) { |
|
var err error |
|
out = make(map[string]string, len(in)) |
|
for _, v := range in { |
|
if out[v], err = convert(v, defaultConversion); err != nil { |
|
log.Error("convert(%s),err:%+v", in, err) |
|
out[v] = v |
|
} |
|
} |
|
return |
|
} |
|
|
|
// Convert string from Simplified Chinese to Traditional Chinese . |
|
func Convert(ctx context.Context, in string) (out string) { |
|
var err error |
|
if out, err = convert(in, defaultConversion); err != nil { |
|
log.Error("convert(%s),err:%+v", in, err) |
|
} |
|
return |
|
} |
|
|
|
func (cc *openCC) dict(conversion string) error { |
|
var m interface{} |
|
json.Unmarshal([]byte(cc.Conversion), &m) |
|
config := m.(map[string]interface{}) |
|
cc.Description = config["name"].(string) |
|
dictChain, ok := config["conversion_chain"].([]interface{}) |
|
if !ok { |
|
return fmt.Errorf("format %+v not correct", config) |
|
} |
|
for _, v := range dictChain { |
|
d, ok := v.(map[string]interface{}) |
|
if !ok { |
|
return fmt.Errorf("should be map inside conversion_chain") |
|
} |
|
dictMap, ok := d["dict"] |
|
if !ok { |
|
return fmt.Errorf("should have dict inside conversion_chain") |
|
} |
|
if dict, ok := dictMap.(map[string]interface{}); ok { |
|
group, err := cc.group(dict) |
|
if err != nil { |
|
return err |
|
} |
|
cc.DictGroup = append(cc.DictGroup, group) |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
func (cc *openCC) group(d map[string]interface{}) (*Group, error) { |
|
typ, ok := d["type"].(string) |
|
if !ok { |
|
return nil, fmt.Errorf("type should be string") |
|
} |
|
res := &Group{} |
|
switch typ { |
|
case "group": |
|
dicts, ok := d["dicts"].([]interface{}) |
|
if !ok { |
|
return nil, fmt.Errorf("dicts field invalid") |
|
} |
|
for _, dict := range dicts { |
|
d, ok := dict.(map[string]interface{}) |
|
if !ok { |
|
return nil, fmt.Errorf("dicts items invalid") |
|
} |
|
group, err := cc.group(d) |
|
if err != nil { |
|
return nil, err |
|
} |
|
res.Files = append(res.Files, group.Files...) |
|
res.Dicts = append(res.Dicts, group.Dicts...) |
|
} |
|
case "txt": |
|
file, ok := d["file"] |
|
if !ok { |
|
return nil, fmt.Errorf("no file field found") |
|
} |
|
daDict, err := buildFromFile(file.(string)) |
|
if err != nil { |
|
return nil, err |
|
} |
|
res.Files = append(res.Files, file.(string)) |
|
res.Dicts = append(res.Dicts, daDict) |
|
default: |
|
return nil, fmt.Errorf("type should be txt or group") |
|
} |
|
return res, nil |
|
} |
|
|
|
// convert string from Simplified Chinese to Traditional Chinese or vice versa |
|
func convert(in, conversion string) (string, error) { |
|
if conversion == "" { |
|
conversion = defaultConversion |
|
} |
|
for _, group := range conversions[conversion].DictGroup { |
|
r := []rune(in) |
|
var tokens []string |
|
for i := 0; i < len(r); { |
|
s := r[i:] |
|
var token string |
|
max := 0 |
|
for _, dict := range group.Dicts { |
|
ret, err := dict.prefixMatch(string(s)) |
|
if err != nil { |
|
return "", err |
|
} |
|
if len(ret) > 0 { |
|
o := "" |
|
for k, v := range ret { |
|
if len(k) > max { |
|
max = len(k) |
|
token = v[0] |
|
o = k |
|
} |
|
} |
|
i += len([]rune(o)) |
|
break |
|
} |
|
} |
|
if max == 0 { //no match |
|
token = string(r[i]) |
|
i++ |
|
} |
|
tokens = append(tokens, token) |
|
} |
|
in = strings.Join(tokens, "") |
|
} |
|
return in, nil |
|
}
|
|
|