-
Notifications
You must be signed in to change notification settings - Fork 5
/
zhconv-merge.sh
175 lines (150 loc) · 5.01 KB
/
zhconv-merge.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/bin/bash
# zhconv-merge.sh: Merge zh variant translations with OpenCC and msgmerge.
# Help text appended to the "Arguments invalid" error raised by the argument
# check. $0 is expanded once, here, at assignment time; the backslash-escaped
# backquotes, double quotes and dollar signs are stored as literal characters.
usage="Usage: $0 OLD_FILE MERGE_ME_IN [POT_FILE=MERGE_ME_IN]
if OLD_FILE is missing, assume creation of new file.
Env vars:
ZH_MSGMERGE_OPTS \`linear' array of extra flags for \`msgmerge'.
Example: '-E -C \"my compendia.po\" -w 79 --previous'
Default: '--previous'
ZH_POST_OCC:function What to do after invoking OpenCC. To use,
define a function with this name in bash,
and export it with \`export -f ZH_POST_OCC'.
Example: ZH_POST_OCC() { msgattrib --set-fuzzy \\
--no-fuzzy -o \"\$new.\$oldtype\"{,}; }
"
# This script comes with ABSOLUTELY NO WARRANTY, and can be used as if it were
# in the public domain, or (optionally) under the terms of CC0, WTFPL or Unlicense.
# Please make sure that there are no Chinese characters in msgid, or bad things
# will happen when opencc converts them by mistake. msgfilter could be used, but
# it is very slow because it would be invoked a great many times. Same for sed.
# Also, don't pass non-UTF-8 files in.
# Boolean shorthands: every spelling of "false"/"no" is the read-only value 0,
# every spelling of "true"/"yes" is the read-only value 1.
readonly FALSE=0 NO=0 false=0 no=0
readonly TRUE=1 YES=1 true=1 yes=1
# die MESSAGE [STATUS]
# Print MESSAGE on stderr with a "Fatal: " prefix, then terminate the shell
# with STATUS (1 when no second argument is passed).
die(){
  printf 'Fatal: %s\n' "$1" >&2
  exit "${2-1}"
}
# info WORD...
# Print all arguments, joined into a single line, on stderr with an "Info: "
# prefix. Nothing is written to stdout.
info(){
  printf 'Info: %s\n' "$*" >&2
}
# Argument check: MERGE_ME_IN ($2) must be given and must exist on disk.
# [[ ]] with && replaces the deprecated, ambiguous `[ ... -a ... ]` form, which
# can misparse when $2 looks like a test operator.
[[ -n "$2" && -e "$2" ]] || die "Arguments invalid
$usage"
# Fail early when any external tool is missing. msgattrib and msgcat are run
# by the merge step further down, so they are checked here together with the
# other tools instead of failing half-way through the merge.
type opencc sed msgmerge msgattrib msgcat >/dev/null || die "required command(s) not found"
# Accept environment 'linear array' input: the string in ZH_MSGMERGE_OPTS is
# parsed as the contents of an array literal (default: the single flag
# --previous), so quoted words inside it stay single elements.
declare -a ZH_MSGMERGE_OPTS="(${ZH_MSGMERGE_OPTS:---previous})"
# Fall back to a no-op hook when the caller did not export ZH_POST_OCC.
type ZH_POST_OCC &>/dev/null || ZH_POST_OCC(){ :; }
# OpenCC example cfgs used, all with Phrase Variants:
# s2twp: CN -> TW
# tw2sp: TW -> CN
# s2hk: CN -> HK
# hk2s: HK -> CN
# Associative table mapping a Taiwan-style term to its mainland (CN) term.
# Nothing in the visible part of this script reads it; the to_cn_sed list
# carries the same two substitutions.
# NOTE(review): the subscripts are written with spaces inside the brackets --
# confirm whether the stored keys are meant to include that whitespace.
declare -A cn_t_word
cn_t_word[ 函式 ]=函数
cn_t_word[ 封存 ]=归档
# Extra sed commands for conversion.
#
# to_cn_sed: sed arguments applied in place to the merged OLD_FILE when it is
# a zh_CN catalogue. The text has already been through OpenCC at that point,
# so every pattern must be spelled in SIMPLIFIED characters and must rewrite a
# Taiwan-style term into the mainland term (never the other way round).
# NOTE(review): 盘案 and 暂存盘 look like they may have been meant as
# 档案 / 暂存档 (檔 simplifies to 档, not 盘) -- confirm with a zh_TW sample.
to_cn_sed=(
  -r # ERE for grouping
  -e 's/函式/函数/g' # function
  -e 's/封存/归档/g' # archive
  -e 's/开启/打开/g' # open
  -e 's/命令稿/脚本/g' # script
  -e 's/盘案/文件/g' # file (save)
  -e 's/回传/返回/g' # return (function)
  -e 's/引数/参数/g' # argument (function)
  -e 's/签章/签名/g' # signature (PGP)
  -e 's/巨集/宏/g' # macro
  -e 's/魔术字符/幻数/g' # magic number
  -e 's/唯读/只读/g' # readonly
  -e 's/胚腾/模式/g' # pattern, sometimes non-standardly translated to 胚腾 in TW.
  # Pattern uses simplified 时: a traditional 時 can never occur in text that
  # OpenCC has already converted to simplified, so it would never match.
  -e 's/逾时/超时/g' # timed out
  -e 's/相依性/依赖关系/g' -e 's/相依/依赖/g' # dependency (pkgmgr)
  -e 's/万用匹配/通配符/g' -e 's/万用字符/通配符/g' # glob
  -e 's/([二八十]|十六)进位制?/\1进制/g' # bin, oct, dec, hex..
  # -e 's/修补/补丁/g' # patch
  # -e 's/套件/软件包/g' # package
  # -e 's/异动/事务/'g # transaction
  -e 's/不容许/不允许/g' # not permitted
  -e 's/暂存盘/临时文件/g' # tmpfile, word_struct (暂存 盘)
  # -e 's/缩减/归约/g' # reduce (parser)
  -e 's/算子/算符/g' # operator (parser)
  -e 's/全域/全局/g' # global
  -e 's/做为/作为/g' # foo as(作为) bar
  -e 's/行程/进程/g' # process
  -e 's/润算/渲染/g' # render
  # 堆叠 is the TW wording, 堆栈 the mainland one; map TW -> CN so existing
  # correct CN text is left alone.
  -e 's/堆叠/堆栈/g' # stack
  -e 's/指标/指针/g' # pointer
  -e 's/印出/打印/g' # print
  # -e 's/行/__CoLM_列__/g' -e 's/列/行/g' -e 's/__CoLM_列__/列/g' # different ideas on lines and cols
  -e 's/「/“/g' -e 's/」/”/g' -e 's/『/‘/g' -e 's/』/’/g' # crude quoting
)
# from_cn_sed: sed arguments that swap mainland (CN) wording for the matching
# Taiwan-style wording, still written in simplified characters. They are run
# over MERGE_ME_IN when that file is zh_CN, before OpenCC converts the script.
from_cn_sed=()
from_cn_sed+=(-e 's/函数/函式/g')  # function
from_cn_sed+=(-e 's/归档/封存/g')  # archive
from_cn_sed+=(-e 's/宏/巨集/g')    # macro
from_cn_sed+=(-e 's/只读/唯读/g')  # readonly
from_cn_sed+=(-e 's/全局/全域/g')  # global
# zhvar NAME
# Guess the Chinese variant from a file name and print it on stdout:
#   CN  when NAME contains zh_CN / zh-CN / zh_Hans / zh-Hans
#   TW  when NAME contains zh_TW / zh-TW / zh_Hant / zh-Hant
#   HK  when NAME contains zh_HK / zh-HK
#   ??  otherwise
# The checks run in that order, so the first matching variant wins.
zhvar(){
  local variant='??'
  if [[ "$1" == *zh[_-]CN* || "$1" == *zh[_-]Hans* ]]; then
    variant=CN
  elif [[ "$1" == *zh[_-]TW* || "$1" == *zh[_-]Hant* ]]; then
    variant=TW
  elif [[ "$1" == *zh[_-]HK* ]]; then
    variant=HK
  fi
  echo "$variant"
}
# occ_conv FROM TO INPUT [OUTPUT]
# Look up the OpenCC configuration chain that converts variant FROM into
# variant TO (each one of CN, TW, HK) and hand it to do_occ together with
# INPUT and OUTPUT. Identical variants map to the placeholder NULL; TW<->HK
# is a two-step chain that goes through CN. Any other pair is reported
# through die.
occ_conv(){
  local pair="$1,$2" chain
  local -A route=(
    ['CN,TW']=s2twp
    ['TW,CN']=tw2sp
    ['CN,HK']=s2hk
    ['HK,CN']=hk2s
    ['TW,HK']=tw2sp,s2hk
    ['HK,TW']=hk2s,s2twp
    ['CN,CN']=NULL
    ['HK,HK']=NULL
    ['TW,TW']=NULL
  )
  chain="${route[$pair]}"
  [ -n "$chain" ] || die "Specified pair $1,$2 not supported. Add it yourself."
  do_occ "$chain" "$3" "$4"
}
# do_occ CHAIN INPUT [OUTPUT]
# Copy INPUT to a "<target>.work" scratch file, run opencc over the scratch
# file once per entry of the comma-separated CHAIN (the placeholder entry NULL
# is skipped), then move the scratch file onto the target. The target is
# OUTPUT, or INPUT itself when OUTPUT is empty or missing.
# Returns the failing status as soon as cp or opencc fails (the scratch file
# is left behind in that case), otherwise the status of the final mv.
do_occ(){
  local IFS=,          # makes the unquoted $1 below split on commas
  local cfg dest="${3:-$2}"
  local scratch="$dest.work"
  cp "$2" "$scratch" || return
  for cfg in $1; do
    if [ "$cfg" = NULL ]; then
      continue
    fi
    opencc -c "$cfg" -i "$scratch" -o "$scratch" || return
  done
  mv "$scratch" "$dest"
}
# ---- Main flow ----
# $1 = OLD_FILE (merge target), $2 = MERGE_ME_IN (translation to merge in),
# $3 = optional POT template, defaulting to MERGE_ME_IN. The variant of each
# file is guessed from its name by zhvar.
old="$1" oldtype="$(zhvar "$old")"
new="$2" newtype="$(zhvar "$new")"
pot="${3:-$2}"
# Start from an empty list so a same-named environment variable cannot leak
# into the final report.
OUTFILES=
if [ ! -e "$old" ]; then
  info "Creating $old."
  :> "$old" || die "cannot create $old"
fi
echo "
OLD $oldtype $old
NEW $newtype $new
POT -- $pot
"
# Stage a copy of MERGE_ME_IN as "$new.$oldtype". A zh_CN source first gets
# its wording swapped by from_cn_sed; every other source is copied verbatim.
case "$newtype" in
  (CN) sed "${from_cn_sed[@]}" "$new" > "$new.$oldtype" ||
    die "sed returned $?.";;
  (*) cp "$new" "$new.$oldtype" ||
    die "cp returned $?.";;
esac
# Convert the staged copy into the variant of OLD_FILE.
occ_conv "$newtype" "$oldtype" "$new"{,".$oldtype"} ||
  die "opencc returned $?."
# User hook; its exit status is deliberately not checked.
ZH_POST_OCC
# Back up OLD_FILE as "$old~" before it is rewritten. From here on every step
# is checked: continuing after a failure could merge a stale "$old.all" left
# over from an earlier run.
cp "$old"{,'~'} || die "cp returned $?."
# Keep only the translated entries of OLD_FILE (rewritten in place).
msgattrib --translated -o "$old"{,} || die "msgattrib returned $?."
# Concatenate both catalogues; on duplicates the entry from OLD_FILE wins.
msgcat -o "$old.all" --use-first "$old" "$new.$oldtype" ||
  die "msgcat returned $?."
# Merge the combined catalogue against the template and write OLD_FILE.
msgmerge --lang="zh_$oldtype" "${ZH_MSGMERGE_OPTS[@]}" -o "$old"{,.all} "$pot" ||
  die "msgmerge returned $?."
# A zh_CN result gets the extra wording fixes; sed keeps the untouched merge
# output as "$old.pre_final".
case "$oldtype" in
  (CN) sed -i.pre_final "${to_cn_sed[@]}" "$old" ||
    die "sed returned $?."
  OUTFILES+="SED $oldtype $old.pre_final"$'\n';;
esac
echo "
OUT $oldtype $old
ALL $oldtype $old.all
TMP $oldtype $new.$oldtype
$OUTFILES
Verify the results in a po editor, with some basic knowledge in zh_$oldtype."