-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.py
More file actions
executable file
·38 lines (33 loc) · 807 Bytes
/
convert.py
File metadata and controls
executable file
·38 lines (33 loc) · 807 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python -tt
import re
import codecs
import sys
def gbk2unicode(match):
    r"""Decode a run of ``\ddd`` escapes as GBK-encoded text.

    Designed as the replacement callback for ``re.sub``. Group 1 of *match*
    must contain the complete run of escapes, for example ``\214\208``.
    Every three-digit number is taken as one byte value; the bytes of the
    whole run are decoded together, so a run may mix double-byte GBK
    characters with single-byte (ASCII) ones and may have an odd length.

    Args:
        match: a regex match object whose group 1 is the escape run, or a
            falsy value.

    Returns:
        The decoded text. If the run is not valid GBK, or a number does not
        fit in a byte, the matched text is returned unchanged so that one
        bad run does not abort the whole conversion. ``None`` is returned
        when *match* is falsy.
    """
    if not match:
        return None

    escaped = match.group(1)
    try:
        # NOTE(review): the digits are read as decimal, as this function has
        # always done. If the input actually uses octal escapes (C/git
        # style), this needs int(tok, 8) -- confirm against the data source.
        raw = bytearray(
            int(tok) for tok in escaped.replace('\\', ' ').split()
        )
        # bytearray.decode exists on Python 2.6+ and Python 3 alike.
        return raw.decode('gbk')
    except ValueError:
        # Covers both a value above 255 (ValueError from bytearray) and
        # invalid or truncated GBK (UnicodeDecodeError is a subclass).
        return match.group(0)
def main():
    r"""Convert the file named on the command line and save the result.

    Expects exactly one command-line argument, the input path; otherwise a
    usage line is printed and the process exits with status 1.

    Every run of two or more ``\ddd`` escapes in the input is replaced by
    whatever ``gbk2unicode`` returns for it. The converted text is written
    as UTF-8 to ``<filename>.converted`` and then echoed to standard output.
    """
    # Local import: io.open behaves the same on Python 2.6+ and Python 3,
    # unlike the builtin open() with the 'rU' mode (rejected by 3.11+).
    import io

    if len(sys.argv) != 2:
        print("usage: convert.py filename")
        sys.exit(1)
    filename = sys.argv[1]

    # Text mode with the default newline handling keeps the universal-newline
    # behaviour of the old 'rU' mode. UTF-8 is a superset of plain ASCII, so
    # ASCII inputs are read exactly as before.
    with io.open(filename, 'r', encoding='utf-8') as f:
        text = f.read()

    converted = re.sub(r'((\\\d\d\d){2,})', gbk2unicode, text)

    # Write the file before echoing: if stdout cannot encode the text, the
    # converted file has still been saved. newline='\n' disables platform
    # newline translation on output.
    with io.open(filename + ".converted", 'w', encoding='utf-8',
                 newline='\n') as out:
        out.write(converted)

    print(converted)
# Script entry point: run the conversion only when this file is executed
# directly, so that importing the module has no side effects.
if __name__ == '__main__':
    main()