-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpure_dictionary.py
More file actions
executable file
·40 lines (35 loc) · 1.24 KB
/
pure_dictionary.py
File metadata and controls
executable file
·40 lines (35 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AUTHOR : Shin Zoqchiuq
VERSION : v2021.07.17
Generate `甬城.tsv` from `字表.tsv`.
"""
import pandas as pd
import numpy as np
from gninpou import compat2gninpou
# Load only the columns this script uses.  The handle is opened in a `with`
# block so that it is closed as soon as parsing is finished.
with open("../字表.tsv", encoding="utf-8") as source_file:
    sheet = pd.read_csv(
        source_file,
        sep="\t",
        usecols=["繁體", "简体", "兼容格式", "出處(甬)", "備註"],
    )

# Two input columns are written out under different names.
sheet = sheet.rename(columns={"出處(甬)": "出處", "兼容格式": "吳拼"})

# Row filter.  A row is kept only when all of the following hold:
#   * 出處 is present and non-zero;
#   * 出處 is at most 5, or exactly 14;
#   * 吳拼 holds a string (an empty cell is read as NaN, i.e. a float).
origin = sheet["出處"]
has_reading = np.array(
    [isinstance(reading, str) for reading in sheet["吳拼"]], dtype=bool
)
keep = (
    origin.notna()
    & (origin != 0)
    & ((origin <= 5) | (origin == 14))
    & has_reading
)
# Select with a boolean mask instead of DataFrame.drop(): drop() treats its
# argument as index *labels*, while the unwanted rows are found by position.
sheet = sheet[keep].reset_index(drop=True)

# NaN rows are gone, so 出處 can now be stored as integers.
sheet["出處"] = sheet["出處"].astype("int64")
# Convert every remaining reading with the project's compat2gninpou helper.
sheet["吳拼"] = [compat2gninpou(reading) for reading in sheet["吳拼"]]
# Blank out any note containing "("; every other value (including the NaN of
# an empty cell) is passed through unchanged.
sheet["備註"] = [
    np.nan if isinstance(note, str) and "(" in note else note
    for note in sheet["備註"]
]

# Remove rows that have become identical, then renumber.
sheet = sheet.drop_duplicates().reset_index(drop=True)

sheet[["繁體", "简体", "吳拼", "出處", "備註"]].to_csv(
    "../各地字表/甬城.tsv", encoding="utf-8", index=False, sep="\t"
)