#!/usr/bin/env bash
#
# Build a German word list from the <title> lines of a Wikipedia XML dump.
#
# Usage:    <this script> [DUMP [TITLES [DICTIONARY]]]
# Defaults: DUMP=dewiki-latest-pages-articles.xml
#           TITLES=titles.txt            (intermediate: the raw <title> lines)
#           DICTIONARY=dictionary-de.txt (result: one word per line)

#######################################
# Turn raw "<title>...</title>" lines into dictionary words.
# Arguments: none
# Inputs:    title lines on stdin (UTF-8)
# Outputs:   sorted, de-duplicated, lowercase ASCII words of at least five
#            letters on stdout, one per line
# Returns:   exit status of the final sort
#######################################
normalize_titles() {
  # Transliteration has to happen BEFORE anything splits on "non-letters":
  # once every character outside A-Za-z is a separator, a non-ASCII letter
  # has already cut its word in two and there is nothing left to convert.
  #
  # The German letters are mapped explicitly because what //TRANSLIT emits
  # for them can differ between locales and iconv implementations. Patterns
  # are written as UTF-8 byte escapes so they do not depend on the encoding
  # this file is saved in:
  #   c3 a4 a-umlaut   c3 b6 o-umlaut   c3 bc u-umlaut
  #   c3 84 A-umlaut   c3 96 O-umlaut   c3 9c U-umlaut   c3 9f sharp s
  # Uppercase umlauts become "Ae"/"Oe"/"Ue" (not "AE") so that the
  # capital-letter split below keeps the word in one piece.
  LC_ALL=C sed \
    -e $'s/\xc3\xa4/ae/g' \
    -e $'s/\xc3\xb6/oe/g' \
    -e $'s/\xc3\xbc/ue/g' \
    -e $'s/\xc3\x84/Ae/g' \
    -e $'s/\xc3\x96/Oe/g' \
    -e $'s/\xc3\x9c/Ue/g' \
    -e $'s/\xc3\x9f/ss/g' |
    # Best-effort transliteration of every other non-ASCII character, run in
    # the caller's locale. -c drops invalid input instead of aborting.
    iconv -c -f UTF-8 -t ASCII//TRANSLIT |
    # From here on everything is ASCII; LC_ALL=C pins the A-Z / a-z ranges to
    # exactly the 26 ASCII letters regardless of the caller's collation.
    # Strip the tags, delete digits (joining the text around them) and put a
    # space in front of every capital so CamelCase titles split into words.
    LC_ALL=C sed \
      -e 's@<title>@@g' \
      -e 's@</title>@@g' \
      -e 's/[0-9]//g' \
      -e 's/[A-Z]/ &/g' |
    # Every run of non-letters becomes a single line break: one word per line.
    LC_ALL=C tr -cs 'A-Za-z' '\n' |
    LC_ALL=C tr 'A-Z' 'a-z' |
    # Keep only words with five or more letters.
    LC_ALL=C grep -E '.{5,}' |
    LC_ALL=C sort -u
}

#######################################
# Extract the title lines from a dump and write the dictionary.
# Arguments: $1 - dump to read        (default dewiki-latest-pages-articles.xml)
#            $2 - title lines to write (default titles.txt)
#            $3 - dictionary to write  (default dictionary-de.txt)
# Outputs:   diagnostics on stderr
# Returns:   0 on success, non-zero if the dump cannot be read or grep fails
#######################################
build_dictionary() {
  local dump=${1:-dewiki-latest-pages-articles.xml}
  local titles=${2:-titles.txt}
  local dict=${3:-dictionary-de.txt}
  local status

  # Refuse to overwrite existing output with empty files when the input is
  # missing.
  if [[ ! -r "$dump" ]]; then
    printf 'error: cannot read dump file: %s\n' "$dump" >&2
    return 1
  fi

  grep -F -e '<title>' -- "$dump" > "$titles"
  status=$?
  # grep exits 1 when nothing matched; that just yields an empty dictionary.
  # Anything above 1 is a real error.
  if (( status > 1 )); then
    printf 'error: extracting titles from %s failed\n' "$dump" >&2
    return "$status"
  fi

  normalize_titles < "$titles" > "$dict"
}

build_dictionary "$@"
iMi-digital/dictionary
Folders and files
| Name | Name | Last commit date | ||
|---|---|---|---|---|