-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·39 lines (30 loc) · 983 Bytes
/
build.sh
File metadata and controls
executable file
·39 lines (30 loc) · 983 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
# Build the dictionary data from the Finnish and English Wiktionary dumps.
#
# Steps:
#   1. Remove any previous output/all directory.
#   2. Download the fi/en Wiktionary dumps into data/ unless already present.
#   3. Run wiktionarymunge.py on each dump, writing to output/all/dict.
#   4. Inside output/all: run map-forms.py, optionally add-frequencies.py
#      (only if the frequency list exists), then compress-dict.py.
#
# Usage: ./build.sh [ARGS...]
#   ARGS are passed through unchanged to both wiktionarymunge.py runs.
#
# The data/ and output/ directories and ./wiktionarymunge.py are resolved
# against the current working directory; the post-processing scripts and the
# frequency list are resolved against the directory containing this script.

# Abort on the first failing command. Set here rather than in the shebang so
# it still applies when the script is started as `bash build.sh`.
set -e

# Timing wrapper. Because the command name comes from an expansion, this runs
# the external time(1) program rather than the shell keyword; `-f %E` is the
# GNU time format option for elapsed time.
TIME=(time -f %E)
TIMESTAMP=latest
BASE_URL=https://dumps.wikimedia.org
EN_INPUT="enwiktionary-${TIMESTAMP}-pages-articles.xml.bz2"
FI_INPUT="fiwiktionary-${TIMESTAMP}-pages-articles.xml.bz2"
FREQ_INPUT=fi-opensubtitles-word-freq.txt

# Absolute path of the directory holding this script. It is needed because
# the script later changes into output/all and must still find its helpers.
# (The original called the non-existent `realname`; `realpath` is intended.)
ROOT="$(realpath -- "$(dirname -- "$0")")"

# Download one dump into data/ unless a file of that name is already there.
#   $1 - wiki project directory on the dump server (e.g. fiwiktionary)
#   $2 - dump file name
fetch_dump() {
  local project=$1
  local file=$2
  if [[ ! -e "data/${file}" ]]; then
    wget -P data "${BASE_URL}/${project}/${TIMESTAMP}/${file}"
  fi
}

echo "Cleaning"
"${TIME[@]}" rm -rf "output/all"

mkdir -p data
fetch_dump fiwiktionary "$FI_INPUT"
fetch_dump enwiktionary "$EN_INPUT"

echo "Extracting fi"
"${TIME[@]}" ./wiktionarymunge.py --lang=fi -e output/all/dict "data/${FI_INPUT}" "$@"
echo "Extracting en"
"${TIME[@]}" ./wiktionarymunge.py --lang=en -e output/all/dict "data/${EN_INPUT}" "$@"

# The remaining steps run from inside the output directory.
cd output/all

echo "Mapping forms"
"${TIME[@]}" "$ROOT/map-forms.py"

# Frequency data is optional: the step is skipped when the list is absent.
if [[ -e "$ROOT/data/$FREQ_INPUT" ]]; then
  echo "Adding frequency data"
  "${TIME[@]}" "$ROOT/add-frequencies.py" "$ROOT/data/$FREQ_INPUT"
fi

echo "Compressing dict"
"${TIME[@]}" "$ROOT/compress-dict.py"