-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfsgrammar.js
More file actions
84 lines (76 loc) · 2.63 KB
/
Copy pathfsgrammar.js
File metadata and controls
84 lines (76 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
const tinynlp = require('./earley-parser.js');
/**
 * Lexicon: maps a literal token (Persian keyword or punctuation mark) to the
 * name of the grammar terminal it stands for. Tokens that are not listed here
 * are classified by `fsg.terminalSymbols` (number, object literal, or name).
 *
 * Declared with `const` so the module no longer leaks an implicit global
 * (an undeclared assignment throws a ReferenceError in strict mode / ESM).
 */
const wordlist = new Map([
// copula ("is")
['است','ast'],
// punctuation; both the Latin and the Arabic-script question mark are accepted
['.','noghte'],
['?','alamat_soal'],
['(','parantez_baz'],
[')','parantez_baste'],
['،','virgool'],
['؟','alamat_soal'],
// question word and "a kind of"
['چیست','chist'],
['نوعی','noyi'],
// prepositions all share a single terminal
['در','harfe_ezafe'],
['با','harfe_ezafe'],
['از','harfe_ezafe'],
['تا','harfe_ezafe'],
['به','harfe_ezafe'],
['برای','harfe_ezafe'],
// remaining keywords used by the command / assignment rules
['هر','har'],
['کن','kon'],
['را','ra'],
['سپس','sepas'],
['یعنی','yani'],
]);
/**
 * Grammar handed to the Earley parser. Each string is one production in the
 * form `lhs -> alt1 | alt2 | ...`; symbols that never appear on a left-hand
 * side (noghte, ast, esm, num, obj, ...) are terminals produced by
 * `fsg.terminalSymbols`.
 *
 * Declared with `const` so the module no longer leaks an implicit global
 * (an undeclared assignment throws a ReferenceError in strict mode / ESM).
 */
const fsg = new tinynlp.Grammar([
    // A full sentence: a statement/command ends with '.', a question with '?'.
    'root -> assign noghte | func_assign noghte | ask alamat_soal | cmd noghte | cmd_assign noghte',
    // Alternative start symbol: a bare command with no closing punctuation.
    'cmd_root -> cmd',
    // One command, or several chained with "sepas".
    'cmd -> cmd_tak | cmd_tak sepas cmd',
    'cmd_tak -> eval_task ra esm kon | eval_task ra eval_motam_list esm kon | eval_motam_list esm kon | esm kon',
    // "<name> [,] <expression> ast"
    'assign -> esm eval_task ast | esm virgool eval_task ast' ,
    'func_assign -> esm input virgool eval_task ast | esm input eval_motam_list virgool eval_task ast',
    'input -> eval_task | har esm' ,
    // Expressions: literals, names, parenthesised groups, and applications.
    'eval_task -> noyi esm | parantez_baz eval_task parantez_baste | num esm | num | esm | obj | esm eval_task | esm eval_task eval_motam_list' ,
    // One or more "<preposition> <expression>" complements (left-recursive list).
    'eval_motam_list -> eval_motam_list eval_motamam | eval_motamam',
    'eval_motamam -> harfe_ezafe eval_task',
    'ask -> eval_task chist' ,
    // "<name> kon yani <command>"
    'cmd_assign -> esm kon yani cmd',
]);
/**
 * Classify one token as a list of grammar terminal names.
 *
 * Lookup order:
 *   1. an entry in `wordlist` (a string value is wrapped in an array, any
 *      other value is returned as stored);
 *   2. a run of ASCII digits        -> ['num'];
 *   3. a token starting with ", { or [ -> ['obj'];
 *   4. anything else                -> ['esm'].
 *
 * @param {string} token - a single token.
 * @returns {string[]} terminal symbol names for the token.
 */
fsg.terminalSymbols = function (token) {
    if (wordlist.has(token)) {
        const symbols = wordlist.get(token);
        return typeof symbols === 'string' ? [symbols] : symbols;
    }

    const isNumber = token.match(/^\d+$/) !== null;
    if (isNumber) {
        return ['num'];
    }

    // Only the first character is inspected to recognise a literal.
    const literalOpeners = ['"', '{', '['];
    if (literalOpeners.includes(token[0])) {
        return ['obj'];
    }

    return ['esm'];
};
/**
 * Replace Arabic-Indic and Persian digits with their ASCII
 * equivalents, leaving every other character untouched.
 *
 * @param {string} str - input text.
 * @returns {string} the text with those digits rewritten as 0-9.
 */
function parseArabic(str) {
    const ARABIC_ZERO = 1632; // char code of the first digit in the Arabic class
    const PERSIAN_ZERO = 1776; // char code of the first digit in the Persian class

    // The numeric result is converted to a one-character string by replace().
    const digitFrom = (zeroCode) => (digit) => digit.charCodeAt(0) - zeroCode;

    const withoutArabic = str.replace(/[٠١٢٣٤٥٦٧٨٩]/g, digitFrom(ARABIC_ZERO));
    return withoutArabic.replace(/[۰۱۲۳۴۵۶۷۸۹]/g, digitFrom(PERSIAN_ZERO));
}
/**
 * Split text into tokens on spaces and line breaks, keeping quoted strings
 * and bracketed ({...} / [...]) literals together as single tokens.
 *
 * Line breaks are first turned into spaces, so a line break inside a quoted
 * or bracketed literal becomes a space within that token instead of cutting
 * the literal in two. (The previous code passed a function as the *pattern*
 * to String.prototype.replace, which never matched, so that step was a no-op.)
 *
 * @param {string} text - raw input text.
 * @returns {string[]} the non-empty tokens, in order.
 */
function tokenize(text){
    const tokens = [];
    let current = '';
    let inQuotes = false;
    let bracketDepth = 0;

    for (const c of text.replace(/\r?\n/g, ' ')) {
        if (c === '"') inQuotes = !inQuotes;
        if (c === '{' || c === '[') bracketDepth++;
        if (c === '}' || c === ']') bracketDepth--;

        // A space separates tokens only outside quotes and brackets.
        if (c === ' ' && bracketDepth === 0 && !inQuotes) {
            if (current !== '') tokens.push(current);
            current = '';
        } else {
            current += c;
        }
    }
    if (current !== '') tokens.push(current);
    return tokens;
}
/**
 * Parse a piece of text with the module grammar.
 *
 * The text is digit-normalised with `parseArabic`, split with `tokenize`,
 * and run through the Earley parser starting from `rootRule`.
 *
 * @param {string} text - source text to parse.
 * @param {string} [rootRule='root'] - name of the start symbol.
 * @returns {*} whatever `traverse()` yields for the finished root state
 *   (presumably the list of parse trees; confirm against earley-parser.js).
 */
function fparse(text,rootRule = 'root'){
    const tokens = tokenize(parseArabic(text));
    const finishedRoot = tinynlp
        .parse(tokens, fsg, rootRule)
        .getFinishedRoot(rootRule);
    // NOTE(review): if getFinishedRoot can return null/undefined for input the
    // grammar rejects, this line throws a TypeError; callers should be prepared.
    return finishedRoot.traverse();
}
module.exports = fparse;