-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgendic.js
More file actions
35 lines (28 loc) · 830 Bytes
/
gendic.js
File metadata and controls
35 lines (28 loc) · 830 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/**
* Generate a dictionary file from a source file
*
* @author Mike Timms <mike@codeeverything.com>
*/
var fs = require('fs');
var dataPath = 'data/';
var files = ['big.txt'];

/**
 * Extract the unique words from a block of text.
 *
 * A word is a run of ASCII letters that ends on a regex word boundary.
 * Words are lower-cased, words shorter than two characters are skipped,
 * and each word is returned once, in order of first appearance.
 *
 * @param {string} text - raw source text
 * @returns {string[]} unique lower-cased words
 */
function extractWords(text) {
    // Tokenise only; uniqueness is handled by `seen` below.
    // The previous pattern added a negative lookahead, (?!.*\1\b), to skip
    // words repeated later on the same line. That lookahead has no leading
    // boundary, so it also rejected a word whenever a later token merely
    // ended with it (e.g. "he" before "she"), silently dropping words, and
    // it rescanned the rest of the line for every candidate.
    // match() returns null when there are no matches, hence the fallback.
    var tokens = text.match(/[a-zA-Z]+\b/g) || [];

    var seen = {};
    var unique = [];

    for (var i = 0; i < tokens.length; i++) {
        var word = tokens[i].toLowerCase();

        // single letters are not useful dictionary entries
        if (word.length < 2) {
            continue;
        }

        if (!seen.hasOwnProperty(word)) {
            seen[word] = 1;
            unique.push(word);
        }
    }

    return unique;
}

// read the source file and extract all the words
var words = fs.readFileSync(dataPath + files[0]).toString();
var dic = extractWords(words);

// Write synchronously: this is a one-shot script, and fs.writeFile without a
// callback throws a TypeError on current Node.js versions.
fs.writeFileSync(dataPath + 'dic.json', JSON.stringify(dic, null, 2), 'utf-8');