-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
75 lines (59 loc) · 1.94 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'use strict';
;(function() {
/** Used as a safe reference for `undefined` in pre-ES5 environments. */
var undefined;
// `this` of the wrapper function, i.e. whatever the enclosing scope hands to
// the wrapper through `.call(this)`. The export logic attaches StopwordFilter
// to this object when no CommonJS `exports` object is available.
var root = this;
// True when a `require` function is in scope.
// NOTE(review): this flag is not read anywhere in the visible code, and the
// `require` call below runs unconditionally - confirm whether a guard was intended.
var has_require = typeof require !== 'undefined';
// Tokenizer constructor provided by the `node-vntokenizer` package.
var Tokenizer = require('node-vntokenizer');
// Single tokenizer instance shared by every StopwordFilter; `filter` uses it
// to split string input into words.
var token = new Tokenizer();
/**
 * Creates a stopword filter and immediately loads the stopword list for the
 * configured language.
 *
 * @constructor
 * @param {Object} [options] - Settings object; stored on the instance as
 *   `options` (an empty object is used when omitted).
 * @param {string} [options.language] - Language code; takes precedence over
 *   `options.lang`.
 * @param {string} [options.lang] - Alternative key for the language code.
 *   When neither key is set the language is 'en'.
 */
var StopwordFilter = function(options) {
  var settings = options || {};
  var chosenLanguage = settings.language || settings.lang || 'en';

  this.options = settings;
  this.language = chosenLanguage;
  // Start from an empty list; loadStopwordData replaces it right away.
  this.stopwords = [];
  this.loadStopwordData(chosenLanguage);
};
StopwordFilter.prototype.loadStopwordData = function(lang) {
var lang = lang || this.language || 'en';
if (lang == 'vi' || lang == 'vn') {
this.stopwords = require('vietnamese-stopwords');
} else { // default: en
this.stopwords = require('stopwords').english
}
}
/**
 * Overrides the active stopword list with a caller-supplied one.
 *
 * The value is stored by reference, without copying or validation. `filter`
 * calls `indexOf` on it, so it is expected to be an array of words.
 *
 * @param {Array} data - The stopwords to use from now on.
 */
StopwordFilter.prototype.setStopwords = function(data) {
  this.stopwords = data;
};
/**
 * Switches the filter to another language and reloads its stopword list.
 *
 * A falsy `lang` falls back to 'en', the same default the constructor uses,
 * so `this.language` always names the list that is actually loaded (it used
 * to be left falsy while the English list was loaded).
 *
 * @param {string} [lang] - Language code, e.g. 'en', 'vi' or 'vn'.
 */
StopwordFilter.prototype.setLanguage = function(lang) {
  this.language = lang || 'en';
  // Pass the code explicitly, mirroring the call made by the constructor.
  this.loadStopwordData(this.language);
};
/**
 * Extracts the unique, lower-cased, non-stopword terms from `text`.
 *
 * @param {string|Array} text - A string, which is split into words with the
 *   shared `token.tokenize` tokenizer, or an array of word strings used as-is.
 * @param {string} [outputType='array'] - 'array' (or any falsy value) returns
 *   an array; any other value returns the keywords joined by single spaces.
 * @returns {Array|string} The keywords in first-seen order.
 * @throws {Error} If `text` is neither a string nor an array.
 */
StopwordFilter.prototype.filter = function(text, outputType) {
  var words;
  if (typeof text === 'string') {
    words = token.tokenize(text);
  } else if (Array.isArray(text)) {
    // `typeof` never yields 'array' (arrays report 'object'), so the array
    // case has to be detected with Array.isArray.
    words = text;
  } else {
    throw new Error("Input string must be String or Array. Current: " + (typeof text));
  }

  var format = outputType || 'array';
  var keywords = [];
  // Prototype-less lookup table of words already emitted, so de-duplication
  // does not rescan `keywords` for every word.
  var seen = Object.create(null);

  for (var i = 0; i < words.length; i++) {
    var word = words[i].toLowerCase().trim();
    if (seen[word] || this.stopwords.indexOf(word) !== -1) {
      continue;
    }
    seen[word] = true;
    keywords.push(word);
  }

  // String output
  if (format !== 'array') {
    return keywords.join(' ');
  }
  return keywords;
};
// Exports
// Without a CommonJS `exports` object the constructor is attached to `root`.
// Otherwise it is exposed as `exports.StopwordFilter`, and when `module.exports`
// exists it also becomes the module's export value itself.
if (typeof exports === 'undefined') {
  root.StopwordFilter = StopwordFilter;
} else {
  if (typeof module !== 'undefined' && module.exports) {
    module.exports = StopwordFilter;
    exports = StopwordFilter;
  }
  exports.StopwordFilter = StopwordFilter;
}
}.call(this));