-
Notifications
You must be signed in to change notification settings - Fork 655
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #827 from spencermountain/dev
Dev
- Loading branch information
Showing
22 changed files
with
228 additions
and
178 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)", | ||
"name": "compromise", | ||
"description": "modest natural language processing", | ||
"version": "13.10.3", | ||
"version": "13.10.4", | ||
"main": "./builds/compromise.js", | ||
"unpkg": "./builds/compromise.min.js", | ||
"module": "./builds/compromise.mjs", | ||
|
@@ -40,8 +40,10 @@ | |
"coverage:html": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"coverage": "nyc -r lcov -n 'src/**/*' -n 'plugins/**/*' npm run test", | ||
"codecov": "npm run coverage && codecov -t 15039ad1-b495-48cd-b4a0-bcf124c9b318", | ||
"perf": "node ./scripts/test/perf/index.js", | ||
"perf:build": "node ./scripts/test/perf/build-speed.js", | ||
"perf": "node ./scripts/perf/index.js", | ||
"perf:build": "TESTENV=prod node ./scripts/perf/index.js", | ||
"perf:versions": "node ./scripts/perf/versions.js", | ||
"flame": "clinic flame -- node ./scripts/perf/flame", | ||
"lint": "eslint ./src/ && eslint ./plugins/**/src/", | ||
"watch": "amble ./scratch.js", | ||
"build:all": "node ./scripts/build/build-all.js && npm run build --silent", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,29 @@ | ||
const nlp = require('./src/index') | ||
nlp.extend(require('./plugins/numbers/src')) | ||
// nlp.extend(require('./plugins/typeahead/src')) | ||
// nlp.extend(require('./plugins/numbers/src')) | ||
// nlp.extend(require('./plugins/dates/src')) | ||
// nlp.extend(require('./plugins/sentences/src')) | ||
// nlp.verbose(true) | ||
// nlp.typeahead({ march: 'Date' }, { min: 1, safe: false }) | ||
// let str = | ||
// '/^(?=d)(?:(?:31(?!.(?:0?[2469]|11))|(?:30|29)(?!.0?2)|29(?=.0?2.(?:(?:(?:1[6-9]|[2-9]d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(?:\x20|$))|(?:2[0-8]|1d|0?[1-9]))([-./])(?:1[012]|0?[1-9])\1(?:1[6-9]|[2-9]d)?dd(?:(?=\x20d)\x20|$))?(((0?[1-9]|1[012])(:[0-5]d){0,2}(\x20[AP]M))|([01]d|2[0-3])(:[0-5]d){1,2})?$/' | ||
// let r = new RegExp(str) | ||
// // console.log(r) | ||
// let res = nlp.parseMatch(`start (one|two|three four)? end`) | ||
// console.log(res) | ||
nlp.extend(require('./plugins/match-runner/src')) | ||
const text = require('/Users/spencer/mountain/compromise/scripts/perf/flame/_sotu-text.js') | ||
|
||
// let doc = nlp.tokenize('16 marc') | ||
// doc.match() | ||
// let list = [ | ||
// // ==== Holiday ==== | ||
// { match: '#Holiday (day|eve)', tag: 'Holiday', reason: 'holiday-day' }, // the captain who | ||
|
||
// const doc = nlp('i was walking') | ||
// const m = doc.normalize({ | ||
// verbs: true, | ||
// }) | ||
// m.debug() | ||
// // ==== WeekDay ==== | ||
// // sun the 5th | ||
// { match: '[sun] the #Ordinal', tag: 'WeekDay', reason: 'sun-the-5th' }, | ||
// //sun feb 2 | ||
// { match: '[sun] #Date', group: 0, tag: 'WeekDay', reason: 'sun-feb' }, | ||
// ] | ||
|
||
// let doc = nlp('no one tunes into their 2nd favourite no-radio station. no lyin!') | ||
// doc.matchRunner(list) | ||
// doc.debug() | ||
// nlp(text) | ||
|
||
// const reg = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/g | ||
|
||
// let str = '(one two) (upto) snooz(et)oDate' | ||
// console.log(str.split(/(\(.*?\))/)) | ||
// console.log(str.split(/(?:^|\s)([\!\[\^]*\(.*?[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/)) | ||
// console.log(str.split(/(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/)) | ||
// console.log(nlp.parseMatch('(snooze|wait|delay|punt|later|sleep) (up to) [<snooze_to>#Date+]')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,4 +18,5 @@ const fetch = function (url) { | |
}) | ||
}) | ||
} | ||
|
||
module.exports = fetch |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Quick timing script: reports how long it takes to load the library,
// parse the text exported by ./_sotu-text, and run one match over the result.
const txt = require('./_sotu-text')
const path = '../../../src'

console.log('\n-- testing: --')
// time the require() of the library itself
console.time('load')
const nlp = require(path)
console.timeEnd('load')

// time parsing the whole text into a document
console.time('parse')
let doc = nlp(txt)
console.timeEnd('parse')

// time a single tag match over the parsed document
console.time('match')
doc.match('#Noun')
console.timeEnd('match')
console.log('\n v' + nlp.version, '\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
const Pool = require('./pool/pool') | ||
const fetch = require('./_fetch') | ||
|
||
// Reference total that a run's summed time is compared against (see diff()).
const BASELINE = 92 //node 12

// The baseline is annotated as a node 12 figure, so warn when running on any other major version.
if (!process.version.match(/^v12\./)) {
  console.warn('Warn: Expecting node v12.x - got ' + process.version)
}
|
||
// Corpus file names, nlp-corpus-1.json through nlp-corpus-10.json, in order.
let docs = Array.from({ length: 10 }, (_, i) => `nlp-corpus-${i + 1}.json`)
|
||
/**
 * Fetch every url in parallel and join each response's lines into one string.
 * @param {string[]} urls - addresses handed one at a time to fetch()
 * @returns {Promise<string[]>} one newline-joined text per url, in input order
 */
const fetchAll = function (urls) {
  const requests = urls.map(url => fetch(url))
  const joinLines = lines => lines.join('\n')
  return Promise.all(requests).then(pages => pages.map(joinLines))
}
|
||
/**
 * Percent change of a measured total relative to the baseline, to one decimal place.
 * Positive means slower than the baseline, negative means faster.
 * The change is divided by the baseline (not by the measured time), so that
 * e.g. twice the baseline reads as +100%.
 * @param {number} time - measured total
 * @param {number} [baseline=BASELINE] - reference total to compare against
 * @returns {number} percent difference, rounded to 0.1
 */
const diff = function (time, baseline = BASELINE) {
  const delta = time - baseline
  const percent = (delta / baseline) * 100
  return Math.round(percent * 10) / 10
}
|
||
// Entry point: downloads the corpus texts, runs each one through the worker pool
// in sequence, then prints the summed per-text averages and the change vs. the baseline.
;(async () => {
  const p = new Pool()
  try {
    // NOTE(review): the package version in this URL looks mangled ("[email protected]") in this copy — confirm against the repository
    const texts = await fetchAll(docs.map(file => `https://unpkg.com/[email protected]/builds/${file}`))
    console.log(`\n\n running ${texts.length} texts on ${p.count()} workers`)
    const nums = []
    // texts are run one after another so that each measurement has all the workers to itself
    for (let i = 0; i < texts.length; i += 1) {
      console.log(` text #${i + 1} - 🕰`)
      const num = await p.do(texts[i])
      nums.push(num)
    }
    let sum = nums.reduce((h, n) => h + n, 0)
    sum = Math.round(sum * 10) / 10
    console.log('\n\n', sum, ' total')
    console.log(' +/- ', diff(sum), '% ')
  } finally {
    // always stop the workers, otherwise a failed download or run leaves them alive
    p.close()
  }
})().catch(err => {
  // surface the failure and mark the run as failed instead of leaving a floating rejection
  console.error(err)
  process.exitCode = 1
})
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Chooses which build of the library the perf scripts load:
// the minified production build when TESTENV=prod, otherwise the source tree.
// `typeof` yields a string, so it must be compared with the string 'undefined';
// comparing with the value `undefined` is always true and guards nothing.
if (typeof process !== 'undefined' && typeof module !== 'undefined') {
  let nlp
  if (process.env.TESTENV === 'prod') {
    console.warn('== production build test 🚀 ==')
    nlp = require('../../../builds/compromise.min.js')
  } else {
    nlp = require('../../../src')
    // nlp.extend(require('../plugins/numbers/src'))
  }

  module.exports = nlp
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
const { Worker } = require('worker_threads') | ||
const os = require('os') | ||
const cpus = os.cpus().length | ||
|
||
/**
 * A fixed set of worker threads, one per reported cpu, that all receive the
 * same message; used to average one job's result across every worker.
 */
class Pool {
  /** Start one worker.js thread per cpu (at least one). */
  constructor() {
    // the cpu list can come back empty on some platforms; keep one worker so
    // do() never averages an empty list into NaN
    const size = Math.max(cpus, 1)
    this.workers = []
    for (let i = 0; i < size; i += 1) {
      this.workers.push(new Worker(__dirname + '/worker.js'))
    }
  }

  /**
   * Post the same message to every worker and average their numeric replies.
   * @param {*} msg - payload passed to each worker's postMessage
   * @returns {Promise<number>} mean of the replies, rounded to one decimal place
   */
  do(msg) {
    const pending = this.workers.map(w => {
      return new Promise(resolve => {
        // `once` detaches only this listener after the first reply, leaving any
        // other 'message' listeners on the worker in place
        w.once('message', resolve)
        w.postMessage(msg)
      })
    })
    return Promise.all(pending).then(nums => {
      const total = nums.reduce((h, n) => h + n, 0)
      return Math.round((total / nums.length) * 10) / 10
    })
  }

  /** @returns {number} how many workers are in the pool */
  count() {
    return this.workers.length
  }

  /** Terminate every worker thread. */
  close() {
    this.workers.forEach(w => w.terminate())
  }
}
module.exports = Pool | ||
|
||
// let p = new Pool() | ||
// p.do("hey now, you're a rockstar").then(() => p.close()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
const { parentPort } = require('worker_threads') | ||
// const nlp = require('../../src') | ||
let nlp = require('./_lib') | ||
|
||
// Match patterns that doit() runs, one after another, against each parsed document.
let matches = [
  'out of range',
  '#Person #Person',
  '. of the world',
  '#Noun+ house',
  'range #Noun+',
  'doubt . of #Verb',
  '(watch|house|#Verb) .',
  '(watch|house|#Verb)?',
  '(watch a film|eat a cake)+',
  '(#Noun of #Noun)+',
  '. @hasQuestionMark',
  'the .+',
  'keep a #Noun',
]
|
||
/**
 * The benchmark workload: parse the text, run every pattern in `matches`
 * against it (reading the matched text each time), then serialise the document.
 * @param {string} txt - text to parse
 * @returns {Promise<undefined>}
 */
const doit = async function (txt) {
  const doc = nlp(txt)
  for (const reg of matches) {
    doc.match(reg).text()
  }
  doc.json()
}
|
||
// For each message from the parent thread: run the workload on it and reply
// with the elapsed wall-clock time in seconds.
parentPort.on('message', async msg => {
  const begin = Date.now()
  // doit() is async: await it so the timing covers the complete run, and so a
  // failure stops here instead of a timing being posted for a run that threw
  await doit(msg)
  const delta = (Date.now() - begin) / 1000
  parentPort.postMessage(delta)
})
|
||
// new Promise(async resolve => { | ||
// parentPort.postMessage(r) | ||
// resolve(r) | ||
// }) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.