diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 82ceee2..64021d0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index a4e00f0..a77effd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,12 +10,8 @@ src/python/.idea npm-debug.log.* .idea .coverage -index.js -index.js.map -pyShellType.js -pyShellType.js.map -*.test.js -*.test.js.map +*.js +*.js.map *.d.ts .pytest_cache .vscode/settings.json diff --git a/.vscode/launch.json b/.vscode/launch.json index 4ce0199..99b2347 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -53,8 +53,7 @@ "request": "launch", "program": "${workspaceRoot}/node_modules/mocha/bin/_mocha", "args": ["-r", "ts-node/register", "${relativeFile}","--ui","tdd","--no-timeouts"], - "cwd": "${workspaceRoot}", - "protocol": "inspector" + "cwd": "${workspaceRoot}" } ] } \ No newline at end of file diff --git a/index.ts b/PythonExecutor.ts similarity index 70% rename from index.ts rename to PythonExecutor.ts index 6df75db..50a4a03 100644 --- a/index.ts +++ b/PythonExecutor.ts @@ -1,6 +1,6 @@ import { PythonShell, Options, NewlineTransformer } from 'python-shell' import { EOL } from 'os' -import { Readable } from 'stream' +import { randomBytes } from 'crypto' export interface FrameSummary { _line: string @@ -51,23 +51,32 @@ export interface PythonResult { internalError: string, caller: string, lineno: number, - done: boolean + done: boolean, + startResult: boolean, + evaluatorName: string, } -export class PythonEvaluator { - private static readonly areplPythonBackendFolderPath = __dirname + '/python/' - - /** - * whether python is busy executing inputted code - */ - executing = false +/** + * Starting = Starting or restarting. + * Ending = Process is exiting. + * Executing = Executing inputted code. + * DirtyFree = evaluator may have been polluted by side-effects from previous code, but is free for more code. + * FreshFree = evaluator is ready for the first run of code + */ +export enum PythonState { + Starting, + Ending, + Executing, + DirtyFree, + FreshFree +} - /** - * whether python backend process is running / not running - */ - running = false +export class PythonExecutor { + private static readonly areplPythonBackendFolderPath = __dirname + '/python/' - restarting = false + state: PythonState = PythonState.Starting + finishedStartingCallback: Function + evaluatorName: string private startTime: number /** @@ -98,7 +107,9 @@ export class PythonEvaluator { this.options.mode = 'binary' this.options.stdio = ['pipe', 'pipe', 'pipe', 'pipe'] if (!options.pythonPath) this.options.pythonPath = PythonShell.defaultPythonPath - if (!options.scriptPath) this.options.scriptPath = PythonEvaluator.areplPythonBackendFolderPath + if (!options.scriptPath) this.options.scriptPath = PythonExecutor.areplPythonBackendFolderPath + + this.evaluatorName = randomBytes(16).toString('hex') } @@ -106,8 +117,11 @@ export class PythonEvaluator { * does not do anything if program is currently executing code */ execCode(code: ExecArgs) { - if (this.executing) return - this.executing = true + if (this.state == PythonState.Executing){ + console.error('Incoming code detected while process is still executing. \ + This should never happen') + } + this.state = PythonState.Executing this.startTime = Date.now() this.pyshell.send(JSON.stringify(code) + EOL) } @@ -125,64 +139,58 @@ export class PythonEvaluator { */ restart(callback = () => { }) { - this.restarting = false + this.state = PythonState.Ending // register callback for restart // using childProcess callback instead of pyshell callback // (pyshell callback only happens when process exits voluntarily) this.pyshell.childProcess.on('exit', () => { - this.restarting = true - this.executing = false - this.start() - callback() + this.start(callback) }) this.stop() } /** - * kills python process. force-kills if necessary after 50ms. - * you can check python_evaluator.running to see if process is dead yet + * Kills python process. Force-kills if necessary after 50ms. + * You can check python_evaluator.running to see if process is dead yet */ - stop() { - // pyshell has 50 ms to die gracefully - this.pyshell.childProcess.kill() - this.running = !this.pyshell.childProcess.killed - if (this.running) console.info("pyshell refused to die") - else this.executing = false - - setTimeout(() => { - if (this.running && !this.restarting) { - // murder the process with extreme prejudice - this.pyshell.childProcess.kill('SIGKILL') - if (this.pyshell.childProcess.killed) { - console.error("the python process simply cannot be killed!") + stop(kill_immediately=false) { + this.state = PythonState.Ending + const kill_signal = kill_immediately ? 'SIGKILL' : 'SIGTERM' + this.pyshell.childProcess.kill(kill_signal) + + if(!kill_immediately){ + // pyshell has 50 ms to die gracefully + setTimeout(() => { + if (this.state == PythonState.Ending) { + // python didn't respect the SIGTERM, force-kill it + this.pyshell.childProcess.kill('SIGKILL') } - else this.executing = false - } - }, 50) + }, 50) + } } /** * starts python_evaluator.py. Will NOT WORK with python 2 */ - start() { + start(finishedStartingCallback) { + this.state = PythonState.Starting console.log("Starting Python...") + this.finishedStartingCallback = finishedStartingCallback + this.startTime = Date.now() this.pyshell = new PythonShell('arepl_python_evaluator.py', this.options) const resultPipe = this.pyshell.childProcess.stdio[3] const newlineTransformer = new NewlineTransformer() resultPipe.pipe(newlineTransformer).on('data', this.handleResult.bind(this)) - // not sure why exactly I have to wrap onPrint/onStderr w/ lambda - // but tests fail if I don't this.pyshell.stdout.on('data', (message: Buffer) => { this.onPrint(message.toString()) }) this.pyshell.stderr.on('data', (log: Buffer) => { this.onStderr(log.toString()) }) - this.running = true } /** @@ -220,12 +228,22 @@ export class PythonEvaluator { internalError: "", caller: "", lineno: -1, - done: true + done: true, + startResult: false, + evaluatorName: this.evaluatorName } try { pyResult = JSON.parse(results) - this.executing = !pyResult['done'] + if(pyResult.startResult){ + console.log(`Finished starting in ${Date.now() - this.startTime}`) + this.state = PythonState.FreshFree + this.finishedStartingCallback() + return + } + if(pyResult['done'] == true){ + this.state = PythonState.DirtyFree + } pyResult.execTime = pyResult.execTime * 1000 // convert into ms pyResult.totalPyTime = pyResult.totalPyTime * 1000 @@ -258,40 +276,14 @@ export class PythonEvaluator { return PythonShell.checkSyntax(code); } - /** - * checks syntax without executing code - * @param {string} filePath - * @returns {Promise} rejects w/ stderr if syntax failure - */ - async checkSyntaxFile(filePath: string) { - // note that this should really be done in python_evaluator.py - // but communication with that happens through just one channel (stdin/stdout) - // so for now i prefer to keep this seperate - - return PythonShell.checkSyntaxFile(filePath); - } - /** * gets rid of unnecessary File "" message in exception * @example err: * Traceback (most recent call last):\n File "", line 1, in \nNameError: name \'x\' is not defined\n */ - formatPythonException(err: string) { + private formatPythonException(err: string) { //replace File "" (pointless) err = err.replace(/File \"\", /g, "") return err } - - /** - * delays execution of function by ms milliseconds, resetting clock every time it is called - * Useful for real-time execution so execCode doesn't get called too often - * thanks to https://stackoverflow.com/a/1909508/6629672 - */ - debounce = (function () { - let timer: any = 0; - return function (callback, ms: number, ...args: any[]) { - clearTimeout(timer); - timer = setTimeout(callback, ms, args); - }; - })(); } diff --git a/README.md b/README.md index a774e73..3bda6ea 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Although it is meant for AREPL, it is not dependent upon AREPL and can be used b > npm install [arepl-backend](https://www.npmjs.com/package/arepl-backend) -must have python 3.7 or greater +Must have python 3.7 or greater ## Usage @@ -35,11 +35,9 @@ Semantic release cheatsheet: #### Table of Contents +* [PythonState](#pythonstate) * [constructor](#constructor) * [Parameters](#parameters) -* [executing](#executing) -* [running](#running) -* [debounce](#debounce) * [execCode](#execcode) * [Parameters](#parameters-1) * [sendStdin](#sendstdin) @@ -47,23 +45,31 @@ Semantic release cheatsheet: * [restart](#restart) * [Parameters](#parameters-3) * [stop](#stop) -* [start](#start) -* [onResult](#onresult) * [Parameters](#parameters-4) -* [onPrint](#onprint) +* [start](#start) * [Parameters](#parameters-5) -* [onStderr](#onstderr) +* [onResult](#onresult) * [Parameters](#parameters-6) -* [handleResult](#handleresult) +* [onPrint](#onprint) * [Parameters](#parameters-7) -* [checkSyntax](#checksyntax) +* [onStderr](#onstderr) * [Parameters](#parameters-8) -* [checkSyntaxFile](#checksyntaxfile) +* [handleResult](#handleresult) * [Parameters](#parameters-9) -* [formatPythonException](#formatpythonexception) +* [checkSyntax](#checksyntax) * [Parameters](#parameters-10) +* [formatPythonException](#formatpythonexception) + * [Parameters](#parameters-11) * [Examples](#examples) +### PythonState + +Starting = Starting or restarting. +Ending = Process is exiting. +Executing = Executing inputted code. +DirtyFree = evaluator may have been polluted by side-effects from previous code, but is free for more code. +FreshFree = evaluator is ready for the first run of code + ### constructor starts python\_evaluator.py @@ -72,20 +78,6 @@ starts python\_evaluator.py * `options` Process / Python options. If not specified sensible defaults are inferred. (optional, default `{}`) -### executing - -whether python is busy executing inputted code - -### running - -whether python backend process is running / not running - -### debounce - -delays execution of function by ms milliseconds, resetting clock every time it is called -Useful for real-time execution so execCode doesn't get called too often -thanks to - ### execCode does not do anything if program is currently executing code @@ -111,13 +103,21 @@ After process restarts the callback passed in is invoked ### stop -kills python process. force-kills if necessary after 50ms. -you can check python\_evaluator.running to see if process is dead yet +Kills python process. Force-kills if necessary after 50ms. +You can check python\_evaluator.running to see if process is dead yet + +#### Parameters + +* `kill_immediately` (optional, default `false`) ### start starts python\_evaluator.py. Will NOT WORK with python 2 +#### Parameters + +* `finishedStartingCallback` + ### onResult Overwrite this with your own handler. @@ -163,16 +163,6 @@ checks syntax without executing code Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)** rejects w/ stderr if syntax failure -### checkSyntaxFile - -checks syntax without executing code - -#### Parameters - -* `filePath` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** - -Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)** rejects w/ stderr if syntax failure - ### formatPythonException gets rid of unnecessary File "" message in exception diff --git a/package-lock.json b/package-lock.json index c5f751e..73fd14d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,14 +12,14 @@ "python-shell": "^5.0.0" }, "devDependencies": { - "@types/mocha": "^9.1.1", + "@types/mocha": "^10.0.1", "@types/node": "^12.11.7", "documentation": "^14.0.1", - "mocha": "^10.0.0", + "mocha": "^10.2.0", "mocha-appveyor-reporter": "^0.4.0", "semantic-release": "^19.0.3", - "ts-node": "^8.6.2", - "typescript": "^3.6.3" + "ts-node": "^10.9.1", + "typescript": "^5.0.4" } }, "node_modules/@ampproject/remapping": { @@ -387,6 +387,28 @@ "node": ">=0.1.90" } }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@cspotcode/source-map-support/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, "node_modules/@jridgewell/gen-mapping": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.1.1.tgz", @@ -965,6 +987,30 @@ "node": ">= 10" } }, + "node_modules/@tsconfig/node10": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz", + "integrity": "sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==", + "dev": true + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.3.tgz", + "integrity": "sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ==", + "dev": true + }, "node_modules/@types/debug": { "version": "4.1.7", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.7.tgz", @@ -1005,9 +1051,9 @@ "dev": true }, "node_modules/@types/mocha": { - "version": "9.1.1", - "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-9.1.1.tgz", - "integrity": "sha512-Z61JK7DKDtdKTWwLeElSEBcWGRLY8g95ic5FoQqI9CMx0ns/Ghep3B4DfcEimiKMvtamNVULVNKEsiwV3aQmXw==", + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz", + "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==", "dev": true }, "node_modules/@types/ms": { @@ -1058,12 +1104,6 @@ "integrity": "sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==", "dev": true }, - "node_modules/@ungap/promise-all-settled": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", - "integrity": "sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==", - "dev": true - }, "node_modules/@vue/compiler-core": { "version": "3.2.47", "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.2.47.tgz", @@ -1139,6 +1179,27 @@ "dev": true, "optional": true }, + "node_modules/acorn": { + "version": "8.8.2", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz", + "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz", + "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==", + "dev": true, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/agent-base": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", @@ -1427,12 +1488,6 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, - "node_modules/buffer-from": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", - "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", - "dev": true - }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -1845,6 +1900,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true + }, "node_modules/cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -5062,12 +5123,11 @@ } }, "node_modules/mocha": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.0.0.tgz", - "integrity": "sha512-0Wl+elVUD43Y0BqPZBzZt8Tnkw9CMUdNYnUsTfOM1vuhJVZL+kiesFYsqwBkEEuEixaiPe5ZQdqDgX2jddhmoA==", + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.2.0.tgz", + "integrity": "sha512-IDY7fl/BecMwFHzoqF2sg/SHHANeBoMMXFlS9r0OXKDssYE1M5O43wUY/9BVPeIvfH2zmEbBfseqN9gBQZzXkg==", "dev": true, "dependencies": { - "@ungap/promise-all-settled": "1.1.2", "ansi-colors": "4.1.1", "browser-stdout": "1.3.1", "chokidar": "3.5.3", @@ -9249,16 +9309,6 @@ "node": ">=0.10.0" } }, - "node_modules/source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", - "dev": true, - "dependencies": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, "node_modules/sourcemap-codec": { "version": "1.4.8", "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz", @@ -9672,23 +9722,46 @@ } }, "node_modules/ts-node": { - "version": "8.6.2", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-8.6.2.tgz", - "integrity": "sha512-4mZEbofxGqLL2RImpe3zMJukvEvcO1XP8bj8ozBPySdCUXEcU5cIRwR0aM3R+VoZq7iXc8N86NC0FspGRqP4gg==", - "dev": true, - "dependencies": { + "version": "10.9.1", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz", + "integrity": "sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==", + "dev": true, + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", "arg": "^4.1.0", + "create-require": "^1.1.0", "diff": "^4.0.1", "make-error": "^1.1.1", - "source-map-support": "^0.5.6", + "v8-compile-cache-lib": "^3.0.1", "yn": "3.1.1" }, "bin": { "ts-node": "dist/bin.js", - "ts-script": "dist/script.js" + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" }, - "engines": { - "node": ">=6.0.0" + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } } }, "node_modules/ts-node/node_modules/diff": { @@ -9731,16 +9804,16 @@ } }, "node_modules/typescript": { - "version": "3.6.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.6.4.tgz", - "integrity": "sha512-unoCll1+l+YK4i4F8f22TaNVPRHcD9PA3yCuZ8g5e0qGqlVlJ/8FSateOLLSagn+Yg5+ZwuPkL8LFUc0Jcvksg==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.4.tgz", + "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==", "dev": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { - "node": ">=4.2.0" + "node": ">=12.20" } }, "node_modules/uglify-js": { @@ -9995,6 +10068,12 @@ "node": ">=8" } }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true + }, "node_modules/validate-npm-package-license": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", @@ -10726,6 +10805,27 @@ "dev": true, "optional": true }, + "@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "requires": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "dependencies": { + "@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "requires": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + } + } + }, "@jridgewell/gen-mapping": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.1.1.tgz", @@ -11187,6 +11287,30 @@ "integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==", "dev": true }, + "@tsconfig/node10": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz", + "integrity": "sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==", + "dev": true + }, + "@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true + }, + "@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true + }, + "@tsconfig/node16": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.3.tgz", + "integrity": "sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ==", + "dev": true + }, "@types/debug": { "version": "4.1.7", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.7.tgz", @@ -11227,9 +11351,9 @@ "dev": true }, "@types/mocha": { - "version": "9.1.1", - "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-9.1.1.tgz", - "integrity": "sha512-Z61JK7DKDtdKTWwLeElSEBcWGRLY8g95ic5FoQqI9CMx0ns/Ghep3B4DfcEimiKMvtamNVULVNKEsiwV3aQmXw==", + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz", + "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==", "dev": true }, "@types/ms": { @@ -11280,12 +11404,6 @@ "integrity": "sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==", "dev": true }, - "@ungap/promise-all-settled": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", - "integrity": "sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==", - "dev": true - }, "@vue/compiler-core": { "version": "3.2.47", "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.2.47.tgz", @@ -11361,6 +11479,18 @@ "dev": true, "optional": true }, + "acorn": { + "version": "8.8.2", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz", + "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==", + "dev": true + }, + "acorn-walk": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz", + "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==", + "dev": true + }, "agent-base": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", @@ -11583,12 +11713,6 @@ "update-browserslist-db": "^1.0.10" } }, - "buffer-from": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", - "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", - "dev": true - }, "callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -11898,6 +12022,12 @@ } } }, + "create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true + }, "cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -14237,12 +14367,11 @@ } }, "mocha": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.0.0.tgz", - "integrity": "sha512-0Wl+elVUD43Y0BqPZBzZt8Tnkw9CMUdNYnUsTfOM1vuhJVZL+kiesFYsqwBkEEuEixaiPe5ZQdqDgX2jddhmoA==", + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.2.0.tgz", + "integrity": "sha512-IDY7fl/BecMwFHzoqF2sg/SHHANeBoMMXFlS9r0OXKDssYE1M5O43wUY/9BVPeIvfH2zmEbBfseqN9gBQZzXkg==", "dev": true, "requires": { - "@ungap/promise-all-settled": "1.1.2", "ansi-colors": "4.1.1", "browser-stdout": "1.3.1", "chokidar": "3.5.3", @@ -17222,16 +17351,6 @@ "dev": true, "optional": true }, - "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", - "dev": true, - "requires": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, "sourcemap-codec": { "version": "1.4.8", "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz", @@ -17554,15 +17673,23 @@ "dev": true }, "ts-node": { - "version": "8.6.2", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-8.6.2.tgz", - "integrity": "sha512-4mZEbofxGqLL2RImpe3zMJukvEvcO1XP8bj8ozBPySdCUXEcU5cIRwR0aM3R+VoZq7iXc8N86NC0FspGRqP4gg==", - "dev": true, - "requires": { + "version": "10.9.1", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz", + "integrity": "sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==", + "dev": true, + "requires": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", "arg": "^4.1.0", + "create-require": "^1.1.0", "diff": "^4.0.1", "make-error": "^1.1.1", - "source-map-support": "^0.5.6", + "v8-compile-cache-lib": "^3.0.1", "yn": "3.1.1" }, "dependencies": { @@ -17596,9 +17723,9 @@ "dev": true }, "typescript": { - "version": "3.6.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.6.4.tgz", - "integrity": "sha512-unoCll1+l+YK4i4F8f22TaNVPRHcD9PA3yCuZ8g5e0qGqlVlJ/8FSateOLLSagn+Yg5+ZwuPkL8LFUc0Jcvksg==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.4.tgz", + "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==", "dev": true }, "uglify-js": { @@ -17775,6 +17902,12 @@ "sade": "^1.7.3" } }, + "v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true + }, "validate-npm-package-license": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", diff --git a/package.json b/package.json index 95525cf..e0708ee 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "arepl-backend", "version": "1.3.15", "description": "JS interface to python evaluator for AREPL", - "main": "index.js", + "main": "PythonExecutors.js", "scripts": { "compile": "tsc -watch -p ./", "compileOnce": "tsc", @@ -34,14 +34,14 @@ "author": "Almenon", "license": "MIT", "devDependencies": { - "@types/mocha": "^9.1.1", + "@types/mocha": "^10.0.1", "@types/node": "^12.11.7", "documentation": "^14.0.1", - "mocha": "^10.0.0", + "mocha": "^10.2.0", "mocha-appveyor-reporter": "^0.4.0", "semantic-release": "^19.0.3", - "ts-node": "^8.6.2", - "typescript": "^3.6.3" + "ts-node": "^10.9.1", + "typescript": "^5.0.4" }, "dependencies": { "python-shell": "^5.0.0" diff --git a/python/arepl_jsonpickle/__init__.py b/python/arepl_jsonpickle/__init__.py index c5b22a4..c438125 100644 --- a/python/arepl_jsonpickle/__init__.py +++ b/python/arepl_jsonpickle/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2008 John Paulett (john -at- paulett.org) # Copyright (C) 2009, 2011, 2013 David Aguilar (davvid -at- gmail.com) # All rights reserved. @@ -67,8 +65,6 @@ def __init__(self, name): that contain non-string dictionary keys. """ -from __future__ import absolute_import, division, unicode_literals - # Export other names not in __all__ from .backend import JSONBackend # noqa: F401 from .backend import json diff --git a/python/arepl_jsonpickle/backend.py b/python/arepl_jsonpickle/backend.py index 692d319..f44d5bb 100644 --- a/python/arepl_jsonpickle/backend.py +++ b/python/arepl_jsonpickle/backend.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import, division, unicode_literals - from .compat import string_types -class JSONBackend(object): +class JSONBackend: """Manages encoding and decoding using various backends. It tries these modules in this order: @@ -18,11 +16,7 @@ def _verify(self): """Ensures that we've loaded at least one JSON backend.""" if self._verified: return - raise AssertionError( - 'jsonpickle requires at least one of the ' - 'following:\n' - ' python2.6, simplejson' - ) + raise AssertionError('jsonpickle could not load any json modules') def encode(self, obj, indent=None, separators=None): """ @@ -101,6 +95,9 @@ def __init__(self, fallthrough=True): self.load_backend('simplejson') self.load_backend('json') self.load_backend('ujson') + self.load_backend( + 'yaml', dumps='dump', loads='safe_load', loads_exc='YAMLError' + ) # Defaults for various encoders json_opts = ((), {'sort_keys': False}) @@ -137,7 +134,6 @@ def _store(self, dct, backend, obj, name): return True def load_backend(self, name, dumps='dumps', loads='loads', loads_exc=ValueError): - """Load a JSON backend by name. This method loads a backend and sets up references to that diff --git a/python/arepl_jsonpickle/compat.py b/python/arepl_jsonpickle/compat.py index 320c64d..e971aeb 100644 --- a/python/arepl_jsonpickle/compat.py +++ b/python/arepl_jsonpickle/compat.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import queue # noqa import sys from collections.abc import Iterator as abc_iterator # noqa diff --git a/python/arepl_jsonpickle/ext/gmpy.py b/python/arepl_jsonpickle/ext/gmpy.py index d42048d..bc5e301 100644 --- a/python/arepl_jsonpickle/ext/gmpy.py +++ b/python/arepl_jsonpickle/ext/gmpy.py @@ -1,4 +1,7 @@ -import gmpy2 as gmpy +try: + import gmpy2 as gmpy +except ImportError: + gmpy = None from ..handlers import BaseHandler, register, unregister @@ -15,8 +18,10 @@ def restore(self, data): def register_handlers(): - register(gmpy.mpz, GmpyMPZHandler, base=True) + if gmpy is not None: + register(gmpy.mpz, GmpyMPZHandler, base=True) def unregister_handlers(): - unregister(gmpy.mpz) + if gmpy is not None: + unregister(gmpy.mpz) diff --git a/python/arepl_jsonpickle/ext/numpy.py b/python/arepl_jsonpickle/ext/numpy.py index af2bf71..b1e9df2 100644 --- a/python/arepl_jsonpickle/ext/numpy.py +++ b/python/arepl_jsonpickle/ext/numpy.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import ast import json import sys @@ -149,7 +147,7 @@ def flatten(self, obj, data): """encode numpy to json""" if self.size_threshold is None or self.size_threshold >= obj.size: # encode as text - data = super(NumpyNDArrayHandlerBinary, self).flatten(obj, data) + data = super().flatten(obj, data) else: # encode as binary if obj.dtype == object: @@ -192,7 +190,7 @@ def restore(self, data): values = data['values'] if isinstance(values, list): # decode text representation - arr = super(NumpyNDArrayHandlerBinary, self).restore(data) + arr = super().restore(data) elif isinstance(values, numeric_types): # single-value array arr = np.array([values], dtype=self.restore_dtype(data)) @@ -258,7 +256,7 @@ def __init__(self, mode='warn', size_threshold=16, compression=zlib): valid values for 'compression' are {zlib, bz2, None} if compression is None, no compression is applied """ - super(NumpyNDArrayHandlerView, self).__init__(size_threshold, compression) + super().__init__(size_threshold, compression) self.mode = mode def flatten(self, obj, data): @@ -266,7 +264,7 @@ def flatten(self, obj, data): base = obj.base if base is None and obj.flags.forc: # store by value - data = super(NumpyNDArrayHandlerView, self).flatten(obj, data) + data = super().flatten(obj, data) # ensure that views on arrays stored as text # are interpreted correctly if not obj.flags.c_contiguous: @@ -311,7 +309,7 @@ def flatten(self, obj, data): "not know how to serialize." ) raise ValueError(msg) - data = super(NumpyNDArrayHandlerView, self).flatten(obj.copy(), data) + data = super().flatten(obj.copy(), data) return data @@ -320,7 +318,7 @@ def restore(self, data): base = data.get('base', None) if base is None: # decode array with owndata=True - arr = super(NumpyNDArrayHandlerView, self).restore(data) + arr = super().restore(data) else: # decode array view, which references the data of another array base = self.context.restore(base, reset=False) @@ -344,10 +342,25 @@ def restore(self, data): return arr -def register_handlers(): +def register_handlers( + ndarray_mode='warn', + ndarray_size_threshold=16, + ndarray_compression=zlib, +): + """Register handlers for numpy types + + :param ndarray_abc_xyz: Forward constructor arguments to NumpyNDArrayHandlerView. + Options with an 'ndarray_' prefix correspond to the same-named + NumpyNDArrayHandlerView constructor options, sans the 'ndarray_' prefix. + """ + ndarray_handler = NumpyNDArrayHandlerView( + mode=ndarray_mode, + size_threshold=ndarray_size_threshold, + compression=ndarray_compression, + ) + register(np.ndarray, ndarray_handler, base=True) register(np.dtype, NumpyDTypeHandler, base=True) register(np.generic, NumpyGenericHandler, base=True) - register(np.ndarray, NumpyNDArrayHandlerView(), base=True) # Numpy 1.20 has custom dtypes that must be registered separately. register(np.dtype(np.void).__class__, NumpyDTypeHandler, base=True) register(np.dtype(np.float32).__class__, NumpyDTypeHandler, base=True) @@ -356,6 +369,7 @@ def register_handlers(): def unregister_handlers(): + """Remove numpy handlers from the handler registry""" unregister(np.dtype) unregister(np.generic) unregister(np.ndarray) diff --git a/python/arepl_jsonpickle/ext/pandas.py b/python/arepl_jsonpickle/ext/pandas.py index af7538b..64135ed 100644 --- a/python/arepl_jsonpickle/ext/pandas.py +++ b/python/arepl_jsonpickle/ext/pandas.py @@ -1,12 +1,13 @@ -from __future__ import absolute_import - +import warnings import zlib from io import StringIO +import numpy as np import pandas as pd from .. import decode, encode from ..handlers import BaseHandler, register, unregister +from ..tags_pd import REVERSE_TYPE_MAP, TYPE_MAP from ..util import b64decode, b64encode from .numpy import register_handlers as register_numpy_handlers from .numpy import unregister_handlers as unregister_numpy_handlers @@ -14,7 +15,51 @@ __all__ = ['register_handlers', 'unregister_handlers'] -class PandasProcessor(object): +def pd_encode(obj, **kwargs): + if isinstance(obj, np.generic): + # convert pandas/numpy scalar to native Python type + return obj.item() + return encode(obj, **kwargs) + + +def pd_decode(s, **kwargs): + return decode(s, **kwargs) + + +def rle_encode(types_list): + """ + Encodes a list of type codes using Run-Length Encoding (RLE). This allows for object columns in dataframes to contain items of different types without massively bloating the encoded representation. + """ + if not types_list: + return [] + + encoded = [] + current_type = types_list[0] + count = 1 + + for typ in types_list[1:]: + if typ == current_type: + count += 1 + else: + encoded.append([current_type, count]) + current_type = typ + count = 1 + encoded.append([current_type, count]) + + return encoded + + +def rle_decode(encoded_list): + """ + Decodes a Run-Length Encoded (RLE) list back into the original list of type codes. + """ + decoded = [] + for typ, count in encoded_list: + decoded.extend([typ] * count) + return decoded + + +class PandasProcessor: def __init__(self, size_threshold=500, compression=zlib): """ :param size_threshold: nonnegative int or None @@ -62,10 +107,13 @@ def make_read_csv_params(meta, context): parse_dates = [] converters = {} timedeltas = [] + # this is only for pandas v2+ due to a backwards-incompatible change + parse_datetime_v2 = {} dtype = {} for k, v in meta_dtypes.items(): if v.startswith('datetime'): parse_dates.append(k) + parse_datetime_v2[k] = v elif v.startswith('complex'): converters[k] = complex elif v.startswith('timedelta'): @@ -79,6 +127,7 @@ def make_read_csv_params(meta, context): dtype=dtype, header=header, parse_dates=parse_dates, converters=converters ), timedeltas, + parse_datetime_v2, ) @@ -86,37 +135,163 @@ class PandasDfHandler(BaseHandler): pp = PandasProcessor() def flatten(self, obj, data): - dtype = obj.dtypes.to_dict() - + pp = PandasProcessor() + # handle multiindex columns + if isinstance(obj.columns, pd.MultiIndex): + columns = [tuple(col) for col in obj.columns] + column_names = obj.columns.names + is_multicolumns = True + else: + columns = obj.columns.tolist() + column_names = obj.columns.name + is_multicolumns = False + + # handle multiindex index + if isinstance(obj.index, pd.MultiIndex): + index_values = [tuple(idx) for idx in obj.index.values] + index_names = obj.index.names + is_multiindex = True + else: + index_values = obj.index.tolist() + index_names = obj.index.name + is_multiindex = False + + data_columns = {} + type_codes = [] + for col in obj.columns: + col_data = obj[col] + dtype_name = col_data.dtype.name + + if dtype_name == "object": + # check if items are complex types + if col_data.apply( + lambda x: isinstance(x, (list, dict, set, tuple, np.ndarray)) + ).any(): + # if items are complex, erialize each item individually + serialized_values = col_data.apply(lambda x: encode(x)).tolist() + data_columns[col] = serialized_values + type_codes.append("py/jp") + else: + # treat it as regular object dtype + data_columns[col] = col_data.tolist() + type_codes.append(TYPE_MAP.get(dtype_name, "object")) + else: + # for other dtypes, store their values directly + data_columns[col] = col_data.tolist() + type_codes.append(TYPE_MAP.get(dtype_name, "object")) + + # store index data + index_encoded = encode(index_values, keys=True) + + rle_types = rle_encode(type_codes) + # prepare metadata meta = { - 'dtypes': self.context.flatten( - {k: str(dtype[k]) for k in dtype}, reset=False - ), - 'index': encode(obj.index), - 'column_level_names': obj.columns.names, - 'header': list(range(len(obj.columns.names))), + "dtypes_rle": rle_types, + "index": index_encoded, + "index_names": index_names, + "columns": encode(columns, keys=True), + "column_names": column_names, + "is_multiindex": is_multiindex, + "is_multicolumns": is_multicolumns, } - data = self.pp.flatten_pandas( - obj.reset_index(drop=True).to_csv(index=False), data, meta - ) + # serialize data_columns with keys=True to allow for non-object keys + data_encoded = encode(data_columns, keys=True) + + # use PandasProcessor to flatten + data = pp.flatten_pandas(data_encoded, data, meta) return data - def restore(self, data): + def restore(self, obj): + data_encoded, meta = self.pp.restore_pandas(obj) + + data_columns = decode(data_encoded, keys=True) + + # get type codes, un-RLE-ed + try: + rle_types = meta["dtypes_rle"] + except KeyError: + # was encoded with pre-v3.4 scheme + return self.restore_v3_3(obj) + type_codes = rle_decode(rle_types) + + # handle multicolumns + columns_decoded = decode(meta["columns"], keys=True) + if meta.get("is_multicolumns", False): + columns = pd.MultiIndex.from_tuples( + columns_decoded, names=meta.get("column_names") + ) + else: + columns = columns_decoded + + # progressively reconstruct dataframe as a dict + df_data = {} + dtypes = {} + for col, type_code in zip(columns, type_codes): + col_data = data_columns[col] + if type_code == "py/jp": + # deserialize each item in the column + col_values = [decode(item) for item in col_data] + df_data[col] = col_values + else: + df_data[col] = col_data + # used later to get correct dtypes + dtype_str = REVERSE_TYPE_MAP.get(type_code, "object") + dtypes[col] = dtype_str + + # turn dict into df + df = pd.DataFrame(df_data) + df.columns = columns + + # apply dtypes + for col in df.columns: + dtype_str = dtypes.get(col, "object") + try: + dtype = np.dtype(dtype_str) + df[col] = df[col].astype(dtype) + except Exception: + msg = ( + f"jsonpickle was unable to properly deserialize " + f"the column {col} into its inferred dtype. " + f"Please file a bugreport on the jsonpickle GitHub! " + ) + warnings.warn(msg) + + # decode and set the index + index_values = decode(meta["index"], keys=True) + if meta.get("is_multiindex", False): + index = pd.MultiIndex.from_tuples( + index_values, names=meta.get("index_names") + ) + else: + index = pd.Index(index_values, name=meta.get("index_names")) + df.index = index + + # restore column names for easy readability + if "column_names" in meta: + if meta.get("is_multicolumns", False): + df.columns.names = meta.get("column_names") + else: + df.columns.name = meta.get("column_names") + + return df + + def restore_v3_3(self, data): csv, meta = self.pp.restore_pandas(data) - params, timedeltas = make_read_csv_params(meta, self.context) + params, timedeltas, parse_datetime_v2 = make_read_csv_params(meta, self.context) # None makes it compatible with objects serialized before # column_levels_names has been introduced. - column_level_names = meta.get('column_level_names', None) + column_level_names = meta.get("column_level_names", None) df = ( pd.read_csv(StringIO(csv), **params) - if data['values'].strip() + if data["values"].strip() else pd.DataFrame() ) for col in timedeltas: df[col] = pd.to_timedelta(df[col]) + df = df.astype(parse_datetime_v2) - df.set_index(decode(meta['index']), inplace=True) + df.set_index(decode(meta["index"]), inplace=True) # restore the column level(s) name(s) if column_level_names: df.columns.names = column_level_names @@ -143,7 +318,6 @@ def restore(self, data): class PandasIndexHandler(BaseHandler): - pp = PandasProcessor() index_constructor = pd.Index diff --git a/python/arepl_jsonpickle/handlers.py b/python/arepl_jsonpickle/handlers.py index d06e3ed..395e253 100644 --- a/python/arepl_jsonpickle/handlers.py +++ b/python/arepl_jsonpickle/handlers.py @@ -7,7 +7,6 @@ :func:`jsonpickle.handlers.register`. """ -from __future__ import absolute_import, division, unicode_literals import array import copy @@ -21,7 +20,7 @@ from . import compat, util -class Registry(object): +class Registry: def __init__(self): self._handlers = {} self._base_handlers = {} @@ -69,7 +68,7 @@ def _register(handler_cls): return _register if not util.is_type(cls): - raise TypeError('{!r} is not a class/type'.format(cls)) + raise TypeError(f'{cls!r} is not a class/type') # store both the name and the actual type for the ugly cases like # _sre.SRE_Pattern that cannot be loaded back directly self._handlers[util.importable_name(cls)] = self._handlers[cls] = handler @@ -89,7 +88,7 @@ def unregister(self, cls): get = registry.get -class BaseHandler(object): +class BaseHandler: def __init__(self, context): """ Initialize a new handler to handle a registered type. @@ -162,7 +161,6 @@ def restore(self, data): class DatetimeHandler(BaseHandler): - """Custom handler for datetime objects Datetime objects use __reduce__, and they generate binary strings encoding @@ -233,7 +231,7 @@ def restore(self, data): QueueHandler.handles(compat.queue.Queue) -class CloneFactory(object): +class CloneFactory: """Serialization proxy for collections.defaultdict's default_factory""" def __init__(self, exemplar): @@ -244,7 +242,7 @@ def __call__(self, clone=copy.copy): return clone(self.exemplar) def __repr__(self): - return ''.format(id(self), self.exemplar) + return f'' class UUIDHandler(BaseHandler): diff --git a/python/arepl_jsonpickle/pickler.py b/python/arepl_jsonpickle/pickler.py index 32c099e..125e329 100644 --- a/python/arepl_jsonpickle/pickler.py +++ b/python/arepl_jsonpickle/pickler.py @@ -1,11 +1,9 @@ # Copyright (C) 2008 John Paulett (john -at- paulett.org) -# Copyright (C) 2009-2018 David Aguilar (davvid -at- gmail.com) +# Copyright (C) 2009-2024 David Aguilar (davvid -at- gmail.com) # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. -from __future__ import absolute_import, division, unicode_literals - import decimal import inspect import itertools @@ -36,6 +34,7 @@ def encode( indent=None, separators=None, include_properties=False, + handle_readonly=False, ): """Return a JSON formatted representation of value, a Python object. @@ -47,13 +46,13 @@ def encode( objects to be equal by ``==``, such as when serializing sklearn instances. If you experience (de)serialization being incorrect when you use numpy, pandas, or sklearn handlers, this should be set to ``False``. - If you want the output to not include the dtype for numpy arrays, add - ``jsonpickle.register(numpy.generic, - UnpicklableNumpyGenericHandler, base=True)`` before your pickling code. - :param max_depth: If set to a non-negative integer then jsonpickle will - not recurse deeper than 'max_depth' steps into the object. Anything - deeper than 'max_depth' is represented using a Python repr() of the - object. + If you want the output to not include the dtype for numpy arrays, add:: + + jsonpickle.register( + numpy.generic, UnpicklableNumpyGenericHandler, base=True + ) + + before your pickling code. :param make_refs: If set to False jsonpickle's referencing support is disabled. Objects that are id()-identical won't be preserved across encode()/decode(), but the resulting JSON stream will be conceptually @@ -63,15 +62,25 @@ def encode( dictionary keys instead of coercing them into strings via `repr()`. This is typically what you want if you need to support Integer or objects as dictionary keys. - :param numeric_keys: Only use this option if the backend supports integer - dict keys natively. This flag tells jsonpickle to leave numeric keys - as-is rather than conforming them to json-friendly strings. - Using ``keys=True`` is the typical solution for integer keys, so only - use this if you have a specific use case where you want to allow the - backend to handle serialization of numeric dict keys. + :param max_depth: If set to a non-negative integer then jsonpickle will + not recurse deeper than 'max_depth' steps into the object. Anything + deeper than 'max_depth' is represented using a Python repr() of the + object. + :param reset: Custom pickle handlers that use the `Pickler.flatten` method or + `jsonpickle.encode` function must call `encode` with `reset=False` + in order to retain object references during pickling. + This flag is not typically used outside of a custom handler or + `__getstate__` implementation. + :param backend: If set to an instance of jsonpickle.backend.JSONBackend, + jsonpickle will use that backend for deserialization. :param warn: If set to True then jsonpickle will warn when it returns None for an object which it cannot pickle (e.g. file descriptors). + :param context: Supply a pre-built Pickler or Unpickler object to the + `jsonpickle.encode` and `jsonpickle.decode` machinery instead + of creating a new instance. The `context` represents the currently + active Pickler and Unpickler objects when custom handlers are + invoked by jsonpickle. :param max_iter: If set to a non-negative integer then jsonpickle will consume at most `max_iter` items when pickling iterators. :param use_decimal: If set to True jsonpickle will allow Decimal @@ -87,6 +96,12 @@ def encode( NOTE: A side-effect of the above settings is that float values will be converted to Decimal when converting to json. + :param numeric_keys: Only use this option if the backend supports integer + dict keys natively. This flag tells jsonpickle to leave numeric keys + as-is rather than conforming them to json-friendly strings. + Using ``keys=True`` is the typical solution for integer keys, so only + use this if you have a specific use case where you want to allow the + backend to handle serialization of numeric dict keys. :param use_base85: If possible, use base85 to encode binary data. Base85 bloats binary data by 1/4 as opposed to base64, which expands it by 1/3. This argument is @@ -111,8 +126,14 @@ def encode( :param include_properties: Include the names and values of class properties in the generated json. Properties are unpickled properly regardless of this setting, this is - meant to be used if processing the json outside of Python. Defaults to - ``False``. + meant to be used if processing the json outside of Python. Certain types + such as sets will not pickle due to not having a native-json equivalent. + Defaults to ``False``. + :param handle_readonly: + Handle objects with readonly methods, such as Django's SafeString. This + basically prevents jsonpickle from raising an exception for such objects. + You MUST set ``handle_readonly=True`` for the decoding if you encode with + this flag set to ``True``. >>> encode('my string') == '"my string"' True @@ -138,6 +159,8 @@ def encode( use_base85=use_base85, fail_safe=fail_safe, include_properties=include_properties, + handle_readonly=handle_readonly, + original_object=value, ) return backend.encode( context.flatten(value, reset=reset), indent=indent, separators=separators @@ -170,7 +193,7 @@ def _wrap_string_slot(string): return string -class Pickler(object): +class Pickler: def __init__( self, unpicklable=True, @@ -185,6 +208,8 @@ def __init__( use_base85=False, fail_safe=None, include_properties=False, + handle_readonly=False, + original_object=None, ): self.unpicklable = unpicklable self.make_refs = make_refs @@ -207,6 +232,8 @@ def __init__( self._use_decimal = use_decimal # A cache of objects that have already been flattened. self._flattened = {} + # Used for util.is_readonly, see +483 + self.handle_readonly = handle_readonly if self.use_base85: self._bytes_tag = tags.B85 @@ -219,8 +246,10 @@ def __init__( self.fail_safe = fail_safe self.include_properties = include_properties + self._original_object = original_object + def _determine_sort_keys(self): - for _, options in self.backend._encoder_options.values(): + for _, options in getattr(self.backend, '_encoder_options', {}).values(): if options.get("sort_keys", False): # the user has set one of the backends to sort keys return True @@ -288,9 +317,11 @@ def _mkref(self, obj): return pretend_new or is_new def _getref(self, obj): + """Return a "py/id" entry for the specified object""" return {tags.ID: self._objs.get(id(obj))} def _flatten(self, obj): + """Flatten an object and its guts into a json-safe representation""" if self.unpicklable and self.make_refs: result = self._flatten_impl(obj) else: @@ -368,7 +399,6 @@ def _flatten_obj(self, obj): max_reached = self._max_reached() try: - in_cycle = _in_cycle(obj, self._objs, max_reached, self.make_refs) if in_cycle: # break the cycle @@ -413,6 +443,14 @@ def _flatten_key_value_pair(self, k, v, data): """Flatten a key/value pair into the passed-in dictionary.""" if not util.is_picklable(k, v): return data + # TODO: use inspect.getmembers_static on 3.11+ because it avoids dynamic + # attribute lookups + if ( + self.handle_readonly + and k in {attr for attr, val in inspect.getmembers(self._original_object)} + and util.is_readonly(self._original_object, k, v) + ): + return data if k is None: k = 'null' # for compatibility with common json encoders @@ -454,7 +492,7 @@ def _flatten_properties(self, obj, data, allslots=None): # i don't like lambdas def valid_property(x): - return not x[0].startswith("__") and x[0] not in allslots_set + return not x[0].startswith('__') and x[0] not in allslots_set properties = [ x[0] for x in inspect.getmembers(obj.__class__) if valid_property(x) @@ -501,6 +539,7 @@ def _flatten_obj_instance(self, obj): has_getnewargs_ex = util.has_method(obj, '__getnewargs_ex__') has_getinitargs = util.has_method(obj, '__getinitargs__') has_reduce, has_reduce_ex = util.has_reduce(obj) + exclude = set(getattr(obj, '_jsonpickle_exclude', ())) # Support objects with __getstate__(); this ensures that # both __setstate__() and __getstate__() are implemented @@ -607,7 +646,9 @@ def _flatten_obj_instance(self, obj): data[tags.OBJECT] = class_name if has_getnewargs_ex: - data[tags.NEWARGSEX] = list(map(self._flatten, obj.__getnewargs_ex__())) + data[tags.NEWARGSEX] = [ + self._flatten(arg) for arg in obj.__getnewargs_ex__() + ] if has_getnewargs and not has_getnewargs_ex: data[tags.NEWARGS] = self._flatten(obj.__getnewargs__()) @@ -629,13 +670,13 @@ def _flatten_obj_instance(self, obj): if util.is_module(obj): if self.unpicklable: - data[tags.REPR] = '{name}/{name}'.format(name=obj.__name__) + data[tags.MODULE] = '{name}/{name}'.format(name=obj.__name__) else: data = compat.ustr(obj) return data if util.is_dictionary_subclass(obj): - self._flatten_dict_obj(obj, data) + self._flatten_dict_obj(obj, data, exclude=exclude) return data if util.is_sequence_subclass(obj): @@ -653,7 +694,7 @@ def _flatten_obj_instance(self, obj): # hack for zope persistent objects; this unghostifies the object getattr(obj, '_', None) - return self._flatten_dict_obj(obj.__dict__, data) + return self._flatten_dict_obj(obj.__dict__, data, exclude=exclude) if has_slots: return self._flatten_newstyle_with_slots(obj, data) @@ -730,7 +771,7 @@ def _flatten_string_key_value_pair(self, k, v, data): data[k] = self._flatten(v) return data - def _flatten_dict_obj(self, obj, data=None): + def _flatten_dict_obj(self, obj, data=None, exclude=()): """Recursively call flatten() and return json-friendly dict""" if data is None: data = obj.__class__() @@ -740,17 +781,17 @@ def _flatten_dict_obj(self, obj, data=None): if self.keys: # Phase 1: serialize regular objects, ignore fancy keys. flatten = self._flatten_string_key_value_pair - for k, v in util.items(obj): + for k, v in util.items(obj, exclude=exclude): flatten(k, v, data) # Phase 2: serialize non-string keys. flatten = self._flatten_non_string_key_value_pair - for k, v in util.items(obj): + for k, v in util.items(obj, exclude=exclude): flatten(k, v, data) else: # If we have string keys only then we only need a single pass. flatten = self._flatten_key_value_pair - for k, v in util.items(obj): + for k, v in util.items(obj, exclude=exclude): flatten(k, v, data) # the collections.defaultdict protocol @@ -774,10 +815,13 @@ def _flatten_dict_obj(self, obj, data=None): data['default_factory'] = value # Sub-classes of dict - if hasattr(obj, '__dict__') and self.unpicklable: - dict_data = {} - self._flatten_dict_obj(obj.__dict__, dict_data) - data['__dict__'] = dict_data + if hasattr(obj, '__dict__') and self.unpicklable and obj != obj.__dict__: + if self._mkref(obj.__dict__): + dict_data = {} + self._flatten_dict_obj(obj.__dict__, dict_data, exclude=exclude) + data['__dict__'] = dict_data + else: + data['__dict__'] = self._getref(obj.__dict__) return data @@ -788,7 +832,6 @@ def _get_flattener(self, obj): self._list_recurse if type(obj) is list else self._flatten_dict_obj ) else: - self._push() return self._getref # We handle tuples and sets by encoding them in a "(tuple|set)dict" @@ -796,20 +839,20 @@ def _get_flattener(self, obj): if not self.unpicklable: return self._list_recurse return lambda obj: { - tags.TUPLE - if type(obj) is tuple - else tags.SET: [self._flatten(v) for v in obj] + tags.TUPLE if type(obj) is tuple else tags.SET: [ + self._flatten(v) for v in obj + ] } + elif util.is_module_function(obj): + return self._flatten_function + elif util.is_object(obj): return self._ref_obj_instance elif util.is_type(obj): return _mktyperef - elif util.is_module_function(obj): - return self._flatten_function - # instance methods, lambdas, old style classes... self._pickle_warning(obj) return None diff --git a/python/arepl_jsonpickle/tags.py b/python/arepl_jsonpickle/tags.py index 57225c3..d6b346f 100644 --- a/python/arepl_jsonpickle/tags.py +++ b/python/arepl_jsonpickle/tags.py @@ -6,7 +6,6 @@ these custom key names to identify dictionaries that need to be specially handled. """ -from __future__ import absolute_import, division, unicode_literals BYTES = 'py/bytes' B64 = 'py/b64' @@ -16,6 +15,7 @@ INITARGS = 'py/initargs' ITERATOR = 'py/iterator' JSON_KEY = 'json://' +MODULE = 'py/mod' NEWARGS = 'py/newargs' NEWARGSEX = 'py/newargsex' NEWOBJ = 'py/newobj' @@ -37,6 +37,7 @@ ID, INITARGS, ITERATOR, + MODULE, NEWARGS, NEWARGSEX, NEWOBJ, diff --git a/python/arepl_jsonpickle/tags_pd.py b/python/arepl_jsonpickle/tags_pd.py new file mode 100644 index 0000000..6d86c84 --- /dev/null +++ b/python/arepl_jsonpickle/tags_pd.py @@ -0,0 +1,235 @@ +""" +This file exists to automatically generate tags for numpy/pandas extensions. Because numpy/pandas follow a (relatively) rapid release schedule, updating types for new versions as bug reports come in could be infeasible, so we auto-generate them. Unfortunately, this file can't go into the ext folder because then the imports would break. +""" + +import re + +import numpy as np +import pandas as pd +from pandas.api.extensions import ExtensionDtype + + +def split_letters_numbers_brackets(s): + """ + Split the string into letters, numbers, and brackets (with their content). + This is a helper function for getting the smallest unique substring, for determining tags. + """ + # extract brackets and their content + brackets_match = re.search(r"\[.*?\]", s) + if brackets_match: + brackets_part = brackets_match.group() + s_clean = s.replace(brackets_part, "") + else: + brackets_part = "" + s_clean = s + + # find where the trailing digits start + index = len(s_clean) + while index > 0 and s_clean[index - 1].isdigit(): + index -= 1 + letters_part = s_clean[:index] + numbers_part = s_clean[index:] + return letters_part, numbers_part, brackets_part + + +def get_smallest_unique_substrings(strings, prefix="np"): + used_substrings = set() + used_letters_parts = set() + result = {} + + for s in strings: + if not isinstance(s, str): + s2 = s.__name__ + else: + s2 = s + letters_part, numbers_part, brackets_part = split_letters_numbers_brackets(s2) + letters_part = letters_part.lower() + unique_substring_found = False + + # handle the weird datetime64[...] and timedelta64[...] cases + if letters_part == "datetime" and numbers_part == "64" and brackets_part: + substr = "d64" + brackets_part + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + elif letters_part == "timedelta" and numbers_part == "64" and brackets_part: + substr = "t64" + brackets_part + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + else: + if letters_part in used_letters_parts: + # letters have been seen before, so use letters + numbers_part + brackets_part + if numbers_part or brackets_part: + # try first letter + numbers_part + brackets_part + substr = letters_part[0] + if numbers_part: + substr += numbers_part + if brackets_part: + substr += brackets_part + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + else: + # try letters_part + numbers_part + brackets_part + substr = letters_part + if numbers_part: + substr += numbers_part + if brackets_part: + substr += brackets_part + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + else: + # find a unique substring of just letters_part + for length in range(1, len(letters_part) + 1): + substr = letters_part[:length] + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + break + else: + # assign the smallest substring of letters_part + for length in range(1, len(letters_part) + 1): + substr = letters_part[:length] + if substr not in used_substrings: + result[s] = substr + used_substrings.add(substr) + unique_substring_found = True + break + used_letters_parts.add(letters_part) + + # last resort: assign the entire string + if not unique_substring_found: + result[s] = s + used_substrings.add(s) + + for key in result: + result[key] = f"{prefix}/" + result[key] + + return result + + +def all_subclasses(cls): + # use a set to avoid adding duplicates + subclasses = set() + for subclass in cls.__subclasses__(): + subclasses.add(subclass) + subclasses.update(all_subclasses(subclass)) + return list(subclasses) + + +def get_all_numpy_dtype_strings(): + dtypes = [] + + # sctypeDict is the dict of all possible numpy dtypes + some invalid dtypes too + for dtype in np.sctypeDict.values(): + try: + dtype_obj = np.dtype(dtype) + dtypes.append(dtype_obj.name.lower()) + except TypeError: + continue + + try: + char_codes = np._typing._char_codes + # datetime64 and timedelta64 are special, they have multiple variants + # python internally compiles and caches regex like this to speed it up + dt_variants = list( + dict.fromkeys( + [ + "datetime64[" + re.search(r"\[(.*?)\]", var).group(1) + "]" + for var in char_codes._DT64Codes.__args__ + if re.search(r"\[(.*?)\]", var) + ] + ) + ) + td_variants = list( + dict.fromkeys( + [ + "timedelta64[" + re.search(r"\[(.*?)\]", var).group(1) + "]" + for var in char_codes._TD64Codes.__args__ + if re.search(r"\[(.*?)\]", var) + ] + ) + ) + except AttributeError: + # AttributeError happens on numpy <1.25 because _typing isn't exposed to users + dt_variants = [ + 'datetime64[Y]', + 'datetime64[M]', + 'datetime64[W]', + 'datetime64[D]', + 'datetime64[h]', + 'datetime64[m]', + 'datetime64[s]', + 'datetime64[ms]', + 'datetime64[us]', + 'datetime64[ns]', + 'datetime64[ps]', + 'datetime64[fs]', + 'datetime64[as]', + ] + td_variants = [ + 'timedelta64[Y]', + 'timedelta64[M]', + 'timedelta64[W]', + 'timedelta64[D]', + 'timedelta64[h]', + 'timedelta64[m]', + 'timedelta64[s]', + 'timedelta64[ms]', + 'timedelta64[us]', + 'timedelta64[ns]', + 'timedelta64[ps]', + 'timedelta64[fs]', + 'timedelta64[as]', + ] + + dtypes += dt_variants + td_variants + + return list(dict.fromkeys(dtypes)) + + +def get_all_pandas_dtype_strings(): + dtypes = [] + + # get all pandas dtypes since it doesnt have a built-in api + extension_dtypes = all_subclasses(ExtensionDtype) + + for dtype_cls in extension_dtypes: + # some ExtensionDtype subclasses might not have a name attribute + if hasattr(dtype_cls, "name"): + try: + dtype_name = dtype_cls.name + dtypes.append(dtype_name.lower()) + except Exception: + continue + + # use the class object for things that np.dtype can't reconstruct + dtypes.extend([pd.Timestamp, pd.Timedelta, pd.Period, pd.Interval]) + + return list(dict.fromkeys(dtypes)) + + +np_dtypes = list( + dict.fromkeys( + [dtype for dtype in get_all_numpy_dtype_strings() if isinstance(dtype, str)] + ) +) + +pd_dtypes = list( + dict.fromkeys( + [dtype for dtype in get_all_pandas_dtype_strings() if isinstance(dtype, str)] + ) +) + + +TYPE_MAP = get_smallest_unique_substrings(np_dtypes, prefix="np") +TYPE_MAP.update(get_smallest_unique_substrings(pd_dtypes, prefix="pd")) + +REVERSE_TYPE_MAP = {v: k for k, v in TYPE_MAP.items()} diff --git a/python/arepl_jsonpickle/unpickler.py b/python/arepl_jsonpickle/unpickler.py index 491b4f7..b6a3307 100644 --- a/python/arepl_jsonpickle/unpickler.py +++ b/python/arepl_jsonpickle/unpickler.py @@ -1,11 +1,9 @@ # Copyright (C) 2008 John Paulett (john -at- paulett.org) -# Copyright (C) 2009-2018 David Aguilar (davvid -at- gmail.com) +# Copyright (C) 2009-2024 David Aguilar (davvid -at- gmail.com) # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. -from __future__ import absolute_import, division, unicode_literals - import dataclasses import sys import warnings @@ -21,46 +19,63 @@ def decode( context=None, keys=False, reset=True, - safe=False, + safe=True, classes=None, v1_decode=False, - on_missing="ignore", + on_missing='ignore', + handle_readonly=False, ): """Convert a JSON string into a Python object. - The keyword argument 'keys' defaults to False. - If set to True then jsonpickle will decode non-string dictionary keys - into python objects via the jsonpickle protocol. - - The keyword argument 'classes' defaults to None. - If set to a single class, or a sequence (list, set, tuple) of classes, - then the classes will be made available when constructing objects. - If set to a dictionary of class names to class objects, the class object - will be provided to jsonpickle to deserialize the class name into. - This can be used to give jsonpickle access to local classes that are not - available through the global module import scope, and the dict method can - be used to deserialize encoded objects into a new class. - - The keyword argument 'safe' defaults to False. - If set to True, eval() is avoided, but backwards-compatible - (pre-0.7.0) deserialization of repr-serialized objects is disabled. - - The keyword argument 'backend' defaults to None. - If set to an instance of jsonpickle.backend.JSONBackend, jsonpickle - will use that backend for deserialization. - - The keyword argument 'v1_decode' defaults to False. - If set to True it enables you to decode objects serialized in jsonpickle v1. - Please do not attempt to re-encode the objects in the v1 format! Version 2's - format fixes issue #255, and allows dictionary identity to be preserved - through an encode/decode cycle. - - The keyword argument 'on_missing' defaults to 'ignore'. - If set to 'error', it will raise an error if the class it's decoding is not - found. If set to 'warn', it will warn you in said case. If set to a - non-awaitable function, it will call said callback function with the class - name (a string) as the only parameter. Strings passed to on_missing are - lowercased automatically. + :param backend: If set to an instance of jsonpickle.backend.JSONBackend, jsonpickle + will use that backend for deserialization. + + :param context: Supply a pre-built Pickler or Unpickler object to the + `jsonpickle.encode` and `jsonpickle.decode` machinery instead + of creating a new instance. The `context` represents the currently + active Pickler and Unpickler objects when custom handlers are + invoked by jsonpickle. + + :param keys: If set to True then jsonpickle will decode non-string dictionary keys + into python objects via the jsonpickle protocol. + + :param reset: Custom pickle handlers that use the `Pickler.flatten` method or + `jsonpickle.encode` function must call `encode` with `reset=False` + in order to retain object references during pickling. + This flag is not typically used outside of a custom handler or + `__getstate__` implementation. + + :param safe: If set to ``False``, use of ``eval()`` for backwards-compatible (pre-0.7.0) + deserialization of repr-serialized objects is enabled. Defaults to ``True``. + The default value was ``False`` in jsonpickle v3 and changed to ``True`` in jsonpickle v4. + + .. warning:: + + ``eval()`` is used when set to ``False`` and is not secure against + malicious inputs. You should avoid setting ``safe=False``. + + :param classes: If set to a single class, or a sequence (list, set, tuple) of + classes, then the classes will be made available when constructing objects. + If set to a dictionary of class names to class objects, the class object + will be provided to jsonpickle to deserialize the class name into. + This can be used to give jsonpickle access to local classes that are not + available through the global module import scope, and the dict method can + be used to deserialize encoded objects into a new class. + + :param v1_decode: If set to True it enables you to decode objects serialized in + jsonpickle v1. Please do not attempt to re-encode the objects in the v1 format! + Version 2's format fixes issue #255, and allows dictionary identity to be + preserved through an encode/decode cycle. + + :param on_missing: If set to 'error', it will raise an error if the class it's + decoding is not found. If set to 'warn', it will warn you in said case. + If set to a non-awaitable function, it will call said callback function + with the class name (a string) as the only parameter. Strings passed to + `on_missing` are lowercased automatically. + + :param handle_readonly: If set to True, the Unpickler will handle objects encoded + with 'handle_readonly' properly. Do not set this flag for objects not encoded + with 'handle_readonly' set to True. >>> decode('"my string"') == 'my string' @@ -83,6 +98,7 @@ def decode( safe=safe, v1_decode=v1_decode, on_missing=on_missing, + handle_readonly=handle_readonly, ) data = backend.decode(string) return context.restore(data, reset=reset, classes=classes) @@ -102,7 +118,7 @@ def _is_json_key(key): return isinstance(key, compat.string_types) and key.startswith(tags.JSON_KEY) -class _Proxy(object): +class _Proxy: """Proxies are dummy objects that are later replaced by real instances The `restore()` function has to solve a tricky problem when pickling @@ -151,10 +167,12 @@ def get(self): def _obj_setattr(obj, attr, proxy): + """Use setattr to update a proxy entry""" setattr(obj, attr, proxy.get()) def _obj_setvalue(obj, idx, proxy): + """Use obj[key] assignments to update a proxy entry""" obj[idx] = proxy.get() @@ -192,13 +210,13 @@ def loadclass(module_and_name, classes=None): __import__(module) obj = sys.modules[module] for class_name in names[up_to:]: - try: - obj = getattr(obj, class_name) - except AttributeError: - continue + obj = getattr(obj, class_name) return obj except (AttributeError, ImportError, ValueError): continue + # NoneType is a special case and can not be imported/created + if module_and_name == "builtins.NoneType": + return type(None) return None @@ -223,7 +241,7 @@ def getargs(obj, classes=None): """Return arguments suitable for __new__()""" # Let saved newargs take precedence over everything if has_tag(obj, tags.NEWARGSEX): - raise ValueError("__newargs_ex__ returns both args and kwargs") + raise ValueError('__newargs_ex__ returns both args and kwargs') if has_tag(obj, tags.NEWARGS): return obj[tags.NEWARGS] @@ -266,6 +284,10 @@ def loadrepr(reprstr): """Returns an instance of the object from the object's repr() string. It involves the dynamic specification of code. + .. warning:: + + This function is unsafe and uses `eval()`. + >>> obj = loadrepr('datetime/datetime.datetime.now()') >>> obj.__class__.__name__ 'datetime' @@ -277,7 +299,25 @@ def loadrepr(reprstr): if '.' in localname: localname = module.split('.', 1)[0] mylocals[localname] = __import__(module) - return eval(evalstr) + return eval(evalstr, mylocals) + + +def _loadmodule(module_str): + """Returns a reference to a module. + + >>> fn = _loadmodule('datetime/datetime.datetime.fromtimestamp') + >>> fn.__name__ + 'fromtimestamp' + + """ + module, identifier = module_str.split('/') + result = __import__(module) + for name in identifier.split('.')[1:]: + try: + result = getattr(result, name) + except AttributeError: + return None + return result def has_tag_dict(obj, tag): @@ -297,15 +337,27 @@ def has_tag_dict(obj, tag): return tag in obj -class Unpickler(object): +def _passthrough(value): + """A function that returns its input as-is""" + return value + + +class Unpickler: def __init__( - self, backend=None, keys=False, safe=False, v1_decode=False, on_missing="ignore" + self, + backend=None, + keys=False, + safe=True, + v1_decode=False, + on_missing='ignore', + handle_readonly=False, ): self.backend = backend or json self.keys = keys self.safe = safe self.v1_decode = v1_decode self.on_missing = on_missing + self.handle_readonly = handle_readonly self.reset() @@ -327,18 +379,15 @@ def reset(self): def _swap_proxies(self): """Replace proxies with their corresponding instances""" - for (obj, attr, proxy, method) in self._proxies: + for obj, attr, proxy, method in self._proxies: method(obj, attr, proxy) self._proxies = [] - def _restore(self, obj): + def _restore(self, obj, _passthrough=_passthrough): # if obj isn't in these types, neither it nor nothing in it can have a tag # don't change the tuple of types to a set, it won't work with isinstance if not isinstance(obj, (str, list, dict, set, tuple)): - - def restore(x): - return x - + restore = _passthrough else: restore = self._restore_tags(obj) return restore(obj) @@ -374,8 +423,13 @@ def register_classes(self, classes): for cls in classes: self.register_classes(cls) elif isinstance(classes, dict): - for cls in classes.values(): - self.register_classes(cls) + self._classes.update( + ( + cls if isinstance(cls, str) else util.importable_name(cls), + handler, + ) + for cls, handler in classes.items() + ) else: self._classes[util.importable_name(classes)] = classes @@ -412,7 +466,7 @@ def _refname(self): def _mkref(self, obj): obj_id = id(obj) try: - self._obj_to_idx[obj_id] + _ = self._obj_to_idx[obj_id] except KeyError: self._obj_to_idx[obj_id] = len(self._objs) self._objs.append(obj) @@ -525,10 +579,15 @@ def _restore_type(self, obj): return obj return typeref + def _restore_module(self, obj): + obj = _loadmodule(obj[tags.MODULE]) + return self._mkref(obj) + + def _restore_repr_safe(self, obj): + obj = _loadmodule(obj[tags.REPR]) + return self._mkref(obj) + def _restore_repr(self, obj): - if self.safe: - # eval() is not allowed in safe mode - return None obj = loadrepr(obj[tags.REPR]) return self._mkref(obj) @@ -545,10 +604,10 @@ def _process_missing(self, class_name): if self.on_missing == 'ignore': pass elif self.on_missing == 'warn': - warnings.warn("Unpickler._restore_object could not find %s!" % class_name) + warnings.warn('Unpickler._restore_object could not find %s!' % class_name) elif self.on_missing == 'error': raise errors.ClassNotFoundError( - "Unpickler.restore_object could not find %s!" % class_name + 'Unpickler.restore_object could not find %s!' % class_name ) elif util.is_function(self.on_missing): self.on_missing(class_name) @@ -565,7 +624,7 @@ def _restore_pickled_key(self, key): ) return key - def _restore_key_fn(self): + def _restore_key_fn(self, _passthrough=_passthrough): """Return a callable that restores keys This function is responsible for restoring non-string keys @@ -580,13 +639,12 @@ def _restore_key_fn(self): if self.keys: restore_key = self._restore_pickled_key else: - - def restore_key(key): - return key - + restore_key = _passthrough return restore_key - def _restore_from_dict(self, obj, instance, ignorereserved=True): + def _restore_from_dict( + self, obj, instance, ignorereserved=True, restore_dict_items=True + ): restore_key = self._restore_key_fn() method = _obj_setattr deferred = {} @@ -600,9 +658,12 @@ def _restore_from_dict(self, obj, instance, ignorereserved=True): else: str_k = k self._namestack.append(str_k) - k = restore_key(k) - # step into the namespace - value = self._restore(v) + if restore_dict_items: + k = restore_key(k) + # step into the namespace + value = self._restore(v) + else: + value = v if util.is_noncomplex(instance) or util.is_dictionary_subclass(instance): try: if k == '__dict__': @@ -623,22 +684,25 @@ def _restore_from_dict(self, obj, instance, ignorereserved=True): # certain numpy objects require us to prepend a _ to the var # this should go in the np handler but I think this could be # useful for other code - setattr(instance, f"_{k}", value) + setattr(instance, f'_{k}', value) except dataclasses.FrozenInstanceError: # issue #240 # i think this is the only way to set frozen dataclass attrs object.__setattr__(instance, k, value) except AttributeError as e: - # some objects may raise this for read-only attributes (#422) + # some objects raise this for read-only attributes (#422) (#478) if ( - hasattr(instance, "__slots__") + hasattr(instance, '__slots__') and not len(instance.__slots__) and issubclass(instance.__class__, int) + and self.handle_readonly + # we have to handle this separately because of +483 + and issubclass(instance.__class__, str) ): continue raise e else: - setattr(instance, f"_{instance.__class__.__name__}{k}", value) + setattr(instance, f'_{instance.__class__.__name__}{k}', value) # This instance has an instance variable named `k` that is # currently a proxy and must be replaced @@ -666,12 +730,16 @@ def _restore_state(self, obj, instance): # implements described default handling # of state for object with instance dict # and no slots - instance = self._restore_from_dict(state, instance, ignorereserved=False) + instance = self._restore_from_dict( + state, instance, ignorereserved=False, restore_dict_items=False + ) elif has_slots: - instance = self._restore_from_dict(state[1], instance, ignorereserved=False) + instance = self._restore_from_dict( + state[1], instance, ignorereserved=False, restore_dict_items=False + ) if has_slots_and_dict: instance = self._restore_from_dict( - state[0], instance, ignorereserved=False + state[0], instance, ignorereserved=False, restore_dict_items=False ) elif not hasattr(instance, '__getnewargs__') and not hasattr( instance, '__getnewargs_ex__' @@ -700,7 +768,7 @@ def _restore_object_instance_variables(self, obj, instance): return instance - def _restore_object_instance(self, obj, cls, class_name=""): + def _restore_object_instance(self, obj, cls, class_name=''): # This is a placeholder proxy object which allows child objects to # reference the parent object before it has been instantiated. proxy = _Proxy() @@ -722,7 +790,7 @@ def _restore_object_instance(self, obj, cls, class_name=""): is_oldstyle = not (isinstance(cls, type) or getattr(cls, '__meta__', None)) try: - if (not is_oldstyle) and hasattr(cls, '__new__'): + if not is_oldstyle and hasattr(cls, '__new__'): # new style classes if factory: instance = cls.__new__(cls, factory, *args, **kwargs) @@ -772,6 +840,7 @@ def _restore_object(self, obj): return instance if cls is None: + self._process_missing(class_name) return self._mkref(obj) return self._restore_object_instance(obj, cls, class_name) @@ -833,14 +902,11 @@ def _restore_dict(self, obj): def _restore_tuple(self, obj): return tuple([self._restore(v) for v in obj[tags.TUPLE]]) - def _restore_tags(self, obj): + def _restore_tags(self, obj, _passthrough=_passthrough): + """Return the restoration function for the specified object""" try: - if not tags.RESERVED <= set(obj) and not type(obj) in (list, dict): - - def restore(x): - return x - - return restore + if not tags.RESERVED <= set(obj) and type(obj) not in (list, dict): + return _passthrough except TypeError: pass if type(obj) is dict: @@ -864,15 +930,17 @@ def restore(x): restore = self._restore_reduce elif tags.FUNCTION in obj: restore = self._restore_function - elif tags.REPR in obj: # Backwards compatibility - restore = self._restore_repr + elif tags.MODULE in obj: + restore = self._restore_module + elif tags.REPR in obj: + if self.safe: + restore = self._restore_repr_safe + else: + restore = self._restore_repr else: restore = self._restore_dict elif util.is_list(obj): restore = self._restore_list else: - - def restore(x): - return x - + restore = _passthrough return restore diff --git a/python/arepl_jsonpickle/util.py b/python/arepl_jsonpickle/util.py index 7a86300..3381131 100644 --- a/python/arepl_jsonpickle/util.py +++ b/python/arepl_jsonpickle/util.py @@ -8,8 +8,6 @@ """Helper functions for pickling and unpickling. Most functions assist in determining the type of an object. """ -from __future__ import absolute_import, division, unicode_literals - import base64 import collections import inspect @@ -301,7 +299,7 @@ def is_module_function(obj): and hasattr(obj, '__module__') and hasattr(obj, '__name__') and obj.__name__ != '' - ) + ) or is_cython_function(obj) def is_module(obj): @@ -388,6 +386,29 @@ def is_reducible(obj): return True +def is_cython_function(obj): + """Returns true if the object is a reference to a Cython function""" + return ( + callable(obj) + and hasattr(obj, '__repr__') + and repr(obj).startswith(' { - let pyEvaluator = new PythonEvaluator() + let pyEvaluator = new PythonExecutor() let input = { evalCode: "", savedCode: "", @@ -26,17 +26,19 @@ suite("python_evaluator Tests", () => { } const pythonStartupTime = 3000 - suiteSetup(function (done) { + suiteSetup(function () { this.timeout(pythonStartupTime + 500) - pyEvaluator.start() - // wait for for python to start - setTimeout(() => done(), pythonStartupTime) }) - setup(function () { + setup(function (done) { pyEvaluator.onPrint = () => { } pyEvaluator.onStderr = () => { } pyEvaluator.onResult = () => { } + pyEvaluator.start(done) + }) + + teardown(function(){ + pyEvaluator.stop(true) }) test("sanity check: 1+1=2", () => { @@ -171,11 +173,11 @@ suite("python_evaluator Tests", () => { test("returns result after print", function (done) { pyEvaluator.onPrint = (stdout) => { assert.strictEqual(stdout, "hello world" + EOL) - assert.strictEqual(pyEvaluator.executing, true) + assert.strictEqual(pyEvaluator.state, PythonState.Executing) } pyEvaluator.onResult = () => { - assert.strictEqual(pyEvaluator.executing, false) + assert.strictEqual(pyEvaluator.state, PythonState.DirtyFree) done() } @@ -243,27 +245,6 @@ suite("python_evaluator Tests", () => { pyEvaluator.execCode(input) }) - test("dump works properly when called repeatedly", function (done) { - let numResults = 0; - pyEvaluator.onResult = (result) => { - numResults += 1 - if (numResults == 3) { - assert.strictEqual(result.done, true) - done() - return - } - assert.notStrictEqual(result, null) - assert.strictEqual(isEmpty(result.userError), true) - assert.strictEqual(result.internalError, null) - assert.strictEqual(result.userVariables['dump output'], numResults) - assert.strictEqual(result.caller, '') - assert.strictEqual(result.lineno, numResults) - } - input.evalCode = `from arepl_dump import dump;dump(1) -dump(2)` - pyEvaluator.execCode(input) - }) - test("returns syntax error when incorrect syntax", function (done) { pyEvaluator.onResult = (result) => { assert.notStrictEqual(result.userError, null) @@ -296,20 +277,13 @@ dump(2)` this.timeout(this.timeout() + pythonStartupTime) - assert.strictEqual(pyEvaluator.running, true) - assert.strictEqual(pyEvaluator.restarting, false) - assert.strictEqual(pyEvaluator.executing, false) + assert.strictEqual(pyEvaluator.state, PythonState.FreshFree) pyEvaluator.restart(() => { - assert.strictEqual(pyEvaluator.running, true) - assert.strictEqual(pyEvaluator.executing, false) - - setTimeout(() => { - // by now python should be restarted and accepting input - pyEvaluator.onResult = () => done() - input.evalCode = "x" - pyEvaluator.execCode(input) - }, 1500) + assert.strictEqual(pyEvaluator.state, PythonState.FreshFree) + pyEvaluator.onResult = () => done() + input.evalCode = "x" + pyEvaluator.execCode(input) }) }) diff --git a/pythonExecutors.test.ts b/pythonExecutors.test.ts new file mode 100644 index 0000000..ebff664 --- /dev/null +++ b/pythonExecutors.test.ts @@ -0,0 +1,79 @@ +/*global suite, test*/ //comment for eslint + +// This test uses TDD Mocha. see https://mochajs.org/ for help +// http://ricostacruz.com/cheatsheets/mocha-tdd + +// The module 'assert' provides assertion methods from node +import * as assert from 'assert' + +import { PythonExecutors } from './pythonExecutors' + +suite("PythonExecutors", () => { + let pyExecutors = new PythonExecutors() + let input = { + evalCode: "", + savedCode: "", + filePath: "", + usePreviousVariables: false, + show_global_vars: true, + default_filter_vars: [], + default_filter_types: ["", ""] + } + const pythonStartupTime = 3000 + const num_executors = 2 + + suiteSetup(function () { + this.timeout(pythonStartupTime + 500) + }) + + setup(function (done) { + pyExecutors.onPrint = () => { } + pyExecutors.onStderr = () => { } + pyExecutors.onResult = () => { } + pyExecutors.start(num_executors) + done() + }) + + teardown(function(){ + pyExecutors.stop(true) + }) + + test("can do multiple executions", function (done) { + // we do three test runs because given that only two executors exist: + // if first fails: something is wrong with first executor + // if second fails: something is wrong with second executor + // if thid fails: logic that waits for a executor to become free is broken + let num_results = 0 + pyExecutors.onResult = (result) => { + num_results+=1 + if(num_results == 1){ + assert.strictEqual(result.userVariables['x'], 1) + input.evalCode = "x=2" + pyExecutors.execCode(input) + } + else if(num_results == 2){ + assert.strictEqual(result.userVariables['x'], 2) + input.evalCode = "x=3" + pyExecutors.execCode(input) + } + else if(num_results > num_executors){ + assert.strictEqual(result.userVariables['x'], 3) + done() + } + } + input.evalCode = "x=1" + pyExecutors.execCode(input) + }) + + test("last execution takes precedence", function (done) { + pyExecutors.onResult = (result) => { + assert.strictEqual(result.userVariables['x'], 2) + done() + } + input.evalCode = "x=1" + pyExecutors.execCode(input) + input.evalCode = "x=2" + pyExecutors.execCode(input) + }) + +}) diff --git a/pythonExecutors.ts b/pythonExecutors.ts new file mode 100644 index 0000000..e6053c6 --- /dev/null +++ b/pythonExecutors.ts @@ -0,0 +1,144 @@ +import { Options, PythonShell } from "python-shell"; +import { ExecArgs, PythonExecutor, PythonResult, PythonState } from "./PythonExecutor"; + +export * from './PythonExecutor' + +/** + * Starts multiple python executors for running user code. + * Will manage them for you, so you can treat this class + * as a single executor. + */ +export class PythonExecutors { + private executors: PythonExecutor[] = [] + private currentExecutorIndex: number = 0 + private waitForFreeExecutor: NodeJS.Timeout + + constructor(public options: Options = {}){} + + start(numExecutors=3){ + // we default to three executors, as it should be enough so that there is always + // one available to accept incoming code + + if(this.executors.length != 0) throw Error('already started!') + + for(let i = 0; i < numExecutors; i++){ + console.log('starting executor ' + i.toString()) + const pyExecutor = new PythonExecutor(this.options) + pyExecutor.start(()=>{}) + pyExecutor.evaluatorName = i.toString() + pyExecutor.onResult = result => { + // Other executor may send a result right before it dies + // So we use this function to only capture result from active executor + if(i == this.currentExecutorIndex) this.onResult(result) + } + pyExecutor.onPrint = print => { + if(i == this.currentExecutorIndex) this.onPrint(print) + } + pyExecutor.onStderr = stderr => { + if(i == this.currentExecutorIndex) this.onStderr(stderr) + } + pyExecutor.pyshell.on('error', this.onError) + pyExecutor.pyshell.childProcess.on('exit', exitCode => { + if(exitCode != 0) this.onAbnormalExit(exitCode) + }) + this.executors.push(pyExecutor) + } + } + + /** + * Sends code to the current executor. + * If current executor is busy, nothing happens + */ + execCodeCurrent(code: ExecArgs){ + this.executors[this.currentExecutorIndex].execCode(code) + } + + /** + * sends code to a free executor to be executed + * Side-effect: restarts dirty executors + */ + execCode(code: ExecArgs){ + let freeExecutor = this.executors.find(executor=>executor.state == PythonState.FreshFree) + + // old code is now irrelevant, if we are still waiting to send old code + // we should stop waiting + clearInterval(this.waitForFreeExecutor) + // executors running old code are now irrelevant, restart them + this.executors.filter(executor => executor.state == PythonState.Executing || executor.state == PythonState.DirtyFree) + .forEach(executor => executor.restart()) + if(!freeExecutor){ + this.waitForFreeExecutor = setInterval(()=>{ + freeExecutor = this.executors.find(executor=>executor.state == PythonState.FreshFree) + if(freeExecutor){ + freeExecutor.execCode(code) + this.currentExecutorIndex = parseInt(freeExecutor.evaluatorName) + clearInterval(this.waitForFreeExecutor) + } + }, 60) + } + else{ + freeExecutor.execCode(code) + this.currentExecutorIndex = parseInt(freeExecutor.evaluatorName) + } + } + + stop(kill_immediately=false){ + clearInterval(this.waitForFreeExecutor) + this.executors.forEach(executor => executor.stop(kill_immediately)) + this.executors = [] + } + + /** + * checks syntax without executing code + * @param {string} code + * @returns {Promise} rejects w/ stderr if syntax failure + */ + async checkSyntax(code: string) { + return PythonShell.checkSyntax(code); + } + + /** + * Overwrite this with your own handler. + * is called when active executor fails or completes + */ + onResult(foo: PythonResult) { } + + /** + * Overwrite this with your own handler. + * Is called when active executor prints + * @param {string} foo + */ + onPrint(foo: string) { } + + /** + * Overwrite this with your own handler. + * Is called when active executor logs stderr + * @param {string} foo + */ + onStderr(foo: string) { } + + /** + * Overwrite this with your own handler. + * Is called when there is a Node.JS error event with the python process + * The 'error' event is emitted whenever: + The process could not be spawned, or + The process could not be killed, or + Sending a message to the child process failed. + */ + onError(err: NodeJS.ErrnoException) { } + + onAbnormalExit(exitCode: number) {} + + /** + * delays execution of function by ms milliseconds, resetting clock every time it is called + * Useful for real-time execution so execCode doesn't get called too often + * thanks to https://stackoverflow.com/a/1909508/6629672 + */ + debounce = (function () { + let timer: any = 0; + return function (callback, ms: number, ...args: any[]) { + clearTimeout(timer); + timer = setTimeout(callback, ms, args); + }; + })(); +} \ No newline at end of file