From d9b1dcd6e65c0cdb1b619813e50d884a901fc7f7 Mon Sep 17 00:00:00 2001 From: Philippe Renzen Date: Thu, 4 Jan 2024 15:29:41 +0100 Subject: [PATCH 1/4] Wip --- package-lock.json | 6 ++++++ package.json | 1 + src/lib/Generator.class.ts | 14 +++++++++++++- src/lib/LDWorkbenchConfiguration.d.ts | 8 ++++++++ static/example/config.yml | 1 + static/ld-workbench.schema.json | 4 ++++ 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/package-lock.json b/package-lock.json index 701adb8..545cb56 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,6 +21,7 @@ "millify": "^6.1.0", "n3": "^1.17.2", "ora": "^7.0.1", + "parse-duration": "^1.1.0", "pretty-ms": "^8.0.0", "sparqljs": "^3.7.1" }, @@ -8320,6 +8321,11 @@ "node": ">=6" } }, + "node_modules/parse-duration": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/parse-duration/-/parse-duration-1.1.0.tgz", + "integrity": "sha512-z6t9dvSJYaPoQq7quMzdEagSFtpGu+utzHqqxmpVWNNZRIXnvqyCvn9XsTdh7c/w0Bqmdz3RB3YnRaKtpRtEXQ==" + }, "node_modules/parse-link-header": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/parse-link-header/-/parse-link-header-2.0.0.tgz", diff --git a/package.json b/package.json index 5900885..fd1f9ef 100644 --- a/package.json +++ b/package.json @@ -94,6 +94,7 @@ "millify": "^6.1.0", "n3": "^1.17.2", "ora": "^7.0.1", + "parse-duration": "^1.1.0", "pretty-ms": "^8.0.0", "sparqljs": "^3.7.1" } diff --git a/src/lib/Generator.class.ts b/src/lib/Generator.class.ts index a95b23f..3120fa1 100644 --- a/src/lib/Generator.class.ts +++ b/src/lib/Generator.class.ts @@ -9,6 +9,8 @@ import type { Endpoint, QueryEngine } from "./types.js"; import getEngine from '../utils/getEngine.js'; import getEngineSource from '../utils/getEngineSource.js'; import EventEmitter from 'node:events'; +import { setTimeout } from 'node:timers/promises'; +import parse from 'parse-duration' const DEFAULT_BATCH_SIZE = 10 @@ -24,6 +26,7 @@ declare interface Generator { class Generator extends EventEmitter { private readonly query: ConstructQuery; private readonly engine: QueryEngine; + private readonly delay: number | undefined private iterationsProcessed: number = 0 private iterationsIncoming?: number private statements: number = 0 @@ -34,6 +37,12 @@ class Generator extends EventEmitter { if (stage.configuration.generator === undefined) throw new Error('Error in Generator: no generators were present in stage configuration') super() this.index = index + if (stage.configuration.iterator.delay !== undefined){ + console.log('🪵 | file: Generator.class.ts:41 | Generator | constructor | stage.configuration.iterator.delay:', stage.configuration.iterator.delay) + const delay = parse(stage.configuration.iterator.delay) + if (delay === undefined) throw new Error(`Error in stage \`${stage.configuration.name}\`: incorrect delay format was provided.`) + this.delay = delay + } this.query = getSPARQLQuery( stage.configuration.generator[this.index].query, "construct" @@ -95,8 +104,11 @@ class Generator extends EventEmitter { this.emit("error", error(e)) }) this.$thisList.length = 0 + console.log(this.delay) + if (this.delay !== undefined) setTimeout(this.delay).then(() => {}).catch(e => {throw e}) + } } } -} + export default Generator \ No newline at end of file diff --git a/src/lib/LDWorkbenchConfiguration.d.ts b/src/lib/LDWorkbenchConfiguration.d.ts index ff295a2..72e7f4f 100644 --- a/src/lib/LDWorkbenchConfiguration.d.ts +++ b/src/lib/LDWorkbenchConfiguration.d.ts @@ -49,6 +49,10 @@ export interface LDWorkbenchConfiguration { * Overrule the iterator's behaviour of fetching 10 results per request, regardless of any limit's in your query. */ batchSize?: number; + /** + * Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours') + */ + delay?: string; }; /** * @minItems 1 @@ -117,6 +121,10 @@ export interface LDWorkbenchConfiguration { * Overrule the iterator's behaviour of fetching 10 results per request, regardless of any limit's in your query. */ batchSize?: number; + /** + * Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours') + */ + delay?: string; }; /** * @minItems 1 diff --git a/static/example/config.yml b/static/example/config.yml index 098ebe5..99e4156 100644 --- a/static/example/config.yml +++ b/static/example/config.yml @@ -14,6 +14,7 @@ stages: iterator: query: file://static/example/iterator-stage-1.rq endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql + delay: "30 seconds" generator: # First generator - query: file://static/example/generator-stage-1-1.rq diff --git a/static/ld-workbench.schema.json b/static/ld-workbench.schema.json index 166e0fe..ea06b09 100644 --- a/static/ld-workbench.schema.json +++ b/static/ld-workbench.schema.json @@ -47,6 +47,10 @@ "type": "number", "minimum": 1, "description": "Overrule the iterator's behaviour of fetching 10 results per request, regardless of any limit's in your query." + }, + "delay": { + "type": "string", + "description": "Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours')" } } }, From db0201554c01a68a7148704600004c1195d5f98a Mon Sep 17 00:00:00 2001 From: Laurens Rietveld Date: Thu, 4 Jan 2024 20:22:23 +0100 Subject: [PATCH 2/4] probable settimeout fix --- src/lib/Generator.class.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/Generator.class.ts b/src/lib/Generator.class.ts index 3120fa1..8e3c209 100644 --- a/src/lib/Generator.class.ts +++ b/src/lib/Generator.class.ts @@ -9,7 +9,6 @@ import type { Endpoint, QueryEngine } from "./types.js"; import getEngine from '../utils/getEngine.js'; import getEngineSource from '../utils/getEngineSource.js'; import EventEmitter from 'node:events'; -import { setTimeout } from 'node:timers/promises'; import parse from 'parse-duration' const DEFAULT_BATCH_SIZE = 10 @@ -97,18 +96,19 @@ class Generator extends EventEmitter { }) stream.on('end', () => { if (this.iterationsIncoming !== undefined && this.iterationsProcessed >= this.iterationsIncoming) { - this.emit('end', this.iterationsIncoming, this.statements, this.iterationsProcessed) + setTimeout(() => { + this.emit('end', this.iterationsIncoming!, this.statements, this.iterationsProcessed) + }, this.delay ?? 0) + } }) }).catch(e => { this.emit("error", error(e)) }) this.$thisList.length = 0 - console.log(this.delay) - if (this.delay !== undefined) setTimeout(this.delay).then(() => {}).catch(e => {throw e}) - } } } +} export default Generator \ No newline at end of file From f0093be71dd932eca852c0741c07b57fad3b1416 Mon Sep 17 00:00:00 2001 From: Philippe Renzen Date: Fri, 5 Jan 2024 10:59:17 +0100 Subject: [PATCH 3/4] Added feature to delay iterator's endpoint requests (including generator's construct query requests) --- src/lib/Generator.class.ts | 13 +--- src/lib/Iterator.class.ts | 92 +++++++++++++++------------ src/lib/LDWorkbenchConfiguration.d.ts | 4 +- static/example/config.yml | 2 +- static/ld-workbench.schema.json | 2 +- 5 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/lib/Generator.class.ts b/src/lib/Generator.class.ts index 8e3c209..9643fcd 100644 --- a/src/lib/Generator.class.ts +++ b/src/lib/Generator.class.ts @@ -9,7 +9,6 @@ import type { Endpoint, QueryEngine } from "./types.js"; import getEngine from '../utils/getEngine.js'; import getEngineSource from '../utils/getEngineSource.js'; import EventEmitter from 'node:events'; -import parse from 'parse-duration' const DEFAULT_BATCH_SIZE = 10 @@ -25,7 +24,6 @@ declare interface Generator { class Generator extends EventEmitter { private readonly query: ConstructQuery; private readonly engine: QueryEngine; - private readonly delay: number | undefined private iterationsProcessed: number = 0 private iterationsIncoming?: number private statements: number = 0 @@ -36,12 +34,6 @@ class Generator extends EventEmitter { if (stage.configuration.generator === undefined) throw new Error('Error in Generator: no generators were present in stage configuration') super() this.index = index - if (stage.configuration.iterator.delay !== undefined){ - console.log('🪵 | file: Generator.class.ts:41 | Generator | constructor | stage.configuration.iterator.delay:', stage.configuration.iterator.delay) - const delay = parse(stage.configuration.iterator.delay) - if (delay === undefined) throw new Error(`Error in stage \`${stage.configuration.name}\`: incorrect delay format was provided.`) - this.delay = delay - } this.query = getSPARQLQuery( stage.configuration.generator[this.index].query, "construct" @@ -96,10 +88,7 @@ class Generator extends EventEmitter { }) stream.on('end', () => { if (this.iterationsIncoming !== undefined && this.iterationsProcessed >= this.iterationsIncoming) { - setTimeout(() => { - this.emit('end', this.iterationsIncoming!, this.statements, this.iterationsProcessed) - }, this.delay ?? 0) - + this.emit('end', this.iterationsIncoming, this.statements, this.iterationsProcessed) } }) }).catch(e => { diff --git a/src/lib/Iterator.class.ts b/src/lib/Iterator.class.ts index 7f67f6c..793dcfa 100644 --- a/src/lib/Iterator.class.ts +++ b/src/lib/Iterator.class.ts @@ -10,6 +10,7 @@ import getEndpoint from "../utils/getEndpoint.js"; import type { Endpoint, QueryEngine } from "./types.js"; import getEngine from "../utils/getEngine.js"; import getEngineSource from "../utils/getEngineSource.js"; +import parse from 'parse-duration' const DEFAULT_LIMIT = 10; declare interface Iterator { @@ -26,6 +27,7 @@ class Iterator extends EventEmitter { private readonly query: SelectQuery; public readonly endpoint: Endpoint; private readonly engine: QueryEngine; + private readonly delay: number | undefined private source: string = ""; private $offset = 0; private totalResults = 0; @@ -39,52 +41,58 @@ class Iterator extends EventEmitter { DEFAULT_LIMIT; this.endpoint = getEndpoint(stage); this.engine = getEngine(this.endpoint); + if (stage.configuration.iterator.delay !== undefined){ + const delay = parse(stage.configuration.iterator.delay) + if (delay === undefined) throw new Error(`Error in stage \`${stage.configuration.name}\`: incorrect delay format was provided.`) + this.delay = delay + } } public run(): void { - let resultsPerPage = 0; - if (this.source === "") this.source = getEngineSource(this.endpoint); - this.query.offset = this.$offset; - const queryString = getSPARQLQueryString(this.query); - const error = (e: any): Error => new Error( - `The Iterator did not run succesfully, it could not get the results from the endpoint ${this.source} (offset: ${this.$offset}, limit ${this.query.limit}): ${(e as Error).message}` - ) - this.engine - .queryBindings(queryString, { - sources: [this.source], - }) - .then((stream) => { - stream.on("data", (binding: Bindings) => { - resultsPerPage++; - if (!binding.has("this")) - throw new Error("Missing binding $this in the Iterator result."); - const $this = binding.get("this")!; - if ($this.termType !== "NamedNode") { - throw new Error( - `Binding $this in the Iterator result must be an Iri/NamedNode, but it is of type ${$this.termType}.` - ); - } else { - this.emit("data", $this); - } - }); - - stream.on("end", () => { - this.totalResults += resultsPerPage; - this.$offset += this.query.limit!; - if (resultsPerPage < this.query.limit!) { - this.emit("end", this.totalResults); - } else { - this.run(); - } - }); - - stream.on('error', (e) => { - this.emit("error", error(e)) + setTimeout(() => { + let resultsPerPage = 0; + if (this.source === "") this.source = getEngineSource(this.endpoint); + this.query.offset = this.$offset; + const queryString = getSPARQLQueryString(this.query); + const error = (e: any): Error => new Error( + `The Iterator did not run succesfully, it could not get the results from the endpoint ${this.source} (offset: ${this.$offset}, limit ${this.query.limit}): ${(e as Error).message}` + ) + this.engine + .queryBindings(queryString, { + sources: [this.source], + }) + .then((stream) => { + stream.on("data", (binding: Bindings) => { + resultsPerPage++; + if (!binding.has("this")) + throw new Error("Missing binding $this in the Iterator result."); + const $this = binding.get("this")!; + if ($this.termType !== "NamedNode") { + throw new Error( + `Binding $this in the Iterator result must be an Iri/NamedNode, but it is of type ${$this.termType}.` + ); + } else { + this.emit("data", $this); + } + }); + stream.on("end", () => { + this.totalResults += resultsPerPage; + this.$offset += this.query.limit!; + if (resultsPerPage < this.query.limit!) { + this.emit("end", this.totalResults); + } else { + this.run(); + } + }); + + stream.on('error', (e) => { + this.emit("error", error(e)) + }) }) - }) - .catch((e) => { - this.emit("error", error(e)) - }); + .catch((e) => { + this.emit("error", error(e)) + }); + }, this.delay ?? 0) } } diff --git a/src/lib/LDWorkbenchConfiguration.d.ts b/src/lib/LDWorkbenchConfiguration.d.ts index 72e7f4f..df718c6 100644 --- a/src/lib/LDWorkbenchConfiguration.d.ts +++ b/src/lib/LDWorkbenchConfiguration.d.ts @@ -50,7 +50,7 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; /** - * Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours') + * Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). */ delay?: string; }; @@ -122,7 +122,7 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; /** - * Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours') + * Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). */ delay?: string; }; diff --git a/static/example/config.yml b/static/example/config.yml index 99e4156..4df5e01 100644 --- a/static/example/config.yml +++ b/static/example/config.yml @@ -14,7 +14,7 @@ stages: iterator: query: file://static/example/iterator-stage-1.rq endpoint: https://api.triplydb.com/datasets/Triply/iris/services/demo-service/sparql - delay: "30 seconds" + delay: "50ms" generator: # First generator - query: file://static/example/generator-stage-1-1.rq diff --git a/static/ld-workbench.schema.json b/static/ld-workbench.schema.json index ea06b09..001a738 100644 --- a/static/ld-workbench.schema.json +++ b/static/ld-workbench.schema.json @@ -50,7 +50,7 @@ }, "delay": { "type": "string", - "description": "Human readable delay for the iterator between endpoint requests (e.g. '1s', '2m', '3 hours')" + "description": "Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). " } } }, From f7e41b79d82d59f2169750791ed1dd15e099adb3 Mon Sep 17 00:00:00 2001 From: Philippe Renzen Date: Fri, 5 Jan 2024 16:20:03 +0100 Subject: [PATCH 4/4] Adjusted delay description --- src/lib/LDWorkbenchConfiguration.d.ts | 4 ++-- static/ld-workbench.schema.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/LDWorkbenchConfiguration.d.ts b/src/lib/LDWorkbenchConfiguration.d.ts index df718c6..98d01d5 100644 --- a/src/lib/LDWorkbenchConfiguration.d.ts +++ b/src/lib/LDWorkbenchConfiguration.d.ts @@ -50,7 +50,7 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; /** - * Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). + * Human readable time delay for the iterator's SPARQL endpoint requests (e.g. '5ms', '100 milliseconds', '1s'). */ delay?: string; }; @@ -122,7 +122,7 @@ export interface LDWorkbenchConfiguration { */ batchSize?: number; /** - * Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). + * Human readable time delay for the iterator's SPARQL endpoint requests (e.g. '5ms', '100 milliseconds', '1s'). */ delay?: string; }; diff --git a/static/ld-workbench.schema.json b/static/ld-workbench.schema.json index 001a738..d2deac3 100644 --- a/static/ld-workbench.schema.json +++ b/static/ld-workbench.schema.json @@ -50,7 +50,7 @@ }, "delay": { "type": "string", - "description": "Human readable time delay for the iterator's SPARQL endpoint requests - recommended to keep the delay below 1 second (e.g. '5ms', '100 milliseconds', '1s'). " + "description": "Human readable time delay for the iterator's SPARQL endpoint requests (e.g. '5ms', '100 milliseconds', '1s'). " } } },