diff --git a/__tests__/main-test.ts b/__tests__/main-test.ts index 128248d..d0a44bd 100644 --- a/__tests__/main-test.ts +++ b/__tests__/main-test.ts @@ -2,6 +2,7 @@ import { DataFactory, Parser } from 'n3'; import fs from 'fs'; import path from 'path'; import { write } from '../lib'; +import 'jest-rdf'; async function getQuads(file: string) { const parser = new Parser({ rdfStar: true } as any); @@ -16,10 +17,21 @@ async function getQuads(file: string) { }; } +const loose: Record = { + 'bnodes5.ttl': true, +}; + it('It should correctly write turtle files', async () => { for (const file of fs.readdirSync(path.join(__dirname, '..', 'data'))) { - const { string } = await getQuads(file); - expect(string.replace(/b0_/g, '')).toEqual(fs.readFileSync(path.join(__dirname, '..', 'data', file)).toString()); + const { string, quads } = await getQuads(file); + + if (loose[file]) { + // If loose we only need the quads to match when we re-parse the string + expect((new Parser()).parse(string)).toBeRdfIsomorphic(quads); + } else { + // If not loose we expect an exact string match + expect(string.replace(/b\d+_/g, '')).toEqual(fs.readFileSync(path.join(__dirname, '..', 'data', file)).toString()); + } } }); diff --git a/data/bnodes.ttl b/data/bnodes.ttl index 94ca93c..d0985b1 100644 --- a/data/bnodes.ttl +++ b/data/bnodes.ttl @@ -6,4 +6,4 @@ ex:t a _:b1 . ex:k a _:b1 . -_:b2 a ex:Thing . +[] a ex:Thing . diff --git a/data/bnodes1.ttl b/data/bnodes1.ttl new file mode 100644 index 0000000..1bb1c93 --- /dev/null +++ b/data/bnodes1.ttl @@ -0,0 +1,9 @@ +@prefix ex: . + +ex:j a _:b1 . + +ex:t a _:b1, [] . + +ex:k a _:b1, [] . + +[] a ex:Thing . diff --git a/data/bnodes2.ttl b/data/bnodes2.ttl new file mode 100644 index 0000000..f81e06e --- /dev/null +++ b/data/bnodes2.ttl @@ -0,0 +1,5 @@ +@prefix ex: . + +[] ex:p [ + ex:p2 ex:o + ] . diff --git a/data/bnodes3.ttl b/data/bnodes3.ttl new file mode 100644 index 0000000..3ecd220 --- /dev/null +++ b/data/bnodes3.ttl @@ -0,0 +1,3 @@ +@prefix ex: . + +[] ex:p [], [], ([] []) . diff --git a/data/bnodes4.ttl b/data/bnodes4.ttl new file mode 100644 index 0000000..df5ccd6 --- /dev/null +++ b/data/bnodes4.ttl @@ -0,0 +1,3 @@ +@prefix ex: . + +_:s3 a _:s3 . diff --git a/data/bnodes5.ttl b/data/bnodes5.ttl new file mode 100644 index 0000000..197d4c1 --- /dev/null +++ b/data/bnodes5.ttl @@ -0,0 +1,15 @@ +@prefix ex: . + +[] ex:p [ + ex:p [] + ] . + +_:s1 ex:t ex:k . + +ex:s ex:p _:s1 . + +_:s2 ex:t ex:k . + +_:s3 ex:p _:s2 . + +_:s3 a _:s3 . diff --git a/data/bnodes6.ttl b/data/bnodes6.ttl new file mode 100644 index 0000000..fff03a7 --- /dev/null +++ b/data/bnodes6.ttl @@ -0,0 +1,7 @@ +@prefix : . + +:o :p _:s1 . + +:o1 :p _:s1 . + +_:s1 :p :o . diff --git a/lib/ttlwriter.ts b/lib/ttlwriter.ts index 41f23d4..8a73521 100644 --- a/lib/ttlwriter.ts +++ b/lib/ttlwriter.ts @@ -51,21 +51,63 @@ export class TTLWriter { this.writer.newLine(1); + // First write Named Node subjects for (const subject of this.store.getSubjects(null, null, null)) { if (subject.termType === 'NamedNode') { await this.writeTurtleSubject(subject); } } + // Then write blank node subjects that can be anonymized at the top level for (const subject of this.store.getSubjects(null, null, null)) { - await this.writeTurtleSubject(subject); + if ( + subject.termType === 'BlankNode' + && !this.explicitBnodes.has(subject.value) + // Ensure still in store as subject + && this.store.getQuads(subject, null, null, null).length > 0 + && this.store.getQuads(null, subject, null, null).length === 0 + && this.store.getQuads(null, null, subject, null).length === 0 + ) { + await this.writeTurtleSubject(subject, true); + } + } + + // Next write blank nodes that cannot be anonymized within another set of statements + // (it is not an explicit bnode, + // occurs as the object of one quad, + // and only as the subject in other quads) + for (const subject of this.store.getSubjects(null, null, null)) { + // Ensure still in store as subject + if ( + subject.termType === 'BlankNode' && !( + this.store.getQuads(null, null, subject, null).length !== 1 + || !this.store.getQuads(null, null, subject, null)[0].subject.equals(subject) + ) + ) { + this.explicitBnodes.add(subject.value); + await this.writeTurtleSubject(subject); + } + } + + for (const subject of this.store.getSubjects(null, null, null)) { + // Ensure still in store as subject + if (this.store.getQuads(subject, null, null, null).length > 0) { + if (subject.termType === 'BlankNode') { + this.explicitBnodes.add(subject.value); + } + await this.writeTurtleSubject(subject); + } } this.writer.end(); } - private async writeTurtleSubject(term: Term) { - this.writer.add(await this.termToString(term)); + private async writeTurtleSubject(term: Term, anonymizeSubject = false) { + if (anonymizeSubject) { + this.writer.add('[]'); + } else { + this.writer.add(await this.termToString(term)); + } this.writer.add(' '); this.writer.indent(); await this.writeTurtlePredicates(term); @@ -83,18 +125,17 @@ export class TTLWriter { } } } if (term.termType === 'Literal' && (term.datatypeString === 'http://www.w3.org/2001/XMLSchema#integer' - || term.datatypeString === 'http://www.w3.org/2001/XMLSchema#boolean')) { + || term.datatypeString === 'http://www.w3.org/2001/XMLSchema#boolean')) { return term.value; } if (term.termType === 'Quad') { if (!term.graph.equals(DataFactory.defaultGraph())) { throw new Error('Default graph expected on nested quads'); } - return `<<${await this.termToString(term.subject as any)} ${ - term.predicate.termType === 'NamedNode' - && term.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' - ? 'a' - : await this.termToString(term.predicate as any)} ${await this.termToString(term.object as any)}>>`; + return `<<${await this.termToString(term.subject as any)} ${term.predicate.termType === 'NamedNode' + && term.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' + ? 'a' + : await this.termToString(term.predicate as any)} ${await this.termToString(term.object as any)}>>`; } return termToString(term); } @@ -146,11 +187,11 @@ export class TTLWriter { const nonBlankObjects: Term[] = []; for (const object of objects) { if (object.termType === 'BlankNode' - && [ - ...this.store.match(null, null, object), - ...this.store.match(null, object, null), - ].length === 0 - && !this.explicitBnodes.has(object.value) + && [ + ...this.store.match(null, null, object), + ...this.store.match(null, object, null), + ].length === 0 + && !this.explicitBnodes.has(object.value) ) { blankObjects.push(object); } else { @@ -176,11 +217,13 @@ export class TTLWriter { } if (!(await this.writeList(blank))) { this.writer.add('['); - this.writer.indent(); - this.writer.newLine(1); - await this.writeTurtlePredicates(blank); - this.writer.deindent(); - this.writer.newLine(1); + if (this.store.getQuads(blank, null, null, null).length > 0) { + this.writer.indent(); + this.writer.newLine(1); + await this.writeTurtlePredicates(blank); + this.writer.deindent(); + this.writer.newLine(1); + } this.writer.add(']'); } } diff --git a/package-lock.json b/package-lock.json index 01b0a6f..83d7958 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,6 +21,7 @@ "eslint-config-airbnb-base": "^15.0.0", "eslint-plugin-import": "^2.27.5", "jest": "^29.3.1", + "jest-rdf": "^1.8.0", "pre-commit": "^1.2.2", "semantic-release": "^20.0.2", "ts-jest": "^29.0.5", @@ -4166,6 +4167,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/hash.js": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz", + "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==", + "dev": true, + "dependencies": { + "inherits": "^2.0.3", + "minimalistic-assert": "^1.0.1" + } + }, "node_modules/hook-std": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/hook-std/-/hook-std-3.0.0.tgz", @@ -5160,6 +5171,18 @@ } } }, + "node_modules/jest-rdf": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/jest-rdf/-/jest-rdf-1.8.0.tgz", + "integrity": "sha512-eQFYrRy7XTADZRVwxj21gBTkW9hRaU32eNz1aNl0F8vzsr83NeO3lbmIS5ldU6fkmRhZttET/GzEq0z9PDe8rA==", + "dev": true, + "dependencies": { + "@rdfjs/types": "*", + "rdf-isomorphic": "^1.3.0", + "rdf-string": "^1.6.0", + "rdf-terms": "^1.9.1" + } + }, "node_modules/jest-regex-util": { "version": "29.2.0", "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.2.0.tgz", @@ -5900,6 +5923,12 @@ "node": ">=4" } }, + "node_modules/minimalistic-assert": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz", + "integrity": "sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==", + "dev": true + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -9355,6 +9384,18 @@ "@rdfjs/types": "*" } }, + "node_modules/rdf-isomorphic": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/rdf-isomorphic/-/rdf-isomorphic-1.3.1.tgz", + "integrity": "sha512-6uIhsXTVp2AtO6f41PdnRV5xZsa0zVZQDTBdn0br+DZuFf5M/YD+T6m8hKDUnALI6nFL/IujTMLgEs20MlNidQ==", + "dev": true, + "dependencies": { + "@rdfjs/types": "*", + "hash.js": "^1.1.7", + "rdf-string": "^1.6.0", + "rdf-terms": "^1.7.0" + } + }, "node_modules/rdf-js": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/rdf-js/-/rdf-js-4.0.2.tgz", @@ -9364,6 +9405,16 @@ "@rdfjs/types": "*" } }, + "node_modules/rdf-string": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/rdf-string/-/rdf-string-1.6.2.tgz", + "integrity": "sha512-tr0aStKYRmT6ShmGsA4HikIn6O3ZkCBSLWsRbeKhlPVPZodl0QNuws6HuJdD1rUyo9+MNiDw+3wvFSUz6Iwv/g==", + "dev": true, + "dependencies": { + "@rdfjs/types": "*", + "rdf-data-factory": "^1.1.0" + } + }, "node_modules/rdf-string-ttl": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/rdf-string-ttl/-/rdf-string-ttl-1.3.2.tgz", @@ -9373,6 +9424,17 @@ "rdf-data-factory": "^1.1.0" } }, + "node_modules/rdf-terms": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/rdf-terms/-/rdf-terms-1.9.1.tgz", + "integrity": "sha512-GrE8CbQSvuVEFRCywMu6VOgV1AFE6X+nFYcAhEc5pwYKI13bUvz4voiVufQiy3V8rzQKu21Sgl+dS2qcJavy7w==", + "dev": true, + "dependencies": { + "@rdfjs/types": "*", + "rdf-data-factory": "^1.1.0", + "rdf-string": "^1.6.0" + } + }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", diff --git a/package.json b/package.json index 2634c50..068dac3 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "eslint-config-airbnb-base": "^15.0.0", "eslint-plugin-import": "^2.27.5", "jest": "^29.3.1", + "jest-rdf": "^1.8.0", "pre-commit": "^1.2.2", "semantic-release": "^20.0.2", "ts-jest": "^29.0.5",