Skip to content

Commit

Permalink
Emit only valid N-Quads from toRdf.
Browse files Browse the repository at this point in the history
- Check for valid language format.
- Check for valid subject, predicate, object, and datatype IRIs.
- Drop invalid N-Quads.
  • Loading branch information
davidlehn authored and gkellogg committed Jan 14, 2020
1 parent 5800546 commit 2b4e245
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@
- Use rdf-canonize to compare n-quads test results.
- Maintain multiple graphs.
- Sort `@type` when looking for scoped contexts.
- Emit only valid N-Quads from toRdf.
- **Note**: This could have a performance impact.

### Changed
- Use JSON-LD WG tests.
Expand Down
34 changes: 34 additions & 0 deletions lib/toRdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ const {
isAbsolute: _isAbsoluteIri
} = require('./url');

// Building blocks for validating RDF terms against the N-Quads grammar.
// A single hexadecimal digit (N-Quads `HEX` production).
const _HEX = '[0-9A-Fa-f]';
// N-Quads `UCHAR` production: a literal backslash followed by `u` + 4 hex
// digits or `U` + 8 hex digits. The backslash must be escaped for the RegExp
// itself (hence four backslashes in the string literal); a bare `\u` in a
// non-unicode RegExp is an identity escape that only matches the letter `u`,
// which both fails to match real escapes and overlaps the character class
// below, causing exponential backtracking on inputs like `u0000u0000... `.
const _UCHAR = '\\\\u' + _HEX + '{4}|\\\\U' + _HEX + '{8}';
// Matches the content of an N-Quads `IRIREF` (without the enclosing `<` `>`):
// any run of characters other than controls/space, `<>"{}|^` + backtick and
// backslash, or UCHAR escapes. The alternatives start with disjoint
// characters, so matching is linear in the input length.
const IRIREF_RE = new RegExp('^([^\\x00-\\x20<>"{}|^`\\\\]|' + _UCHAR + ')*$');
// Matches an N-Quads `LANGTAG` without the leading `@`: an alphabetic primary
// subtag followed by zero or more `-`-separated alphanumeric subtags.
const LANG_RE = /^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/;

const api = {};
module.exports = api;

Expand All @@ -58,6 +63,11 @@ api.toRDF = (input, options) => {
if(graphName === '@default') {
graphTerm = {termType: 'DefaultGraph', value: ''};
} else if(_isAbsoluteIri(graphName)) {
// invalid graph IRI
if(!IRIREF_RE.test(graphName)) {
continue;
}

if(graphName.startsWith('_:')) {
graphTerm = {termType: 'BlankNode'};
} else {
Expand Down Expand Up @@ -110,6 +120,11 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) {
continue;
}

// invalid subject IRI
if(!IRIREF_RE.test(id)) {
continue;
}

// RDF predicate
const predicate = {
termType: property.startsWith('_:') ? 'BlankNode' : 'NamedNode',
Expand All @@ -121,6 +136,11 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) {
continue;
}

// invalid predicate IRI
if(!IRIREF_RE.test(property)) {
continue;
}

// skip blank node predicates unless producing generalized RDF
if(predicate.termType === 'BlankNode' &&
!options.produceGeneralizedRdf) {
Expand Down Expand Up @@ -226,6 +246,11 @@ function _objectToRDF(item, issuer, dataset, graphTerm) {
let value = item['@value'];
const datatype = item['@type'] || null;

// invalid datatype IRI
if(datatype && !IRIREF_RE.test(datatype)) {
return null;
}

// convert to XSD/JSON datatypes as appropriate
if(datatype === '@json') {
object.value = jsonCanonicalize(value);
Expand All @@ -244,6 +269,9 @@ function _objectToRDF(item, issuer, dataset, graphTerm) {
object.value = value.toFixed(0);
object.datatype.value = datatype || XSD_INTEGER;
} else if('@language' in item) {
if(!LANG_RE.test(item['@language'])) {
return null;
}
object.value = value;
object.datatype.value = datatype || RDF_LANGSTRING;
object.language = item['@language'];
Expand All @@ -258,6 +286,12 @@ function _objectToRDF(item, issuer, dataset, graphTerm) {
} else {
// convert string/node object to RDF
const id = types.isObject(item) ? item['@id'] : item;

// invalid object IRI
if(!IRIREF_RE.test(id)) {
return null;
}

object.termType = id.startsWith('_:') ? 'BlankNode' : 'NamedNode';
object.value = id;
}
Expand Down

0 comments on commit 2b4e245

Please sign in to comment.