-
Notifications
You must be signed in to change notification settings - Fork 198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Load html #347
base: main
Are you sure you want to change the base?
Load html #347
Changes from all commits
d25c882
7730b10
59da13f
bdc9343
7b29cb3
a41f081
03c4834
5fa1b2a
5315d67
31f192d
9d5ec19
2664ac7
868dae6
e38ba63
9a9bbdb
7cc3d1c
66a2092
11b68b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
const canonize = require('rdf-canonize'); | ||
const contentType = require('content-type'); | ||
const util = require('./util'); | ||
const ContextResolver = require('./ContextResolver'); | ||
const IdentifierIssuer = util.IdentifierIssuer; | ||
|
@@ -42,6 +43,7 @@ const LRU = require('lru-cache'); | |
const NQuads = require('./NQuads'); | ||
const Rdfa = require('./Rdfa'); | ||
|
||
const {prependBase: _prependBase} = require('./url'); | ||
const {expand: _expand} = require('./expand'); | ||
const {flatten: _flatten} = require('./flatten'); | ||
const {fromRDF: _fromRDF} = require('./fromRdf'); | ||
|
@@ -378,6 +380,7 @@ jsonld.flatten = async function(input, ctx, options) { | |
// set default options | ||
options = _setDefaults(options, { | ||
base: _isString(input) ? input : '', | ||
extractAllScripts: true, | ||
contextResolver: new ContextResolver( | ||
{sharedCache: _resolvedContextCache}) | ||
}); | ||
|
@@ -663,6 +666,7 @@ jsonld.toRDF = async function(input, options) { | |
// set default options | ||
options = _setDefaults(options, { | ||
base: _isString(input) ? input : '', | ||
extractAllScripts: true, | ||
skipExpansion: false, | ||
contextResolver: new ContextResolver( | ||
{sharedCache: _resolvedContextCache}) | ||
|
@@ -862,6 +866,9 @@ jsonld.documentLoader = async url => { | |
* @param url the URL to fetch. | ||
* @param [options] the options to use: | ||
* [documentLoader] the document loader to use. | ||
* [extractAllScripts] concatenates all matching script elements. | ||
* [profile] used when selecting from HTML script elements. | ||
* [requestProfile] one or more profile IRIs to use in the request. | ||
* | ||
* @return a Promise that resolves to the retrieved remote document. | ||
*/ | ||
|
@@ -873,7 +880,10 @@ jsonld.get = async function(url, options) { | |
load = jsonld.documentLoader; | ||
} | ||
|
||
const remoteDoc = await load(url); | ||
// FIXME: unescape frag? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if this is necessary; it isn't needed to pass the tests. |
||
const [reference, frag] = url.split('#', 2); | ||
|
||
const remoteDoc = await load(reference, options); | ||
|
||
try { | ||
if(!remoteDoc.document) { | ||
|
@@ -882,9 +892,74 @@ jsonld.get = async function(url, options) { | |
'jsonld.NullRemoteDocument'); | ||
} | ||
if(_isString(remoteDoc.document)) { | ||
remoteDoc.document = JSON.parse(remoteDoc.document); | ||
if(remoteDoc.contentType && remoteDoc.contentType.includes('text/html')) { | ||
const domParser = new jsonld.domParser(); | ||
const dom = domParser.parseFromString(remoteDoc.document); | ||
|
||
// Use any document base | ||
const baseElem = dom.getElementsByTagName('base'); | ||
if(baseElem.length > 0) { | ||
const href = baseElem[0].getAttribute('href'); | ||
options.base = _prependBase(options.base || reference, href); | ||
} | ||
|
||
const scripts = dom.getElementsByTagName('script'); | ||
remoteDoc.document = []; | ||
|
||
for(let i = 0; i < scripts.length; i++) { | ||
const script = scripts[i]; | ||
// only application/ld+json | ||
const {type} = contentType.parse(script.getAttribute('type')); | ||
if(type !== 'application/ld+json') { | ||
continue; | ||
} | ||
if(!script.getAttribute('type').startsWith('application/ld+json')) { | ||
continue; | ||
} | ||
// If url has a fragment identifier, only matching scripts | ||
if(frag && script.getAttribute('id') !== frag) { | ||
continue; | ||
} | ||
try { | ||
remoteDoc.document.push(JSON.parse(script.textContent)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWLIW, I threw together a JSBin that uses example 10 to test parsing via jsonld.js 2.0.2: There aren't any entity decoding issues, so I think this issue is probably a bug in Here's another one which works using jsdom: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI, I use htmlparser2 in my implementation, and it doesn't seem to be decoding entities, so it allows all tests to pass. |
||
} catch(e) { | ||
throw new JsonLdError( | ||
'Illegal script content.', | ||
'jsonld.InvalidScriptElement', { | ||
code: 'invalid script element', | ||
remoteDoc | ||
}); | ||
} | ||
} | ||
if(frag && remoteDoc.document.length === 0) { | ||
throw new JsonLdError( | ||
`No script tag found with id=${frag}.`, | ||
'jsonld.InvalidScriptElement', { | ||
code: 'loading document failed', | ||
remoteDoc | ||
}); | ||
} | ||
if(frag || !options.extractAllScripts) { | ||
if(!remoteDoc.document[0]) { | ||
throw new JsonLdError( | ||
`No script tag found.`, | ||
'jsonld.InvalidScriptElement', { | ||
code: 'loading document failed', | ||
remoteDoc | ||
}); | ||
} | ||
remoteDoc.document = remoteDoc.document[0]; | ||
} | ||
} else { | ||
remoteDoc.document = JSON.parse(remoteDoc.document); | ||
} | ||
} | ||
} catch(e) { | ||
if(e.name === 'jsonld.InvalidScriptElement') { | ||
// pass error detected in HTML decode | ||
throw (e); | ||
} | ||
// otherwise, general loading error | ||
throw new JsonLdError( | ||
'Could not retrieve a JSON-LD document from the URL.', | ||
'jsonld.LoadDocumentError', { | ||
|
@@ -942,6 +1017,27 @@ jsonld.documentLoaders = {}; | |
jsonld.documentLoaders.node = require('./documentLoaders/node'); | ||
jsonld.documentLoaders.xhr = require('./documentLoaders/xhr'); | ||
|
||
// Optional DOM parser | ||
try { | ||
jsonld.domParser = require('xmldom').DOMParser || class NoDOMParser { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @davidlehn -- can you comment here? I don't think we can easily support this pattern with webpack. Can you suggest an alternative path forward? Instead of a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
parseFromString() { | ||
throw new JsonLdError( | ||
'Could not parse HTML document. ' + | ||
'HTML parsing not implemented.', 'jsonld.LoadDocumentError', | ||
{code: 'loading document failed'}); | ||
} | ||
}; | ||
} catch(e) { | ||
jsonld.domParser = class NoDOMParser { | ||
parseFromString() { | ||
throw new JsonLdError( | ||
'Could not parse HTML document. ' + | ||
'HTML parsing not implemented.', 'jsonld.LoadDocumentError', | ||
{code: 'loading document failed'}); | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* Assigns the default document loader for external document URLs to a built-in | ||
* default. Supported types currently include: 'xhr' and 'node'. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ const REGEX_LINK_HEADER = /\s*<([^>]*?)>\s*(?:;\s*(.*))?/; | |
const REGEX_LINK_HEADER_PARAMS = | ||
/(.*?)=(?:(?:"([^"]*?)")|([^"]*?))\s*(?:(?:;\s*)|$)/g; | ||
|
||
// FIXME: conditionally support text/html | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With HTML support, we should include |
||
const DEFAULTS = { | ||
headers: { | ||
accept: 'application/ld+json, application/json' | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There don't seem to be standalone XHR documentLoader tests, as there are node tests.