# Patch summary (commentary only: this text sits before the first "diff --git"
# header and is not part of any hunk).
#
# src/html2md.js
#   * validateJSON(str) is replaced by assertValidJSON(str). It still returns
#     JSON.stringify(JSON.parse(str.trim())), but now wraps that in try/catch
#     and throws Error('invalid json-ld') itself when the expression throws.
#   * New helper assertMetaSizeLimit(str, limit = 128_000): returns str
#     unchanged, or throws Error('metadata size limit exceeded') when str is
#     truthy and str.length (UTF-16 code units) is greater than limit.
#   * addMetadata now passes the <title> text, every accepted <meta> content
#     value (including the "image" one) and the JSON-LD string through
#     assertMetaSizeLimit before storing it in `meta`.
#   * For JSON-LD the size check runs on the output of assertValidJSON, i.e. on
#     the re-serialized string, after validation has already succeeded.
#
# test/fixtures/json-ld-too-large.html
#   * New 17-line fixture (per the hunk header).
#   * NOTE(review): the added lines carry no content in this copy; presumably
#     the HTML payload was lost. Confirm against the original commit.
#
# NOTE(review): this copy of the patch had no line breaks or indentation.
# Line boundaries below were re-derived from the +/- markers and agree with the
# hunk headers; leading indentation is assumed to be two-space. Verify before
# applying.
diff --git a/src/html2md.js b/src/html2md.js
index b6a2541e..50d4aef7 100644
--- a/src/html2md.js
+++ b/src/html2md.js
@@ -76,8 +76,19 @@ function toGridTable(title, data) {
   ]);
 }
 
-function validateJSON(str) {
-  return JSON.stringify(JSON.parse(str.trim()));
+function assertValidJSON(str) {
+  try {
+    return JSON.stringify(JSON.parse(str.trim()));
+  } catch {
+    throw Error('invalid json-ld');
+  }
+}
+
+function assertMetaSizeLimit(str, limit = 128_000) {
+  if (str && str.length > limit) {
+    throw Error('metadata size limit exceeded');
+  }
+  return str;
 }
 
 function addMetadata(hast, mdast) {
@@ -86,22 +97,19 @@ function addMetadata(hast, mdast) {
   const head = select('head', hast);
   for (const child of head.children) {
     if (child.tagName === 'title') {
-      meta.set(text('title'), text(toString(child)));
+      meta.set(text('title'), text(assertMetaSizeLimit(toString(child))));
     } else if (child.tagName === 'meta') {
       const { name, content } = child.properties;
       if (name && !HELIX_META.has(name) && !name.startsWith('twitter:')) {
         if (name === 'image') {
-          meta.set(text(name), image(content));
+          meta.set(text(name), image(assertMetaSizeLimit(content)));
         } else {
-          meta.set(text(name), text(content));
+          meta.set(text(name), text(assertMetaSizeLimit(content)));
         }
       }
     } else if (child.tagName === 'script' && child.properties.type === 'application/ld+json') {
-      try {
-        meta.set(text('json-ld'), text(validateJSON(toString(child))));
-      } catch {
-        throw Error('invalid json-ld');
-      }
+      const str = assertMetaSizeLimit(assertValidJSON(toString(child)));
+      meta.set(text('json-ld'), text(str));
     }
   }
 
diff --git a/test/fixtures/json-ld-too-large.html b/test/fixtures/json-ld-too-large.html
new file mode 100644
index 00000000..bcf93c83
--- /dev/null
+++ b/test/fixtures/json-ld-too-large.html
@@ -0,0 +1,17 @@
+
+
+
+ + + + +