Skip to content

Commit

Permalink
fix: allow more meta
Browse files Browse the repository at this point in the history
  • Loading branch information
maxakuru committed Oct 10, 2024
1 parent a2319d1 commit a87b377
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 6 deletions.
59 changes: 53 additions & 6 deletions src/html2md.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ import {
} from './mdast-table-handler.js';
import formatPlugin from './markdownFormatPlugin.js';

// Reserved meta names/properties, looked up by key in isAllowedMetaName() and
// isAllowedMetaProperty(). The object is created without a prototype so that a
// `HELIX_META[key]` lookup only ever sees the keys listed here and never an
// inherited member such as `constructor` or `toString`. It is frozen because it
// is shared, read-only configuration.
const HELIX_META = Object.freeze(Object.assign(Object.create(null), {
  viewport: true,
}));

function m(type, children, props = {}) {
return {
type,
Expand All @@ -53,10 +57,6 @@ function image(url) {
};
}

// Set of reserved meta names; membership is tested with `HELIX_META.has(name)`.
const HELIX_META = new Set(['viewport']);

function toGridTable(title, data) {
return m(TYPE_GRID_TABLE, [
m(
Expand All @@ -76,6 +76,11 @@ function toGridTable(title, data) {
]);
}

/**
* @param {string} str
* @returns {string}
* @throws {Error}
*/
function assertValidJSON(str) {
try {
return JSON.stringify(JSON.parse(str.trim()));
Expand All @@ -84,13 +89,53 @@ function assertValidJSON(str) {
}
}

/**
 * Guard against oversized metadata values.
 *
 * The value is passed through untouched unless it is truthy and its `length`
 * is strictly greater than `limit`.
 *
 * @param {string} str value to check
 * @param {number} [limit] maximum accepted `length`, defaults to 128000
 * @returns {string} the unmodified input value
 * @throws {Error} 'metadata size limit exceeded' if the value is too long
 */
function assertMetaSizeLimit(str, limit = 128_000) {
  // Falsy values (undefined, null, '') are never measured, only returned.
  const exceedsLimit = Boolean(str) && str.length > limit;
  if (exceedsLimit) {
    throw new Error('metadata size limit exceeded');
  }
  return str;
}

/**
 * Check if meta name is allowed:
 * - it is a string
 * - it is not reserved, i.e. not an own key of HELIX_META with a truthy value
 * - it does not start with 'twitter:'
 *   - except names starting with 'twitter:label' or 'twitter:data'
 *     (prefix match, so 'twitter:label1' and 'twitter:data2' are allowed)
 * @param {string} name
 * @returns {boolean}
 */
function isAllowedMetaName(name) {
  if (typeof name !== 'string') {
    return false;
  }
  // Only own keys count as reserved. A bare `HELIX_META[name]` lookup would also
  // find inherited members (`constructor`, `toString`, ...) and wrongly reject
  // meta tags that happen to use those names.
  const isReserved = Object.prototype.hasOwnProperty.call(HELIX_META, name)
    && Boolean(HELIX_META[name]);
  if (isReserved) {
    return false;
  }
  return !name.startsWith('twitter:')
    || name.startsWith('twitter:label')
    || name.startsWith('twitter:data');
}

/**
 * Decide whether a meta tag's `property` attribute may be kept.
 *
 * A property is kept only when it is a string, has no truthy entry in
 * HELIX_META, and is either exactly 'og:type' or starts with 'product:'.
 *
 * @param {string|undefined} property
 * @returns {boolean}
 */
function isAllowedMetaProperty(property) {
  const isString = typeof property === 'string';
  if (!isString || HELIX_META[property]) {
    return false;
  }
  if (property === 'og:type') {
    return true;
  }
  return property.startsWith('product:');
}

function addMetadata(hast, mdast) {
const meta = new Map();

Expand All @@ -99,13 +144,15 @@ function addMetadata(hast, mdast) {
if (child.tagName === 'title') {
meta.set(text('title'), text(assertMetaSizeLimit(toString(child))));
} else if (child.tagName === 'meta') {
const { name, content } = child.properties;
if (name && !HELIX_META.has(name) && !name.startsWith('twitter:')) {
const { name, property, content } = child.properties;
if (isAllowedMetaName(name)) {
if (name === 'image') {
meta.set(text(name), image(assertMetaSizeLimit(content)));
} else {
meta.set(text(name), text(assertMetaSizeLimit(content)));
}
} else if (isAllowedMetaProperty(property)) {
meta.set(text(property), text(assertMetaSizeLimit(content)));
}
} else if (child.tagName === 'script' && child.properties.type === 'application/ld+json') {
const str = assertMetaSizeLimit(assertValidJSON(toString(child)));
Expand Down
25 changes: 25 additions & 0 deletions test/fixtures/meta-tags.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<html>

<head>
<meta property="og:type" content="product">
<meta property="product:availability" content="${inStock ? 'In stock' : 'Out of stock'}">
<meta property="product:price.amount" content="${prices.final.amount}">
<meta property="product:price.currency" content="${prices.final.currency}">
<meta property="invalid" content="foo">
<meta property="og:ignored" content="bar">
<meta name="twitter:label1" content="Price">
<meta name="twitter:data1" content="$3">
<meta name="twitter:label2" content="Availability">
<meta name="twitter:data2" content="In stock">
<meta name="twitter:image" content="this-is-removed">
</head>

<body>
<main>
<div>
<h1>Hello, World.</h1>
</div>
</main>
</body>

</html>
21 changes: 21 additions & 0 deletions test/fixtures/meta-tags.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Hello, World.

+-------------------------------------------------------------------+
| Metadata |
+------------------------+------------------------------------------+
| og:type | product |
+------------------------+------------------------------------------+
| product:availability | ${inStock ? 'In stock' : 'Out of stock'} |
+------------------------+------------------------------------------+
| product:price.amount | ${prices.final.amount} |
+------------------------+------------------------------------------+
| product:price.currency | ${prices.final.currency} |
+------------------------+------------------------------------------+
| twitter:label1 | Price |
+------------------------+------------------------------------------+
| twitter:data1 | $3 |
+------------------------+------------------------------------------+
| twitter:label2 | Availability |
+------------------------+------------------------------------------+
| twitter:data2 | In stock |
+------------------------+------------------------------------------+
4 changes: 4 additions & 0 deletions test/html2md.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ describe('html2md Tests', () => {
it('throws meaningful error when json-ld is too large', async () => {
await assert.rejects(() => test('json-ld-too-large'), Error('metadata size limit exceeded'));
});

it('convert a document with meta names and properties correctly', async () => {
await test('meta-tags');
});
});

describe('className to block type tests', () => {
Expand Down

0 comments on commit a87b377

Please sign in to comment.