This repository has been archived by the owner on Jul 3, 2024. It is now read-only.
forked from StackExchange/pagedown
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Markdown.Sanitizer.js
102 lines (86 loc) · 3.3 KB
/
Markdown.Sanitizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
(function () {
var output, Converter;
if (typeof exports === "object" && typeof require === "function") { // we're in a CommonJS (e.g. Node.js) module
output = exports;
Converter = require("./Markdown.Converter").Converter;
} else {
output = window.Markdown;
Converter = output.Converter;
}
output.getSanitizingConverter = function () {
var converter = new Converter();
converter.hooks.chain("postConversion", sanitizeHtml);
converter.hooks.chain("postConversion", balanceTags);
return converter;
}
function sanitizeHtml(html) {
return html.replace(/<[^>]*>?/gi, sanitizeTag);
}
// (tags that can be opened/closed) | (tags that stand alone)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|em|h1|h2|h3|i|li|ol(?: start="\d+")?|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
function sanitizeTag(tag) {
if (tag.match(basic_tag_whitelist))
return tag;
return "";
}
/// <summary>
/// attempt to balance HTML tags in the html string
/// by removing any unmatched opening or closing tags
/// IMPORTANT: we *assume* HTML has *already* been
/// sanitized and is safe/sane before balancing!
///
/// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
/// </summary>
function balanceTags(html) {
if (html == "")
return "";
var re = /<\/?\w+[^>]*(\s|$|>)/g;
// convert everything to lower case; this makes
// our case insensitive comparisons easier
var tags = html.toLowerCase().match(re);
// no HTML tags present? nothing to do; exit now
var tagcount = (tags || []).length;
if (tagcount == 0)
return html;
var tagname, tag;
var ignoredtags = "<p><img><br><li><hr>";
var match;
var tagpaired = [];
var tagremove = [];
var needsRemoval = false;
// loop through matched tags in forward order
for (var ctag = 0; ctag < tagcount; ctag++) {
tagname = tags[ctag].replace(/<\/?(\w+).*/, "$1");
// skip any already paired tags
// and skip tags in our ignore list; assume they're self-closed
if (tagpaired[ctag] || ignoredtags.search("<" + tagname + ">") > -1)
continue;
tag = tags[ctag];
match = -1;
if (!/^<\//.test(tag)) {
// this is an opening tag
// search forwards (next tags), look for closing tags
for (var ntag = ctag + 1; ntag < tagcount; ntag++) {
if (!tagpaired[ntag] && tags[ntag] == "</" + tagname + ">") {
match = ntag;
break;
}
}
}
if (match == -1)
needsRemoval = tagremove[ctag] = true; // mark for removal
else
tagpaired[match] = true; // mark paired
}
if (!needsRemoval)
return html;
// delete all orphaned tags from the string
var ctag = 0;
html = html.replace(re, function (match) {
var res = tagremove[ctag] ? "" : match;
ctag++;
return res;
});
return html;
}
})();