99 lines
3.7 KiB
JavaScript
99 lines
3.7 KiB
JavaScript
(function() {
|
|
// Resolve where the public API is attached (`output`) and where the
// Converter constructor comes from, depending on the host environment.
var output, Converter;

if (typeof exports !== 'object' || typeof require !== 'function') {
    // No CommonJS loader detected: use the global Markdown object.
    // NOTE(review): presumably Markdown.Converter.js has already populated
    // window.Markdown at this point -- confirm script load order.
    output = window.Markdown;
    Converter = output.Converter;
} else {
    // we're in a CommonJS (e.g. Node.js) module
    output = exports;
    Converter = require('./Markdown.Converter').Converter;
}
|
|
|
|
/**
 * Builds a new Converter and registers sanitizeHtml followed by
 * balanceTags on its 'postConversion' hook, in that order.
 *
 * @returns {Converter} the newly created converter.
 */
output.getSanitizingConverter = function() {
    var sanitizing = new Converter();
    var hooks = sanitizing.hooks;

    hooks.chain('postConversion', sanitizeHtml);
    hooks.chain('postConversion', balanceTags);

    return sanitizing;
};
|
|
|
|
/**
 * Runs every tag-like token of the input through sanitizeTag.
 *
 * A token is a '<' followed by any run of non-'>' characters and an
 * optional closing '>'; each token is replaced by whatever sanitizeTag
 * returns for it.
 *
 * @param {string} html - markup to filter.
 * @returns {string} the markup with each token replaced.
 */
function sanitizeHtml(html) {
    var tagLike = /<[^>]*>?/gi;
    return html.replace(tagLike, sanitizeTag);
}
|
|
|
|
// (tags that can be opened/closed) | (tags that stand alone)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;

// <a href="url..." optional title>|</a>
var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;

// <img src="url..." optional width optional height optional alt optional title
var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

/**
 * Whitelist filter for a single tag-like token.
 *
 * @param {string} tag - one complete token, e.g. '<b>' or '<a href="...">'.
 * @returns {string} the token unchanged when it matches the basic, anchor
 *     or image whitelist; otherwise the empty string.
 */
function sanitizeTag(tag) {
    // The whitelists are non-global regexes, so .test() is stateless and
    // avoids building the match arrays that String#match would allocate.
    var allowed = basic_tag_whitelist.test(tag) ||
        a_white.test(tag) ||
        img_white.test(tag);

    return allowed ? tag : '';
}
|
|
|
|
/**
 * Attempt to balance HTML tags in the html string by removing any
 * unmatched opening or closing tags.
 *
 * IMPORTANT: we *assume* HTML has *already* been sanitized and is
 * safe/sane before balancing!
 *
 * Tags named in the ignore list (p, img, br, li, hr) are never paired or
 * removed. Pairing looks for the next unpaired closing tag with the same
 * name; nesting order is not checked.
 *
 * adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
 *
 * @param {string} html - markup to balance.
 * @returns {string} the markup with orphaned tags deleted; the input is
 *     returned unchanged when nothing has to be removed.
 */
function balanceTags(html) {
    if (html === '') { return ''; }

    var re = /<\/?\w+[^>]*(\s|$|>)/g;
    var tagNameRe = /^<\/?(\w+)/;

    // Collect the tags from the ORIGINAL string. The removal pass below runs
    // the same regex over the same string, so the n-th match here is
    // guaranteed to be the n-th match there. (Matching a lower-cased copy is
    // not safe: lower-casing can turn non-ASCII characters into ASCII word
    // characters and change the number of matches.)
    var tags = html.match(re);

    // no HTML tags present? nothing to do; exit now
    if (!tags) { return html; }

    var tagcount = tags.length;
    var ignoredtags = '<p><img><br><li><hr>';
    var tagpaired = [];
    var tagremove = [];
    var needsRemoval = false;
    var tag, tagname, match, ctag, ntag;

    // lower-case each tag individually; this makes
    // our case insensitive comparisons easier
    for (ctag = 0; ctag < tagcount; ctag++) {
        tags[ctag] = tags[ctag].toLowerCase();
    }

    // loop through matched tags in forward order
    for (ctag = 0; ctag < tagcount; ctag++) {
        tag = tags[ctag];

        // Anchored capture of the name only. Every tag matched `re`, so this
        // always succeeds, and it is immune to newlines inside the tag.
        tagname = tagNameRe.exec(tag)[1];

        // skip any already paired tags
        // and skip tags in our ignore list; assume they're self-closed
        // (indexOf, not search: search would compile its argument as a regex)
        if (tagpaired[ctag] || ignoredtags.indexOf('<' + tagname + '>') > -1) { continue; }

        match = -1;

        if (tag.charAt(1) !== '/') {
            // this is an opening tag
            // search forwards (next tags), look for closing tags
            for (ntag = ctag + 1; ntag < tagcount; ntag++) {
                if (!tagpaired[ntag] && tags[ntag] === '</' + tagname + '>') {
                    match = ntag;
                    break;
                }
            }
        }

        if (match === -1) {
            // orphaned opening tag, or a closing tag nobody claimed
            needsRemoval = tagremove[ctag] = true; // mark for removal
        } else {
            tagpaired[match] = true; // mark paired
        }
    }

    if (!needsRemoval) { return html; }

    // delete all orphaned tags from the string
    var index = 0;
    return html.replace(re, function(found) {
        var keep = !tagremove[index];
        index++;
        return keep ? found : '';
    });
}
|
|
}());
|