Deprecate HTML auto-merging from core (plugin may be needed)
See original GitHub issue. Background from isagalaev in #2529:
I was walking by, and while I didn’t read through the entire thing I wanted to say this. There were a few features added to the core at users’ requests when it was all small and nimble. These days I would say it makes much more sense to be opinionated and remove corner cases like useBR, tabWidth and several different spellings of “lang-”, “language-” as language prefixes in the class name (as far as I remember there is a very precise recommendation in HTML5 to use “language-”). Everyone with special cases would be expected to do pre- or post-processing.
Very early on we were perhaps a bit too open and allowed some things to slip into core that perhaps should not have - and today they are all things that could easily be handled with our new plugin API.
HTML merging is a large swath of complexity (see utils.js) that a few use for a corner case like:
var x;
<span class="important">var y;</span>
Which will be highlighted as (the HTML “passes thru”):
<span class="keyword">var</span> x;
<span class="important"><span class="keyword">var</span> y;</span>
Sadly, this feature also makes it impossible for us to detect HTML injection type mistakes (which we could otherwise detect and warn about)… #2886 HTML (generally) should not exist inside a code block - it should always be escaped. Its existence possibly indicates an HTML injection style vulnerability. If we remove this final edge case for HTML then we can inform users about the potential vulnerability vs letting them potentially shoot themselves in the foot.
This can now be easily achieved via an “after:highlightBlock” plugin (the hook is named “after:highlightElement” in v11, which is what the plugin below uses), and that’s what should happen for anyone who absolutely needs this functionality.
- v10.5 Deprecate HTML merging (#2873)
- Someone may want to provide a plugin
- v11 Remove HTML merging
You can restore this functionality via a plugin. Below is the plugin that we briefly shipped as part of our own source after this functionality was ported to a plugin, to make later extraction easy.
A maintainer (someone who wants to package this up as an official plugin and maintain it) is needed.
Usage:
hljs.addPlugin(mergeHTMLPlugin);
Plugin source:
var mergeHTMLPlugin = (function () {
'use strict';
var originalStream;
/**
 * Escapes the five characters that are significant in HTML text and in
 * double- or single-quoted attribute values, so the result can be embedded
 * in markup without being interpreted as tags or entities.
 *
 * The ampersand is replaced first; otherwise the ampersands introduced by
 * the later replacements would themselves be escaped a second time.
 *
 * @param {string} value - raw text to escape
 * @returns {string} `value` with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
function escapeHTML(value) {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#x27;');
}
/* plugin itself */
/**
 * Plugin object exposing two hooks: the first records the element events of
 * the markup that is inside the element before highlighting, the second
 * weaves those events back into the highlighted output.
 *
 * @type {HLJSPlugin}
 */
const mergeHTMLPlugin = {
  // Capture the element's own markup as an event stream before it is replaced.
  "before:highlightElement": (context) => {
    const { el } = context;
    originalStream = nodeStream(el);
  },
  // Combine the captured stream with the stream of the highlighted result.
  "after:highlightElement": (context) => {
    const { el, result, text } = context;
    // Nothing was captured, so the highlighted output is left as it is.
    if (!originalStream.length) {
      return;
    }
    // Parse the highlighted HTML in a detached container so that it can be
    // walked into an event stream the same way as the original markup.
    const container = document.createElement('div');
    container.innerHTML = result.value;
    const highlightedStream = nodeStream(container);
    result.value = mergeStreams(originalStream, highlightedStream, text);
    el.innerHTML = result.value;
  }
};
/* Stream merging support functions */
/**
* @typedef Event
* @property {'start'|'stop'} event
* @property {number} offset
* @property {Node} node
*/
/**
 * Gives the lower-cased name of a node, as used when writing its tag.
 *
 * @param {Node} node
 * @returns {string} `node.nodeName` converted to lower case
 */
function tag(node) {
  const { nodeName } = node;
  return nodeName.toLowerCase();
}
/**
 * Walks the descendants of `node` in document order and records where each
 * element opens and closes, measured in characters of text content that
 * precede that point.
 *
 * Text nodes (nodeType 3) only advance the offset. Element nodes (nodeType 1)
 * produce a 'start' event and, unless they are void elements, a 'stop' event.
 * The root `node` itself produces no event.
 *
 * @param {Node} node - root whose children are walked
 * @returns {Event[]} start/stop events in document order
 */
function nodeStream(node) {
  // Void elements must not get a 'stop' event, because rendering one would
  // emit a closing tag that doubles them in the output. There are more void
  // elements in HTML; only those realistically expected in code display are
  // listed. The pattern is anchored so that names which merely contain one of
  // these (for example "abbr") still get closed; "wbr" is listed explicitly
  // because it is void as well.
  const voidTagPattern = /^(?:br|hr|img|input|wbr)$/;
  /** @type Event[] */
  const result = [];
  (function _nodeStream(node, offset) {
    for (let child = node.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === 3) {
        offset += child.nodeValue.length;
      } else if (child.nodeType === 1) {
        result.push({
          event: 'start',
          offset: offset,
          node: child
        });
        // The recursive walk returns the offset reached after the child's
        // own text content.
        offset = _nodeStream(child, offset);
        if (!voidTagPattern.test(child.nodeName.toLowerCase())) {
          result.push({
            event: 'stop',
            offset: offset,
            node: child
          });
        }
      }
    }
    return offset;
  })(node, 0);
  return result;
}
/**
 * Renders one HTML string from two event streams over the same text, escaping
 * the text between events and writing a tag for every event.
 *
 * Both input arrays are consumed: events are removed from the front as they
 * are rendered, so both are empty when the function returns.
 *
 * @param {any} original - the original stream
 * @param {any} highlighted - stream of the highlighted source
 * @param {string} value - the original source itself
 * @returns {string} the merged markup
 */
function mergeStreams(original, highlighted, value) {
  let cursor = 0;
  let markup = '';
  // Nodes of the highlighted stream that are currently open, outermost first.
  const openNodes = [];

  // Decides which stream supplies the next event.
  const pickStream = () => {
    if (original.length === 0) {
      return highlighted;
    }
    if (highlighted.length === 0) {
      return original;
    }
    const originalOffset = original[0].offset;
    const highlightedOffset = highlighted[0].offset;
    if (originalOffset !== highlightedOffset) {
      return originalOffset < highlightedOffset ? original : highlighted;
    }
    // Same offset on both sides: the original must start first and close
    // last, so that a stream is never started right before it has to stop.
    // All four start/stop combinations reduce to this single test on the
    // highlighted event.
    if (highlighted[0].event === 'start') {
      return original;
    }
    return highlighted;
  };

  /** @param {Node} node */
  const writeOpen = (node) => {
    // @ts-ignore
    const attributes = Array.from(
      node.attributes,
      /** @param {Attr} attr */
      (attr) => ` ${attr.nodeName}="${escapeHTML(attr.value)}"`
    ).join('');
    markup += `<${tag(node)}${attributes}>`;
  };

  /** @param {Node} node */
  const writeClose = (node) => {
    markup += `</${tag(node)}>`;
  };

  /** @param {Event} event */
  const render = (event) => {
    if (event.event === 'start') {
      writeOpen(event.node);
    } else {
      writeClose(event.node);
    }
  };

  while (original.length || highlighted.length) {
    let stream = pickStream();
    markup += escapeHTML(value.substring(cursor, stream[0].offset));
    cursor = stream[0].offset;

    if (stream !== original) {
      // Highlighted event: keep the stack of open highlighted nodes in step
      // with what is written.
      const event = stream.shift();
      if (event.event === 'start') {
        openNodes.push(event.node);
      } else {
        openNodes.pop();
      }
      render(event);
      continue;
    }

    // On any opening or closing tag of the original markup, first close every
    // open highlighted node (innermost first), then render the original tag
    // together with all following original tags at the same offset, and
    // finally reopen the highlighted nodes (outermost first).
    for (let i = openNodes.length - 1; i >= 0; i -= 1) {
      writeClose(openNodes[i]);
    }
    do {
      render(stream.shift());
      stream = pickStream();
    } while (stream === original && stream.length && stream[0].offset === cursor);
    for (const node of openNodes) {
      writeOpen(node);
    }
  }

  // Whatever text follows the last event is appended escaped.
  return markup + escapeHTML(value.slice(cursor));
}
return mergeHTMLPlugin;
}());
This was built with:
rollup src/plugins/merge_html.js -f iife --output.name=mergeHTMLPlugin
After changing the source to have a single default export.
Issue Analytics
- State:
- Created 3 years ago
- Comments:36 (22 by maintainers)
For the love of god, if you are going to remove a feature and then tell people ‘just make a plugin lol’, provide a fucking plugin for feature parity. Nobody else should have to “pitch in” to re-add something that already existed. People using your syntax highlighter are using it because THEY DON’T WANT TO MAKE THEIR OWN. You need a big bold paragraph on your page about yanking this very useful feature and provide a migration path, or tell people to stay on a prior version.
For anyone following this thread: this is still not supported officially (in core) but I did just update the sample plugin source above such that it now works with the latest 11 releases (it now uses the correct callback hooks)…