import MarkdownIt from "markdown-it";

import { renderingRule } from "../utils/markdown.plugin.utils";

/**
 * We apply setting 'html: true' in our instances of Markdown-it in order to support legacy Markdown that:
 *
 * - arbitrarily styles text: <span style="color: red">...</span>
 * - centers text and images: <center>...</center>
 * - breaks lines artificially: text<br>text
 * - creates anchor links to refer to in the document: <a name="windows_steps"></a>
 * - inserts Font-awesome icons: <i class="fa fa-circle"></i>
 *
 * Besides these usages, we do not want to generally allow all HTML. For example, <script> tags
 * should certainly not be passed through. Therefore, the plugins below sanitize input by first
 * allowing only a whitelist of element names. Unsupported elements will be rendered as escaped
 * HTML. For supported elements, certain attributes may be further sanitized. For example, the
 * href attribute of anchors is checked with the same function used in Markdown-it for Markdown
 * links, where hrefs beginning with "javascript:" are considered invalid. Invalid attributes are
 * removed in the rendered output.
 */
const ELEMENT_WHITELIST = Object.assign(
    // Null prototype: this map is indexed by tag names taken from user-supplied
    // Markdown, so a lookup such as ELEMENT_WHITELIST["constructor"] must yield
    // undefined rather than a (truthy) member inherited from Object.prototype.
    Object.create(null),
    {
        // Per-element options:
        //   sanitizeAttrs          - attribute name -> predicate; the attribute is
        //                            dropped when the predicate returns a falsy value.
        //   renderMarkdownContents - when true, the element's contents are rendered
        //                            as inline Markdown.
        a: {
            sanitizeAttrs: {
                // Resolved lazily so the Markdown-it instance is only created on demand.
                href: attrVal =>
                    getVanillaMarkdownItInstance().validateLink(attrVal),
            },
        },
        br: {},
        center: {
            renderMarkdownContents: true,
        },
        i: {},
        kbd: {},
        span: {},
    }
);

// Decomposes an HTML block into: tag name, attribute text, contents, a closing
// tag, and whatever follows the closing tag; `render` stitches the parts back
// together in that order.
const HTML_BLOCK_MATCHER = {
    regExp: /<(\w+)(\s*[^>]*)>([^]*)(<\s*\/[^]*>)([^]*)/,
    render(parts) {
        const { tagName, attrStr, contents, tagClose, afterClose } = parts;
        // Only emit the separating space when there are attributes to print.
        const openingTag =
            attrStr.length > 0 ? `<${tagName} ${attrStr}>` : `<${tagName}>`;
        return `${openingTag}${contents}${tagClose}${afterClose}`;
    },
};

// Matches an inline opening tag and captures its name and attribute text.
// The attribute text may contain neither "/" nor ">", so a self-closing tag
// (and any attribute value containing a slash) does not match this pattern.
const HTML_INLINE_OPENING_MATCHER = {
    regExp: /<(\w+)(\s*[^/>]*)\s*>/,
    render(parts) {
        const { tagName, attrStr } = parts;
        if (attrStr.length > 0) {
            return `<${tagName} ${attrStr}>`;
        }
        return `<${tagName}>`;
    },
};

// Matches an inline closing tag (whitespace is tolerated around the slash and
// the name) and re-renders it in canonical `</name>` form.
const HTML_INLINE_CLOSING_MATCHER = {
    regExp: /<\s*\/\s*(\w+)\s*>/,
    render(parts) {
        const { tagName } = parts;
        return `</${tagName}>`;
    },
};

// Matches a tag whose trailing slash is optional and captures its name and
// attribute text (which may contain neither "/" nor ">"). The tag is always
// re-rendered in self-closing form, i.e. ending in " />".
const HTML_SELF_CLOSING_MATCHER = {
    regExp: /<(\w+)(\s*[^/>]*)\/?\s*>/,
    render(parts) {
        const { tagName, attrStr } = parts;
        if (attrStr.length > 0) {
            return `<${tagName} ${attrStr} />`;
        }
        return `<${tagName} />`;
    },
};

/**
 * Builds a global RegExp that locates `attrName=value` inside a tag's attribute text.
 *
 * Capture group 1 is the attribute value: the text between double quotes when
 * the value is double-quoted, otherwise everything up to the next double quote
 * (or the end of the input). Trailing whitespace is part of the match.
 *
 * The expression is case-insensitive because HTML attribute names are; without
 * that, `HREF="..."` would never be seen by the sanitizer. Regex
 * metacharacters in `attrName` are escaped so the name is matched literally.
 *
 * @param {string} attrName - attribute name to search for.
 * @returns {RegExp} a fresh RegExp with the `g` and `i` flags set.
 */
const attrValueExpr = attrName => {
    const literalName = String(attrName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`${literalName}\\s*=\\s*"?([^"]*)"?\\s*`, "gi");
};

// Cache for the instance below; stays null until it is first requested.
let vanillaMarkdownItInstance = null;

/**
 * Returns the shared Markdown-it instance (constructed with no options),
 * creating it on the first call and reusing it afterwards.
 */
const getVanillaMarkdownItInstance = () => {
    if (vanillaMarkdownItInstance !== null) {
        return vanillaMarkdownItInstance;
    }
    vanillaMarkdownItInstance = new MarkdownIt();
    return vanillaMarkdownItInstance;
};

// Returns the text between a leading single quote and its closing partner (or
// the rest of the string when unterminated); any other value is returned as is.
const unwrapSingleQuotedValue = rawValue => {
    if (typeof rawValue !== "string" || rawValue.charAt(0) !== "'") {
        return rawValue;
    }
    const closingQuoteAt = rawValue.indexOf("'", 1);
    return closingQuoteAt === -1
        ? rawValue.slice(1)
        : rawValue.slice(1, closingQuoteAt);
};

/**
 * Creates a renderer that sanitizes the HTML originally rendered for a token.
 *
 * The original output is decomposed with the first matcher whose `regExp`
 * matches. If the captured tag name is an own key of ELEMENT_WHITELIST, the
 * tag is re-composed by that matcher's `render` after its attributes have been
 * scrubbed; otherwise the original output is returned as escaped HTML.
 *
 * @param {...{regExp: RegExp, render: Function}} matchers - tried in order.
 * @returns {Function} a function taking `{ origRender, env }`, where
 *   `origRender()` yields the original rendered string and `env` is forwarded
 *   to `renderInline` when an element's contents are rendered as Markdown. It
 *   returns the sanitized (or escaped) HTML string.
 */
const sanitizeRendered = (...matchers) => ({ origRender, env }) => {
    const origRendered = origRender();

    // Find the first matcher able to split the HTML string into its parts:
    // tagName, attrStr, contents, tagClose and afterClose (groups 1 to 5).
    let groups = null;
    let effectiveMatcher = null;
    for (const matcher of matchers) {
        const matched = matcher.regExp.exec(origRendered);
        if (matched) {
            groups = matched;
            effectiveMatcher = matcher;
            break;
        }
    }

    if (groups) {
        const tagName = groups[1].toLowerCase();

        // Own-property check: the tag name comes from user input, so names
        // such as "constructor" must not resolve to inherited object members.
        const elementProps = Object.prototype.hasOwnProperty.call(
            ELEMENT_WHITELIST,
            tagName
        )
            ? ELEMENT_WHITELIST[tagName]
            : undefined;

        if (elementProps) {
            // The tagName is in the whitelist, so we can render it as HTML.

            // Some legacy content has Markdown inside of HTML blocks, eg:
            //
            //   <center>![](http://example.com/image.png)</center>
            //
            // Render the contents of the element as inline Markdown for the
            // tags we choose to support this with.
            //
            // NOTE(review): for every other element, `contents` (and
            // `afterClose`) are handed to the matcher's render function
            // without any sanitization here - confirm this is intended.
            let contents = groups[3];
            if (elementProps.renderMarkdownContents && contents) {
                contents = getVanillaMarkdownItInstance().renderInline(
                    contents,
                    env
                );
            }

            // Scrub attribute values based on the functions in sanitizeAttrs.
            // For example, we will remove href attrs on <a> elements whose
            // value starts with javascript:, etc...
            const sanitizeAttrs = elementProps.sanitizeAttrs || {};
            let attrStr = groups[2] || "";
            Object.keys(sanitizeAttrs).forEach(attrName => {
                const sanitizeFn = sanitizeAttrs[attrName];
                attrStr = attrStr.replace(
                    attrValueExpr(attrName),
                    // A single-quoted value is captured with its quotes still
                    // attached; strip them so that e.g. href='javascript:...'
                    // is validated on its actual value instead of slipping
                    // past the check.
                    (match, attrValue) =>
                        sanitizeFn(unwrapSingleQuotedValue(attrValue))
                            ? match
                            : ""
                );
            });

            // The *MATCHER objects know how to re-compose the HTML parts
            return effectiveMatcher.render({
                tagName,
                attrStr: attrStr.trim(),
                contents,
                tagClose: groups[4],
                afterClose: groups[5],
            });
        }
    }

    // Either the tag name is not in our whitelist or we were not able to decompose it into
    // parts. Render the string as escaped HTML.
    return getVanillaMarkdownItInstance().utils.escapeHtml(origRendered);
};

// Rule for "html_block": the rendered block is decomposed as a whole.
// NOTE(review): presumably renderingRule overrides the renderer for the named
// token type - confirm against ../utils/markdown.plugin.utils.
const htmlBlockRule = renderingRule(
    "html_block",
    sanitizeRendered(HTML_BLOCK_MATCHER)
);

// Rule for "html_inline": try an opening tag first, then a closing tag, and
// finally the self-closing form.
const htmlInlineRule = renderingRule(
    "html_inline",
    sanitizeRendered(
        HTML_INLINE_OPENING_MATCHER,
        HTML_INLINE_CLOSING_MATCHER,
        HTML_SELF_CLOSING_MATCHER
    )
);

export default [htmlBlockRule, htmlInlineRule];
