import { convertMsLists } from './listConvert';
import { fragmentToHtml, htmlToFragment } from './source';
/**
 * Removes the MS Office specific markup from the pasted HTML. Use it as a first step for cleaning the HTML.
 *
 * @param html - The input HTML (pasted HTML).
 * @returns - The HTML with the Office-only elements, attributes and conditional markers removed.
 */
export const sanitize = (html) => {
    // The rules are applied in order, each one on the output of the previous one.
    const rules = [
        // Keep only the content between the clipboard fragment markers, when both are present.
        [/^[\s\S]+?<!--StartFragment-->\s*([\s\S]*?)\s*<!--EndFragment-->[\s\S]+$/, '$1'],
        // MS elements, e.g. <o:p>, <w:sdtPr>, <v:
        [/<\/?[ovw]:[^>]*?>/gi, ''],
        // XML tags: <xml>, <?xml ...> and the closing </xml>.
        // The backslash alternative is kept only for backward compatibility with the previous pattern.
        [/<[\\\/]?\??xml[^>]*>/gi, ''],
        [/<(?:link|meta) [^>]+?>/ig, ''],
        // Only empty <style> elements are dropped here.
        [/<style[^>]*?>\s*<\/style>/ig, ''],
        [/<\/?st1:.*?>/gi, ''],
        // Opening tags of named anchors.
        [/<a name="[a-zA-Z0-9_]+">/gmi, ''],
        [/v:shapes?="[^"]+"/ig, ''],
        // Conditional markers that wrap the list bullets/numbers.
        [/<!\[if !supportLists\]>/ig, ''],
        [/<!\[endif\]>/ig, '']
    ];
    return rules.reduce((result, [pattern, replacement]) => result.replace(pattern, replacement), html);
};
/**
 * Removes comments in HTML.
 *
 * @param html - The input HTML.
 * @returns - The HTML without comments.
 */
export const removeComments = (html) => {
    // `*?` (not `+?`) so that an empty comment `<!---->` is matched on its own
    // instead of swallowing everything up to the end of the next comment.
    return html.replace(/<!--[\s\S]*?-->/g, '');
};
/**
 * Removes the opening and closing tags matching the passed pattern. The content between the tags is kept.
 *
 * @param html - The input HTML.
 * @param tagPattern - Regular expression source matching the tag name(s), e.g. `'b|span'`.
 * @returns - The HTML without the matched tags.
 */
export const removeTag = (html, tagPattern) => {
    const tagSource = `<\\/?(${tagPattern})(?:\\s[^>]*?)?>`;
    const tagRegExp = new RegExp(tagSource, 'gi');
    return html.replace(tagRegExp, '');
};
/**
 * Removes the passed attribute from the element that owns it.
 * Does nothing when the attribute has no owner element.
 *
 * @param attr - The attribute to remove.
 */
export const removeAttribute = (attr) => {
    const owner = attr.ownerElement;
    if (!owner) {
        return;
    }
    owner.removeAttribute(attr.name);
};
/**
 * Removes the attribute when its value starts with "Mso" (matches the /^Mso/ regex).
 *
 * @param attr - The attribute to check.
 */
export const sanitizeClassAttr = (attr) => {
    const startsWithMso = /^Mso/.test(attr.value);
    if (!startsWithMso) {
        return;
    }
    removeAttribute(attr);
};
const stylesSplit = /\s*;\s*/;
const styleValueSplit = /\s*:\s*/;
/**
 * Removes invalid HTML styles.
 *
 * Keeps only the declarations whose property name is present on the `style` object of the
 * owner element. If no declaration is kept, the whole attribute is removed.
 *
 * NOTE: the value is split on every `;`, so a value which itself contains a semicolon is still cut there.
 *
 * @param attr - The `style` attribute to clean. Its `ownerElement` is expected to be set.
 */
export const sanitizeStyleAttr = (attr) => {
    const supportedStyles = attr.ownerElement.style;
    const kept = [];
    for (const declaration of attr.value.split(stylesSplit)) {
        // Split on the FIRST colon only, so values containing a colon (e.g. `url(http://...)`) stay intact.
        const separator = styleValueSplit.exec(declaration);
        if (!separator) {
            // Empty chunk or a declaration without a value - nothing to keep.
            continue;
        }
        const name = declaration.slice(0, separator.index).trim();
        const value = declaration.slice(separator.index + separator[0].length).trim();
        if (name && value && supportedStyles[name] !== undefined) {
            kept.push(`${name}: ${value};`);
        }
    }
    if (kept.length > 0) {
        attr.value = kept.join(' ');
    }
    else {
        removeAttribute(attr);
    }
};
// Unwraps the node: moves all of its children in front of it (keeping their order)
// and then removes the node itself. Nodes without a parent are left untouched.
const removeNode = (node) => {
    const parent = node.parentNode;
    if (!parent) {
        return;
    }
    for (let child = node.firstChild; child; child = node.firstChild) {
        parent.insertBefore(child, node);
    }
    parent.removeChild(node);
};
// Runs the attribute handlers from `attributes` over every attribute of an element node.
// A handler registered under the attribute name wins; otherwise the '*' handler (if any) is used.
// SPAN elements left without attributes are unwrapped afterwards.
const sanitizeNode = (node, attributes) => {
    if (node.nodeType !== Node.ELEMENT_NODE) {
        return;
    }
    // Attribute names come from pasted (untrusted) markup. Only own properties are consulted, so
    // names like `constructor` or `__proto__` do not resolve to members of Object.prototype.
    const ownHandler = (key) => Object.prototype.hasOwnProperty.call(attributes, key) ? attributes[key] : undefined;
    // Iterate backwards because the handlers may remove the attribute they receive.
    for (let i = node.attributes.length - 1; i >= 0; i--) {
        const attr = node.attributes[i];
        const handler = ownHandler(attr.name) || ownHandler('*');
        if (handler) {
            handler.call(attributes, attr);
        }
    }
    if (node.nodeName === 'SPAN' && node.attributes.length === 0) {
        removeNode(node);
    }
};
/**
 * Cleans the HTML based on passed settings.
 * Before using it, clean the HTML with the `sanitize` function.
 *
 * The steps run in this order, each one only when its setting is truthy:
 * `convertMsLists`, then `stripTags`, then `attributes`.
 *
 * @param html - The input HTML.
 * @param settings - The cleanup settings (`convertMsLists`, `stripTags`, `attributes`).
 * @returns - The cleaned HTML.
 */
export const pasteCleanup = (html, settings) => {
    const { convertMsLists: shouldConvertLists, stripTags, attributes } = settings;
    let cleaned = html;
    if (shouldConvertLists) {
        cleaned = convertMsLists(cleaned);
    }
    if (stripTags) {
        cleaned = removeTag(cleaned, stripTags);
    }
    if (!attributes) {
        return cleaned;
    }
    const fragment = htmlToFragment(cleaned);
    // Snapshot the elements first, sanitizing may unwrap some of them.
    const elements = Array.from(fragment.querySelectorAll('*'));
    for (const element of elements) {
        sanitizeNode(element, attributes);
    }
    return fragmentToHtml(fragment);
};
/**
 * Converts a string of hexadecimal digits (two per byte) to base64.
 * A trailing unpaired digit is ignored.
 *
 * @param hex - The hexadecimal string.
 * @returns - The base64 encoded bytes.
 */
function convertHexToBase64(hex) {
    // Whole bytes only; also keeps the array length an integer for odd-length input.
    const byteCount = Math.floor(hex.length / 2);
    const chars = new Array(byteCount);
    for (let i = 0; i < byteCount; i++) {
        const offset = i * 2;
        chars[i] = String.fromCharCode(parseInt(hex.substring(offset, offset + 2), 16));
    }
    return btoa(chars.join(''));
}
// <img> tags whose src points to the local file system.
const reHtmlImg = /<img\s[^>]*?src=(?:'|")file:\/[^'"]+(?:'|")[^>]*>/gi;
// The control words that precede the hex data of an RTF picture.
const reRtfImgHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
// A whole RTF picture: the header followed by the hex data and the closing brace.
const reRtfImg = new RegExp('(?:(' + reRtfImgHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
const reNonHex = /[^\da-fA-F]/g;
const reLocalFile = /file:\/[^'"]+\.(jpg|png|gif)/i;
const reExtension = /\\(png|jpeg)blip\\/;
const textRtfType = 'text/rtf';
/**
 * If the input HTML contains images with 'src' pointing to local file system (it happens when pasting images and text from MS Word),
 * the function will extract the image sources from the RTF and replace the image 'src' with extracted base64 format data in `html` string.
 *
 * The HTML is returned unchanged when it has no such images, when the clipboard has no RTF data,
 * or when the number of extracted RTF images differs from the number of local images in the HTML.
 *
 * @param html - The input HTML (pasted HTML).
 * @param clipboardData - The paste event clipboardData object (event.clipboardData).
 * @returns - The html with the replaced images sources.
 */
export const replaceImageSourcesFromRtf = (html, clipboardData) => {
    const localImages = html.match(reHtmlImg);
    if (!localImages) {
        return html;
    }
    if (clipboardData.types.indexOf(textRtfType) === -1) {
        return html;
    }
    const rtf = clipboardData.getData(textRtfType);
    const rtfImages = rtf.match(reRtfImg);
    if (!rtf || !rtfImages) {
        return html;
    }
    // Only the pictures stored as png or jpeg are converted to data URIs.
    const dataUris = rtfImages.reduce((uris, rtfImage) => {
        const extension = reExtension.exec(rtfImage);
        if (extension) {
            const hex = rtfImage.replace(reRtfImgHeader, '').replace(reNonHex, '');
            uris.push(`data:image/${extension[1]};base64,${convertHexToBase64(hex)}`);
        }
        return uris;
    }, []);
    // The images are paired by position, so the counts have to match.
    if (dataUris.length !== localImages.length) {
        return html;
    }
    let next = 0;
    return html.replace(reHtmlImg, img => img.replace(reLocalFile, dataUris[next++] || ''));
};
