Skip to main content
  1. Open Source/

Snippet: A Turndown Plugin parsing Stack Overflow HTML answers

A Turndown plugin that parses Stack Overflow HTML answers and converts them to GitHub-Flavored Markdown (GFM) with the correct code-block language. Turndown is a JavaScript HTML-to-Markdown converter.
TypeScript
2 files

plugin-so-turndown.ts #

import TurndownService from "turndown";

/**
 * Turndown plugin that converts `<pre><code>…</code></pre>` blocks, as found in
 * Stack Overflow answers, into fenced Markdown code blocks. The language is
 * taken from a `lang-*` class on the `<pre>` element when one is present;
 * otherwise the fence carries no language.
 *
 * NOTE(review): `deEscape` is called below but is neither defined nor imported
 * in the visible source — confirm it is in scope.
 *
 * @param service - Turndown instance on which the rule is registered.
 */
export const stackOverflowHighlightedCodeBlock: TurndownService.Plugin = function (service: TurndownService): void {
    // Captures the language from a class name such as `lang-javascript`.
    const highlightRegExp = /lang-([a-z0-9]+)/

    service.addRule('stackOverflowHighlightedCodeBlock', {
        // Accept only <pre> elements whose first child is a <code> element.
        filter: function (node: HTMLElement): boolean {
            const firstChild = node.firstChild
            return (
                node.nodeName === 'PRE' &&
                firstChild != null &&
                firstChild.nodeName === 'CODE'
            )
        },
        // The filter above only admits <pre> elements, so the node is treated as
        // an HTMLElement here; the cast at the end reconciles that with the
        // wider node type Turndown declares for replacement callbacks.
        replacement: function (content: string, node: HTMLElement, options: TurndownService.Options): string {
            const match = highlightRegExp.exec(node.className || '')
            const language = match ? match[1] : ''

            const codeNode = node.firstChild
            const rawCode = codeNode && codeNode.textContent ? codeNode.textContent : ''
            const code = `${deEscape(rawCode)}`

            // An empty or missing fence would yield invalid Markdown, so fall
            // back to backticks (deliberately `||`, not `??`).
            const baseFence = options.fence || '```'
            const fenceChar = baseFence.charAt(0)
            const escapedFenceChar = fenceChar.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&')

            // If the code itself contains a line starting with a fence run at
            // least as long as ours, that line would close the block early.
            // Grow the fence until it is longer than every such run.
            let fence: string = baseFence
            const fenceInCode = new RegExp('^' + escapedFenceChar + '{3,}', 'gm')
            let run: RegExpExecArray | null
            while ((run = fenceInCode.exec(code)) !== null) {
                if (run[0].length >= fence.length) {
                    fence = fenceChar.repeat(run[0].length + 1)
                }
            }

            return `\n\n${fence}${language}\n${code}\n${fence}\n\n`
        } as TurndownService.ReplacementFunction
    })
}

test.ts #

import TurndownService from "turndown";

// describe/it/expect suite covering the fenced-code output of the plugin,
// with and without a `lang-*` class on the <pre> element.
// NOTE(review): `stackOverflowHighlightedCodeBlock` is not imported in the
// visible part of this test file — confirm it is in scope.
describe('stackOverflowHighlightedCodeBlock', () => {
    /**
     * Converts `html` using a fresh TurndownService that has the plugin
     * applied, and trims surrounding whitespace from the resulting Markdown.
     */
    function toTrimmedMarkdown(html: string): string {
        const service = new TurndownService();
        stackOverflowHighlightedCodeBlock(service);
        return service.turndown(html).trim();
    }

    it('should correctly format a code block with a language', () => {
        const input = `
            <pre class="lang-javascript"><code>const a = 10;
console.log(a);</code></pre>
        `;

        const wanted = `
\`\`\`javascript
const a = 10;
console.log(a);
\`\`\`
        `;

        expect(toTrimmedMarkdown(input)).toBe(wanted.trim());
    });

    it('should correctly format a code block without a language', () => {
        const input = `
            <pre><code>const a = 10;
console.log(a);</code></pre>
        `;

        const wanted = `
\`\`\`
const a = 10;
console.log(a);
\`\`\`
        `;

        expect(toTrimmedMarkdown(input)).toBe(wanted.trim());
    });
});