import { noopTest, edit, merge } from './helpers.js'; /** * Block-Level Grammar */ export const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/, hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|\\n*|$)' // (4) + '|\\n*|$)' // (5) + '|)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6) + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag + '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *(?:\n *)?]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/, table: noopTest, lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph // interruption rules of commonmark and the original markdown spec: _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/, text: /^[^\n]+/ }; block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/; block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; block.def = edit(block.def) .replace('label', block._label) .replace('title', block._title) .getRegex(); block.bullet = /(?:[*+-]|\d{1,9}[.)])/; block.listItemStart = edit(/^( *)(bull) */) .replace('bull', block.bullet) .getRegex(); block.list = edit(block.list) .replace(/bull/g, block.bullet) .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + '|track|ul'; block._comment = /|$)/; block.html = edit(block.html, 'i') .replace('comment', block._comment) .replace('tag', block._tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); block.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('|table', '') .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); block.blockquote = edit(block.blockquote) .replace('paragraph', block.paragraph) .getRegex(); /** * Normal Block Grammar */ block.normal = merge({}, block); /** * GFM Block Grammar */ block.gfm = merge({}, block.normal, { table: '^ *([^\\n ].*\\|.*)\\n' // Header + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }); block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); block.gfm.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('table', block.gfm.table) // interrupt paragraphs with table .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|textarea|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); /** * Pedantic grammar (original John Gruber's loose markdown specification) */ block.pedantic = merge({}, block.normal, { html: edit( '^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)' // closed tag + '|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') .replace('comment', block._comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest, // fences not supported paragraph: edit(block.normal._paragraph) .replace('hr', block.hr) .replace('heading', ' *#{1,6} *[^\n]') .replace('lheading', block.lheading) .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') .replace('|html', '') .getRegex() }); /** * Inline-Level Grammar */ export const inline = { escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noopTest, tag: '^comment' + '|^' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. + '|^' // declaration, e.g. + '|^', // CDATA section link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/, reflink: /^!?\[(label)\]\[(ref)\]/, nolink: /^!?\[(ref)\](?:\[\])?/, reflinkSearch: 'reflink|nolink(?!\\()', emStrong: { lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, del: noopTest, text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\?@\\[\\]`^{|}~'; inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [title](link), `code`, inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; inline.escapedEmSt = /\\\*|\\_/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); inline.emStrong.lDelim = edit(inline.emStrong.lDelim) .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g') .replace(/punct/g, inline._punctuation) .getRegex(); inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g') .replace(/punct/g, inline._punctuation) .getRegex(); inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; inline.autolink = edit(inline.autolink) .replace('scheme', inline._scheme) .replace('email', inline._email) .getRegex(); inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; inline.tag = edit(inline.tag) .replace('comment', inline._comment) .replace('attribute', inline._attribute) .getRegex(); inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/; inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; inline.link = edit(inline.link) .replace('label', inline._label) .replace('href', inline._href) .replace('title', inline._title) .getRegex(); inline.reflink = edit(inline.reflink) .replace('label', inline._label) .replace('ref', block._label) .getRegex(); inline.nolink = edit(inline.nolink) .replace('ref', block._label) .getRegex(); inline.reflinkSearch = edit(inline.reflinkSearch, 'g') .replace('reflink', inline.reflink) .replace('nolink', inline.nolink) .getRegex(); /** * Normal Inline Grammar */ inline.normal = merge({}, inline); /** * Pedantic Inline Grammar */ inline.pedantic = merge({}, inline.normal, { strong: { start: /^__|\*\*/, middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, endAst: /\*\*(?!\*)/g, endUnd: /__(?!_)/g }, em: { start: /^_|\*/, middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, endAst: /\*(?!\*)/g, endUnd: /_(?!_)/g }, link: edit(/^!?\[(label)\]\((.*?)\)/) .replace('label', inline._label) .getRegex(), reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', inline._label) .getRegex() }); /** * GFM Inline Grammar */ inline.gfm = merge({}, inline.normal, { escape: edit(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/, del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/, text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\