Sanitizing HTML/Markdown means parsing the content into an HTML tree under the hood anyway, and it is more accurate to do mention/hashtag replacement on the text nodes in that tree than to try to hack it in with regexes and the like. This removes the overrides of `#entities` and `#rewrite` on `AdvancedTextFormatter` and stops using those methods altogether; instead, the formatter keeps track of the parsed Nokogiri tree itself and uses it in the `#to_s` method. Internally, this tree uses `<mastodon-entity>` nodes to keep track of hashtags, links, and mentions. Sanitization is moved to the beginning, so `<mastodon-entity>` elements are known not to appear in the input.
154 lines
4.4 KiB
Ruby
154 lines
4.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Formats status text like +TextFormatter+, but can additionally render
# Markdown and performs entity (link, hashtag, mention) replacement on the
# parsed HTML tree instead of on the raw string.
class AdvancedTextFormatter < TextFormatter
  # Maps the key which identifies an extracted entity to the +kind+ recorded
  # on its placeholder element. Order matters: the first key present wins.
  ENTITY_KINDS = {
    url: 'link',
    hashtag: 'hashtag',
    screen_name: 'mention',
  }.freeze

  # Text nodes eligible for entity extraction. Text which is already inside
  # a link is skipped, and so is text inside +<code>+, so that literal code
  # such as "@user" or "#tag" is not turned into a link.
  LINKABLE_TEXT_XPATH = './/text()[not(ancestor::a | ancestor::code)]'

  # Redcarpet renderer which hands autolinked URLs to a caller-supplied block
  # and renders code blocks with escaped content.
  class HTMLRenderer < Redcarpet::Render::HTML
    # @param [Hash] options Redcarpet render options, passed to the superclass
    # @yieldparam [String] link a non-email link found by the autolinker
    # @yieldreturn [String] the markup to emit for that link
    def initialize(options, &block)
      super(options)
      @format_link = block
    end

    # Renders a code block. The code is HTML-escaped and its newlines are
    # turned into +<br/>+ (+AdvancedTextFormatter#format_markdown+ deletes
    # every raw newline from the rendered output).
    #
    # @param [String] code
    # @param [String, nil] _language ignored
    # @return [String]
    def block_code(code, _language)
      <<~HTML
        <pre><code>#{ERB::Util.h(code).gsub("\n", '<br/>')}</code></pre>
      HTML
    end

    # Renders an autolinked address. Email links are returned unchanged;
    # anything else is formatted by the block given to +#initialize+.
    #
    # @param [String] link
    # @param [Symbol] link_type
    # @return [String]
    def autolink(link, link_type)
      return link if link_type == :email

      @format_link.call(link)
    end
  end

  # @param [String] text
  # @param [Hash] options
  # @option options [Boolean] :multiline
  # @option options [Boolean] :with_domains
  # @option options [Boolean] :with_rel_me
  # @option options [Array<Account>] :preloaded_accounts
  # @option options [String] :content_type
  def initialize(text, options = {})
    # Work on a copy so the caller's hash is not modified when
    # +:content_type+ is removed before delegating to the superclass.
    options = options.dup
    content_type = options.delete(:content_type)
    super(text, options)

    @text = format_markdown(text) if content_type == 'text/markdown'
  end

  # Differs from TextFormatter by operating on the parsed HTML tree ;)
  #
  # See +#tree+
  #
  # @return [ActiveSupport::SafeBuffer] the formatted HTML
  def to_s
    return ''.html_safe if text.blank?

    # Work on a copy so the memoized tree keeps its placeholders.
    result = tree.dup
    result.css('mastodon-entity').each do |entity|
      value = entity['value']

      case entity['kind']
      when 'hashtag'
        entity.replace(link_to_hashtag({ hashtag: value }))
      when 'link'
        entity.replace(link_to_url({ url: value }))
      when 'mention'
        entity.replace(link_to_mention({ screen_name: value }))
      end
    end

    # The tree was sanitized in +#tree+, so the serialization is marked
    # HTML-safe; this also makes both branches return the same type.
    # NOTE(review): relies on the +link_to_*+ helpers (defined outside this
    # file) returning properly escaped markup -- confirm.
    result.to_html.html_safe # rubocop:disable Rails/OutputSafety
  end

  ##
  # Process the status into a Nokogiri document fragment, with entities
  # replaced with +<mastodon-entity>+s.
  #
  # Since +<mastodon-entity>+ is not allowed by the sanitizer, any such
  # elements in the output *must* have been produced by this algorithm.
  #
  # These elements will need to be replaced prior to serialization (see
  # +#to_s+).
  #
  # The result is memoized, and only once it has been built completely.
  def tree
    @tree ||= build_tree
  end

  private

  # Parses and sanitizes the text, then swaps every eligible text node for
  # a mix of plain text nodes and +<mastodon-entity>+ placeholders.
  def build_tree
    src = text.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
    fragment = Nokogiri::HTML5.fragment(src)
    Sanitize.node!(fragment, Sanitize::Config::MASTODON_OUTGOING)
    document = fragment.document

    fragment.xpath(LINKABLE_TEXT_XPATH).each do |text_node|
      text_node.replace(nodes_with_entities(text_node.content, document))
    end

    fragment
  end

  # Splits +content+ into a node set of text nodes and entity placeholders,
  # in their original order.
  def nodes_with_entities(content, document)
    nodes = Nokogiri::XML::NodeSet.new(document)
    processed_index = 0

    Extractor.extract_entities_with_indices(
      content,
      extract_url_without_protocol: false
    ) do |entity|
      # Text which precedes this entity, if any.
      advance = entity[:indices].first - processed_index
      nodes << Nokogiri::XML::Text.new(content[processed_index, advance], document) if advance.positive?

      nodes << entity_element(entity, document)
      processed_index = entity[:indices].last
    end

    # Text which follows the last entity (or all of it, if there were none).
    nodes << Nokogiri::XML::Text.new(content[processed_index..-1], document) if processed_index < content.size

    nodes
  end

  # Builds the +<mastodon-entity>+ placeholder for one extracted entity. An
  # entity matching none of +ENTITY_KINDS+ yields an attribute-less element.
  def entity_element(entity, document)
    element = Nokogiri::XML::Element.new('mastodon-entity', document)
    key, kind = ENTITY_KINDS.find { |entity_key, _kind| entity[entity_key] }

    if key
      element['kind'] = kind
      element['value'] = entity[key]
    end

    element
  end

  # Renders Markdown to HTML and removes every CR and LF from the result.
  def format_markdown(html)
    markdown_formatter.render(html).delete("\r\n")
  end

  # Builds the Redcarpet instance used by +#format_markdown+. Autolinked
  # URLs are rendered through +link_to_url+.
  def markdown_formatter
    extensions = {
      autolink: true,
      no_intra_emphasis: true,
      fenced_code_blocks: true,
      disable_indented_code_blocks: true,
      strikethrough: true,
      lax_spacing: true,
      space_after_headers: true,
      superscript: true,
      underline: true,
      highlight: true,
      footnotes: false,
    }

    renderer = HTMLRenderer.new({
      filter_html: false,
      escape_html: false,
      no_images: true,
      no_styles: true,
      safe_links_only: true,
      hard_wrap: true,
      link_attributes: { target: '_blank', rel: 'nofollow noopener' },
    }) do |url|
      link_to_url({ url: url })
    end

    Redcarpet::Markdown.new(renderer, extensions)
  end
end
|