|
|
|
@ -19,6 +19,8 @@ class AdvancedTextFormatter < TextFormatter
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
attr_reader :content_type
|
|
|
|
|
|
|
|
|
|
# @param [String] text
|
|
|
|
|
# @param [Hash] options
|
|
|
|
|
# @option options [Boolean] :multiline
|
|
|
|
@ -27,73 +29,95 @@ class AdvancedTextFormatter < TextFormatter
|
|
|
|
|
# @option options [Array<Account>] :preloaded_accounts
|
|
|
|
|
# @option options [String] :content_type
|
|
|
|
|
def initialize(text, options = {})
|
|
|
|
|
content_type = options.delete(:content_type)
|
|
|
|
|
@content_type = options.delete(:content_type)
|
|
|
|
|
super(text, options)
|
|
|
|
|
|
|
|
|
|
@text = format_markdown(text) if content_type == 'text/markdown'
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
# Differs from TextFormatter by not messing with newline after parsing
|
|
|
|
|
# Differs from TextFormatter by operating on the parsed HTML tree ;)
|
|
|
|
|
#
|
|
|
|
|
# See +#tree+
|
|
|
|
|
def to_s
|
|
|
|
|
return ''.html_safe if text.blank?
|
|
|
|
|
|
|
|
|
|
html = rewrite do |entity|
|
|
|
|
|
if entity[:url]
|
|
|
|
|
link_to_url(entity)
|
|
|
|
|
elsif entity[:hashtag]
|
|
|
|
|
link_to_hashtag(entity)
|
|
|
|
|
elsif entity[:screen_name]
|
|
|
|
|
link_to_mention(entity)
|
|
|
|
|
result = tree.dup
|
|
|
|
|
result.css('mastodon-entity').each do |entity|
|
|
|
|
|
case entity['kind']
|
|
|
|
|
when 'hashtag'
|
|
|
|
|
entity.replace(link_to_hashtag({ hashtag: entity['value'] }))
|
|
|
|
|
when 'link'
|
|
|
|
|
entity.replace(link_to_url({ url: entity['value'] }))
|
|
|
|
|
when 'mention'
|
|
|
|
|
entity.replace(link_to_mention({ screen_name: entity['value'] }))
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
html.html_safe # rubocop:disable Rails/OutputSafety
|
|
|
|
|
result.to_html.html_safe # rubocop:disable Rails/OutputSafety
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
# Differs from `TextFormatter` by skipping HTML tags and entities
|
|
|
|
|
def entities
|
|
|
|
|
@entities ||= begin
|
|
|
|
|
gaps = []
|
|
|
|
|
total_offset = 0
|
|
|
|
|
##
|
|
|
|
|
# Process the status into a Nokogiri document fragment, with entities
|
|
|
|
|
# replaced with +<mastodon-entity>+s.
|
|
|
|
|
#
|
|
|
|
|
# Since +<mastodon-entity>+ is not allowed by the sanitizer, any such
|
|
|
|
|
# elements in the output *must* have been produced by this algorithm.
|
|
|
|
|
#
|
|
|
|
|
# These elements will need to be replaced prior to serialization (see
|
|
|
|
|
# +#to_s+).
|
|
|
|
|
def tree
|
|
|
|
|
if @tree.nil?
|
|
|
|
|
src = text.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
|
|
|
|
|
@tree = Nokogiri::HTML5.fragment(src)
|
|
|
|
|
Sanitize.node!(@tree, Sanitize::Config::MASTODON_OUTGOING)
|
|
|
|
|
document = @tree.document
|
|
|
|
|
|
|
|
|
|
escaped = text.gsub(/<[^>]*>|&#[0-9]+;/) do |match|
|
|
|
|
|
total_offset += match.length - 1
|
|
|
|
|
end_offset = Regexp.last_match.end(0)
|
|
|
|
|
gaps << [end_offset - total_offset, total_offset]
|
|
|
|
|
' '
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
Extractor.extract_entities_with_indices(escaped, extract_url_without_protocol: false).map do |entity|
|
|
|
|
|
start_pos, end_pos = entity[:indices]
|
|
|
|
|
offset_idx = gaps.rindex { |gap| gap.first <= start_pos }
|
|
|
|
|
offset = offset_idx.nil? ? 0 : gaps[offset_idx].last
|
|
|
|
|
entity.merge(indices: [start_pos + offset, end_pos + offset])
|
|
|
|
|
@tree.xpath('.//text()[not(ancestor::a | ancestor::code)]').each do |text_node|
|
|
|
|
|
# Iterate over text elements and build up their replacements.
|
|
|
|
|
content = text_node.content
|
|
|
|
|
replacement = Nokogiri::XML::NodeSet.new(document)
|
|
|
|
|
processed_index = 0
|
|
|
|
|
Extractor.extract_entities_with_indices(
|
|
|
|
|
content,
|
|
|
|
|
extract_url_without_protocol: false
|
|
|
|
|
) do |entity|
|
|
|
|
|
# Iterate over entities in this text node.
|
|
|
|
|
advance = entity[:indices].first - processed_index
|
|
|
|
|
if advance.positive?
|
|
|
|
|
# Text node for content which precedes entity.
|
|
|
|
|
replacement << Nokogiri::XML::Text.new(
|
|
|
|
|
content[processed_index, advance],
|
|
|
|
|
document
|
|
|
|
|
)
|
|
|
|
|
end
|
|
|
|
|
elt = Nokogiri::XML::Element.new('mastodon-entity', document)
|
|
|
|
|
if entity[:url]
|
|
|
|
|
elt['kind'] = 'link'
|
|
|
|
|
elt['value'] = entity[:url]
|
|
|
|
|
elsif entity[:hashtag]
|
|
|
|
|
elt['kind'] = 'hashtag'
|
|
|
|
|
elt['value'] = entity[:hashtag]
|
|
|
|
|
elsif entity[:screen_name]
|
|
|
|
|
elt['kind'] = 'mention'
|
|
|
|
|
elt['value'] = entity[:screen_name]
|
|
|
|
|
end
|
|
|
|
|
replacement << elt
|
|
|
|
|
processed_index = entity[:indices].last
|
|
|
|
|
end
|
|
|
|
|
if processed_index < content.size
|
|
|
|
|
# Text node for remaining content.
|
|
|
|
|
replacement << Nokogiri::XML::Text.new(
|
|
|
|
|
content[processed_index, content.size - processed_index],
|
|
|
|
|
document
|
|
|
|
|
)
|
|
|
|
|
end
|
|
|
|
|
text_node.replace(replacement)
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
@tree
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
|
|
# Differs from `TextFormatter` in that it keeps HTML; but it sanitizes at the end to remain safe
|
|
|
|
|
def rewrite
|
|
|
|
|
entities.sort_by! do |entity|
|
|
|
|
|
entity[:indices].first
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
result = ''.dup
|
|
|
|
|
|
|
|
|
|
last_index = entities.reduce(0) do |index, entity|
|
|
|
|
|
indices = entity[:indices]
|
|
|
|
|
result << text[index...indices.first]
|
|
|
|
|
result << yield(entity)
|
|
|
|
|
indices.last
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
result << text[last_index..-1]
|
|
|
|
|
|
|
|
|
|
Sanitize.fragment(result, Sanitize::Config::MASTODON_OUTGOING)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
def format_markdown(html)
|
|
|
|
|
html = markdown_formatter.render(html)
|
|
|
|
|
html.delete("\r").delete("\n")
|
|
|
|
|