This repository has been archived on 2023-07-01. You can view files and clone it, but cannot push or open issues or pull requests.
mastodon/app/models/glitch/keyword_mute.rb
David Yip 8fc54890e5 Prefix cache keys with the matcher type. #208.
We already know about one limitation of the regex approach: regexes cannot
segment words in e.g. Japanese, Chinese, or Thai.  It may also turn out
that regex matching is too slow compared to other methods.

However, the regex is an implementation detail.  We still want the
ability to switch between "occurs anywhere" and "match whole word", and
caching the matcher result is likely to still be important (since the
matcher itself changes far less often than new statuses arrive).
Therefore, we ought to be able to change the cache keys to reflect a
change of data structure.

(Old cache keys expire within minutes, so they shouldn't be too big of
an issue.  Old cache keys could also be explicitly removed by an
instance administrator.)
2017-11-15 18:12:31 -06:00

101 lines
2.3 KiB
Ruby

# frozen_string_literal: true
# == Schema Information
#
# Table name: glitch_keyword_mutes
#
# id :integer not null, primary key
# account_id :integer not null
# keyword :string not null
# whole_word :boolean default(TRUE), not null
# created_at :datetime not null
# updated_at :datetime not null
#
# A keyword that an account has muted.  Besides the record itself, this
# class provides matchers that compile all of an account's keyword mutes
# into a single regular expression whose source text is kept in Rails.cache.
class Glitch::KeywordMute < ApplicationRecord
  belongs_to :account, required: true

  validates_presence_of :keyword

  # The cached regex text is derived from the account's mutes, so every
  # committed change to a mute drops that account's cached entries.
  after_commit :invalidate_cached_matchers

  # Returns a TextMatcher for the given account; use `matcher =~ string`.
  def self.text_matcher_for(account_id)
    TextMatcher.new(account_id)
  end

  # Returns a TagMatcher for the given account; use `matcher =~ tags`.
  def self.tag_matcher_for(account_id)
    TagMatcher.new(account_id)
  end

  private

  # Deletes the cached regex text of both matcher types for this mute's account.
  def invalidate_cached_matchers
    Rails.cache.delete(TextMatcher.cache_key(account_id))
    Rails.cache.delete(TagMatcher.cache_key(account_id))
  end

  # Shared behaviour of the regex-backed matchers.  Subclasses must provide
  # `self.cache_key(account_id)` and a `make_regex_text` method returning the
  # regex source for the account's keywords.
  class RegexpMatcher
    attr_reader :account_id
    attr_reader :regex

    # Loads the regex source from Rails.cache, building it with
    # `make_regex_text` on a cache miss, and compiles it into `regex`.
    def initialize(account_id)
      @account_id = account_id
      regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text }
      @regex = /#{regex_text}/
    end

    protected

    # Returns [whole_word, keyword] pairs for every mute of the account.
    def keywords
      Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)
    end

    # Builds a case-insensitive regex matching `keyword` as a whole word.
    # A `\b` is only added on a side where the keyword itself starts/ends
    # with a word character; otherwise the boundary could never match.
    def boundary_regex_for_keyword(keyword)
      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
      # \z (not \Z) so that a keyword ending in a newline is not treated as
      # ending in the word character that precedes that newline.
      eb = keyword =~ /[[:word:]]\z/ ? '\b' : ''

      /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
    end

    # Builds a case-insensitive regex matching `keyword` anywhere in the
    # input.  Passing the bare string to Regexp.union instead would make
    # "occurs anywhere" mutes case-sensitive, unlike whole-word mutes.
    def substring_regex_for_keyword(keyword)
      /(?i:#{Regexp.escape(keyword)})/
    end
  end

  # Matches an account's muted keywords against a string.
  class TextMatcher < RegexpMatcher
    def self.cache_key(account_id)
      format('keyword_mutes:regex:text:%s', account_id)
    end

    # Returns the index of the first match in `str`, or nil if nothing matches.
    def =~(str)
      regex =~ str
    end

    private

    # Returns the source of a regex that matches any of the account's keywords.
    def make_regex_text
      kws = keywords.map! do |whole_word, keyword|
        whole_word ? boundary_regex_for_keyword(keyword) : substring_regex_for_keyword(keyword)
      end

      Regexp.union(kws).source
    end
  end

  # Matches an account's muted keywords against tag names.
  class TagMatcher < RegexpMatcher
    def self.cache_key(account_id)
      format('keyword_mutes:regex:tag:%s', account_id)
    end

    # `tags` must respond to `pluck(:name)`.  Returns the first name that
    # matches a muted keyword, or nil if none does.
    def =~(tags)
      tags.pluck(:name).find { |n| regex =~ n }
    end

    private

    # Returns the source of a regex that matches any of the account's keywords.
    # When a keyword matches Tag::HASHTAG_RE its first capture group is used
    # as the term (presumably the tag name without the leading '#' -- confirm
    # against Tag); otherwise the keyword is used unchanged.
    def make_regex_text
      kws = keywords.map! do |whole_word, keyword|
        term = (Tag::HASHTAG_RE =~ keyword) ? Regexp.last_match(1) : keyword
        whole_word ? boundary_regex_for_keyword(term) : substring_regex_for_keyword(term)
      end

      Regexp.union(kws).source
    end
  end
end