From 47f0d7021e76c60f5336b0c005c6336cd50c8009 Mon Sep 17 00:00:00 2001 From: Kaspar V Date: Sun, 27 Nov 2022 20:41:18 +0100 Subject: [PATCH] refactor(vacuum statuses): reduce amount of db queries and load for each query - improve performance (#21487) * refactor(statuses_vacuum): remove dead code - unused Method is not called inside class and private. Clean up dead code. * refactor(statuses_vacuum): make retention_period present test explicit This private method only hides functionality. It is best practice to be as explicit as possible. * refactor(statuses_vacuum): improve query performance - fix statuses_scope having sub-select for Account.remote scope by `joins(:account).merge(Account.remote)` - fix statuses_scope unnecessary use of `Status.arel_table[:id].lt` because it is inexplicit, bad practice and even slower than normal `.where('statuses.id < ?'` - fix statuses_scope remove select(:id, :visibility) for having reusable active record query batches (no re queries) - fix vacuum_statuses! to use in_batches instead of find_in_batches, because in_batches delivers a full blown active record query result, in stead of an array - no requeries necessary - send(:unlink_from_conversations) not to perform another db query, but reuse the in_batches result instead. - remove now obsolete remove_from_account_conversations method - remove_from_search_index uses array of ids, instead of mapping the ids from an array - this should be more efficient - use the in_batches scope to call delete_all, instead of running another db query for this - because it is again more efficient - add TODO comment for calling models private method with send * refactor(status): simplify unlink_from_conversations - add `has_many through:` relation mentioned_accounts - use model scope local instead of method call `Status#local?` - more readable add account to inbox_owners when account.local? * refactor(status): searchable_by way less sub selects These queries all included a sub-select. Doing the same with a joins should be more efficient. Since this method does 5 such queries, this should be significant, since it technically halves the query count. This is how it was: ```ruby [3] pry(main)> Status.first.mentions.where(account: Account.local, silent: false).explain Status Load (1.6ms) SELECT "statuses".* FROM "statuses" WHERE "statuses"."deleted_at" IS NULL ORDER BY "statuses"."id" DESC LIMIT $1 [["LIMIT", 1]] Mention Load (1.5ms) SELECT "mentions".* FROM "mentions" WHERE "mentions"."status_id" = $1 AND "mentions"."account_id" IN (SELECT "accounts"."id" FROM "accounts" WHERE "accounts"."domain" IS NULL) AND "mentions"."silent" = $2 [["status_id", 109382923142288414], ["silent", false]] => EXPLAIN for: SELECT "mentions".* FROM "mentions" WHERE "mentions"."status_id" = $1 AND "mentions"."account_id" IN (SELECT "accounts"."id" FROM "accounts" WHERE "accounts"."domain" IS NULL) AND "mentions"."silent" = $2 [["status_id", 109382923142288414], ["silent", false]] QUERY PLAN ------------------------------------------------------------------------------------------------------------------ Nested Loop (cost=0.15..23.08 rows=1 width=41) -> Seq Scan on accounts (cost=0.00..10.90 rows=1 width=8) Filter: (domain IS NULL) -> Index Scan using index_mentions_on_account_id_and_status_id on mentions (cost=0.15..8.17 rows=1 width=41) Index Cond: ((account_id = accounts.id) AND (status_id = '109382923142288414'::bigint)) Filter: (NOT silent) (6 rows) ``` This is how it is with this change: ```ruby [4] pry(main)> Status.first.mentions.joins(:account).merge(Account.local).active.explain Status Load (1.7ms) SELECT "statuses".* FROM "statuses" WHERE "statuses"."deleted_at" IS NULL ORDER BY "statuses"."id" DESC LIMIT $1 [["LIMIT", 1]] Mention Load (0.7ms) SELECT "mentions".* FROM "mentions" INNER JOIN "accounts" ON "accounts"."id" = "mentions"."account_id" WHERE "mentions"."status_id" = $1 AND "accounts"."domain" IS NULL AND "mentions"."silent" = $2 [["status_id", 109382923142288414], ["silent", false]] => EXPLAIN for: SELECT "mentions".* FROM "mentions" INNER JOIN "accounts" ON "accounts"."id" = "mentions"."account_id" WHERE "mentions"."status_id" = $1 AND "accounts"."domain" IS NULL AND "mentions"."silent" = $2 [["status_id", 109382923142288414], ["silent", false]] QUERY PLAN ------------------------------------------------------------------------------------------------------------------ Nested Loop (cost=0.15..23.08 rows=1 width=41) -> Seq Scan on accounts (cost=0.00..10.90 rows=1 width=8) Filter: (domain IS NULL) -> Index Scan using index_mentions_on_account_id_and_status_id on mentions (cost=0.15..8.17 rows=1 width=41) Index Cond: ((account_id = accounts.id) AND (status_id = '109382923142288414'::bigint)) Filter: (NOT silent) (6 rows) ``` --- app/lib/vacuum/statuses_vacuum.rb | 41 +++++++++++++------------------ app/models/status.rb | 15 +++++------ 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/app/lib/vacuum/statuses_vacuum.rb b/app/lib/vacuum/statuses_vacuum.rb index 41d6ba270..d1c4e7197 100644 --- a/app/lib/vacuum/statuses_vacuum.rb +++ b/app/lib/vacuum/statuses_vacuum.rb @@ -8,47 +8,40 @@ class Vacuum::StatusesVacuum end def perform - vacuum_statuses! if retention_period? + vacuum_statuses! if @retention_period.present? end private def vacuum_statuses! - statuses_scope.find_in_batches do |statuses| + statuses_scope.in_batches do |statuses| # Side-effects not covered by foreign keys, such # as the search index, must be handled first. + statuses.direct_visibility + .includes(mentions: :account) + .find_each do |status| + # TODO: replace temporary solution - call of private model method + status.send(:unlink_from_conversations) + end + remove_from_search_index(statuses.ids) if Chewy.enabled? - remove_from_account_conversations(statuses) - remove_from_search_index(statuses) - - # Foreign keys take care of most associated records - # for us. Media attachments will be orphaned. - - Status.where(id: statuses.map(&:id)).delete_all + # Foreign keys take care of most associated records for us. + # Media attachments will be orphaned. + statuses.delete_all end end def statuses_scope - Status.unscoped.kept.where(account: Account.remote).where(Status.arel_table[:id].lt(retention_period_as_id)).select(:id, :visibility) + Status.unscoped.kept + .joins(:account).merge(Account.remote) + .where('statuses.id < ?', retention_period_as_id) end def retention_period_as_id Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false) end - def analyze_statuses! - ActiveRecord::Base.connection.execute('ANALYZE statuses') - end - - def remove_from_account_conversations(statuses) - Status.where(id: statuses.select(&:direct_visibility?).map(&:id)).includes(:account, mentions: :account).each(&:unlink_from_conversations) - end - - def remove_from_search_index(statuses) - with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', statuses.map(&:id)) } if Chewy.enabled? - end - - def retention_period? - @retention_period.present? + def remove_from_search_index(status_ids) + with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', status_ids) } end end diff --git a/app/models/status.rb b/app/models/status.rb index 8bdb5e8db..2fe9f2de0 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -66,6 +66,7 @@ class Status < ApplicationRecord has_many :reblogged_by_accounts, through: :reblogs, class_name: 'Account', source: :account has_many :replies, foreign_key: 'in_reply_to_id', class_name: 'Status', inverse_of: :thread has_many :mentions, dependent: :destroy, inverse_of: :status + has_many :mentioned_accounts, through: :mentions, source: :account, class_name: 'Account' has_many :active_mentions, -> { active }, class_name: 'Mention', inverse_of: :status has_many :media_attachments, dependent: :nullify @@ -141,11 +142,11 @@ class Status < ApplicationRecord ids << account_id if local? if preloaded.nil? - ids += mentions.where(account: Account.local, silent: false).pluck(:account_id) - ids += favourites.where(account: Account.local).pluck(:account_id) - ids += reblogs.where(account: Account.local).pluck(:account_id) - ids += bookmarks.where(account: Account.local).pluck(:account_id) - ids += poll.votes.where(account: Account.local).pluck(:account_id) if poll.present? + ids += mentions.joins(:account).merge(Account.local).active.pluck(:account_id) + ids += favourites.joins(:account).merge(Account.local).pluck(:account_id) + ids += reblogs.joins(:account).merge(Account.local).pluck(:account_id) + ids += bookmarks.joins(:account).merge(Account.local).pluck(:account_id) + ids += poll.votes.joins(:account).merge(Account.local).pluck(:account_id) if poll.present? else ids += preloaded.mentions[id] || [] ids += preloaded.favourites[id] || [] @@ -527,8 +528,8 @@ class Status < ApplicationRecord def unlink_from_conversations return unless direct_visibility? - mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account) - inbox_owners = mentioned_accounts.select(&:local?) + (account.local? ? [account] : []) + inbox_owners = mentioned_accounts.local + inbox_owners += [account] if account.local? inbox_owners.each do |inbox_owner| AccountConversation.remove_status(inbox_owner, self)