Add batch_size option to bin/tootctl search deploy (#17049)

This commit is contained in:
OSAMU SATO 2021-11-26 16:29:53 +09:00 committed by GitHub
parent 0909874c08
commit 53aca8aecf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -17,6 +17,7 @@ module Mastodon
].freeze ].freeze
option :concurrency, type: :numeric, default: 2, aliases: [:c], desc: 'Workload will be split between this number of threads' option :concurrency, type: :numeric, default: 2, aliases: [:c], desc: 'Workload will be split between this number of threads'
option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
option :only, type: :array, enum: %w(accounts tags statuses), desc: 'Only process these indices' option :only, type: :array, enum: %w(accounts tags statuses), desc: 'Only process these indices'
desc 'deploy', 'Create or upgrade ElasticSearch indices and populate them' desc 'deploy', 'Create or upgrade ElasticSearch indices and populate them'
long_desc <<~LONG_DESC long_desc <<~LONG_DESC
@@ -35,6 +36,11 @@ module Mastodon
exit(1) exit(1)
end end
if options[:batch_size] < 1
say('Cannot run with this batch_size setting, must be at least 1', :red)
exit(1)
end
indices = begin indices = begin
if options[:only] if options[:only]
options[:only].map { |str| "#{str.camelize}Index".constantize } options[:only].map { |str| "#{str.camelize}Index".constantize }
@@ -73,7 +79,7 @@ module Mastodon
# is uneconomical. So we only ever add. # is uneconomical. So we only ever add.
indices.each do |index| indices.each do |index|
progress.title = "Importing #{index} " progress.title = "Importing #{index} "
batch_size = 1_000 batch_size = options[:batch_size]
slice_size = (batch_size / options[:concurrency]).ceil slice_size = (batch_size / options[:concurrency]).ceil
index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch| index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|