summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eugen Rochko <eugen@zeonfederated.com> 2020-12-22 17:13:55 +0100
committer: GitHub <noreply@github.com> 2020-12-22 17:13:55 +0100
commit: 9915d11c0d7a15b6775af8e78fcc4d836368f88d (patch)
tree: b6b5efe86ab2686eda673e81b10e05d17af30cf9
parent: 67ebd61f1180e63fcc671c583e7251e1e09755d9 (diff)
Fix unnecessary queries when batch-removing statuses, 100x faster (#15387)
-rw-r--r-- app/models/favourite.rb 2
-rw-r--r-- app/models/status.rb 12
-rw-r--r-- app/services/batched_remove_status_service.rb 91
-rw-r--r-- app/services/delete_account_service.rb 126
-rw-r--r-- config/initializers/chewy.rb 5
-rw-r--r-- lib/chewy/strategy/custom_sidekiq.rb 25
-rw-r--r-- spec/services/batched_remove_status_service_spec.rb 5
7 files changed, 167 insertions, 99 deletions
diff --git a/app/models/favourite.rb b/app/models/favourite.rb
index bf0ec4449c4..35028b7dd33 100644
--- a/app/models/favourite.rb
+++ b/app/models/favourite.rb
@@ -36,7 +36,7 @@ class Favourite < ApplicationRecord
end
def decrement_cache_counters
- return if association(:status).loaded? && (status.marked_for_destruction? || status.marked_for_mass_destruction?)
+ return if association(:status).loaded? && status.marked_for_destruction?
status&.decrement_count!(:favourites_count)
end
end
diff --git a/app/models/status.rb b/app/models/status.rb
index 96d90e1c27e..b426f9d5bcc 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -228,14 +228,6 @@ class Status < ApplicationRecord
@emojis = CustomEmoji.from_text(fields.join(' '), account.domain)
end
- def mark_for_mass_destruction!
- @marked_for_mass_destruction = true
- end
-
- def marked_for_mass_destruction?
- @marked_for_mass_destruction
- end
-
def replies_count
status_stat&.replies_count || 0
end
@@ -430,7 +422,7 @@ class Status < ApplicationRecord
end
def decrement_counter_caches
- return if direct_visibility? || marked_for_mass_destruction?
+ return if direct_visibility?
account&.decrement_count!(:statuses_count)
reblog&.decrement_count!(:reblogs_count) if reblog?
@@ -440,7 +432,7 @@ class Status < ApplicationRecord
def unlink_from_conversations
return unless direct_visibility?
- mentioned_accounts = mentions.includes(:account).map(&:account)
+ mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account)
inbox_owners = mentioned_accounts.select(&:local?) + (account.local? ? [account] : [])
inbox_owners.each do |inbox_owner|
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index 28e5468b389..63ab89f2d27 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -3,29 +3,45 @@
class BatchedRemoveStatusService < BaseService
include Redisable
- # Delete given statuses and reblogs of them
- # Remove statuses from home feeds
- # Push delete events to streaming API for home feeds and public feeds
- # @param [Enumerable<Status>] statuses A preferably batched array of statuses
+ # Delete multiple statuses and reblogs of them as efficiently as possible
+ # @param [Enumerable<Status>] statuses An array of statuses
# @param [Hash] options
- # @option [Boolean] :skip_side_effects
+ # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API
def call(statuses, **options)
- statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a }
+ ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
- @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a }
- @tags = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) }
+ statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
- @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+ # The conversations for direct visibility statuses also need
+ # to be manually updated. This part is not efficient but we
+ # rely on direct visibility statuses being relatively rare.
+ statuses_with_account_conversations = statuses.select(&:direct_visibility?)
- statuses.each do |status|
- status.mark_for_mass_destruction!
- status.destroy
+ ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account])
+
+ statuses_with_account_conversations.each do |status|
+ status.send(:unlink_from_conversations)
end
+ # We do not batch all deletes into one to avoid having a long-running
+ # transaction lock the database, but we use the delete method instead
+ # of destroy to avoid all callbacks. We rely on foreign keys to
+ # cascade the delete faster without loading the associations.
+ statuses_and_reblogs.each(&:delete)
+
+ # Since we skipped all callbacks, we also need to manually
+ # deindex the statuses
+ Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs)
+
return if options[:skip_side_effects]
+ ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
+
+ @tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
+ @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+
# Batch by source account
- statuses.group_by(&:account_id).each_value do |account_statuses|
+ statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
account = account_statuses.first.account
next unless account
@@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService
end
# Cannot be batched
- statuses.each do |status|
- unpush_from_public_timelines(status)
+ redis.pipelined do
+ statuses_and_reblogs.each do |status|
+ unpush_from_public_timelines(status)
+ end
end
end
private
def unpush_from_home_timelines(account, statuses)
- recipients = account.followers_for_local_distribution.to_a
-
- recipients << account if account.local?
-
- recipients.each do |follower|
+ account.followers_for_local_distribution.includes(:user).find_each do |follower|
statuses.each do |status|
FeedManager.instance.unpush_from_home(follower, status)
end
end
+
+ return unless account.local?
+
+ statuses.each do |status|
+ FeedManager.instance.unpush_from_home(account, status)
+ end
end
def unpush_from_list_timelines(account, statuses)
- account.lists_for_local_distribution.select(:id, :account_id).each do |list|
+ account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list|
statuses.each do |status|
FeedManager.instance.unpush_from_list(list, status)
end
@@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService
payload = @json_payloads[status.id]
- redis.pipelined do
- redis.publish('timeline:public', payload)
- if status.local?
- redis.publish('timeline:public:local', payload)
- else
- redis.publish('timeline:public:remote', payload)
- end
- if status.media_attachments.any?
- redis.publish('timeline:public:media', payload)
- if status.local?
- redis.publish('timeline:public:local:media', payload)
- else
- redis.publish('timeline:public:remote:media', payload)
- end
- end
+ redis.publish('timeline:public', payload)
+ redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
- @tags[status.id].each do |hashtag|
- redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload)
- redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local?
- end
+ if status.media_attachments.any?
+ redis.publish('timeline:public:media', payload)
+ redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
+ end
+
+ @tags[status.id].each do |hashtag|
+ redis.publish("timeline:hashtag:#{hashtag}", payload)
+ redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
end
end
end
diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb
index fa834e77534..5123a469715 100644
--- a/app/services/delete_account_service.rb
+++ b/app/services/delete_account_service.rb
@@ -6,15 +6,21 @@ class DeleteAccountService < BaseService
ASSOCIATIONS_ON_SUSPEND = %w(
account_pins
active_relationships
+ aliases
block_relationships
blocked_by_relationships
+ bookmarks
conversation_mutes
conversations
custom_filters
+ devices
domain_blocks
favourites
+ featured_tags
follow_requests
+ identity_proofs
list_accounts
+ migrations
mute_relationships
muted_by_relationships
notifications
@@ -25,6 +31,29 @@ class DeleteAccountService < BaseService
status_pins
).freeze
+ # The following associations have no important side-effects
+ # in callbacks and all of their own associations are secured
+ # by foreign keys, making them safe to delete without loading
+ # into memory
+ ASSOCIATIONS_WITHOUT_SIDE_EFFECTS = %w(
+ account_pins
+ aliases
+ conversation_mutes
+ conversations
+ custom_filters
+ devices
+ domain_blocks
+ featured_tags
+ follow_requests
+ identity_proofs
+ migrations
+ mute_relationships
+ muted_by_relationships
+ notifications
+ scheduled_statuses
+ status_pins
+ )
+
ASSOCIATIONS_ON_DESTROY = %w(
reports
targeted_moderation_notes
@@ -55,19 +84,25 @@ class DeleteAccountService < BaseService
@options[:skip_activitypub] = true if @options[:skip_side_effects]
- reject_follows!
- undo_follows!
- purge_user!
- purge_profile!
+ distribute_activities!
purge_content!
fulfill_deletion_request!
end
private
- def reject_follows!
- return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
+ def distribute_activities!
+ return if skip_activitypub?
+ if @account.local?
+ delete_actor!
+ elsif @account.activitypub?
+ reject_follows!
+ undo_follows!
+ end
+ end
+
+ def reject_follows!
# When deleting a remote account, the account obviously doesn't
# actually become deleted on its origin server, i.e. unlike a
# locally deleted account it continues to have access to its home
@@ -81,8 +116,6 @@ class DeleteAccountService < BaseService
end
def undo_follows!
- return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
-
# When deleting a remote account, the account obviously doesn't
# actually become deleted on its origin server, but following relationships
# are severed on our end. Therefore, make the remote server aware that the
@@ -97,7 +130,7 @@ class DeleteAccountService < BaseService
def purge_user!
return if !@account.local? || @account.user.nil?
- if @options[:reserve_email]
+ if keep_user_record?
@account.user.disable!
@account.user.invites.where(uses: 0).destroy_all
else
@@ -106,34 +139,52 @@ class DeleteAccountService < BaseService
end
def purge_content!
- distribute_delete_actor! if @account.local? && !@options[:skip_side_effects]
+ purge_user!
+ purge_profile!
+ purge_statuses!
+ purge_media_attachments!
+ purge_polls!
+ purge_generated_notifications!
+ purge_other_associations!
+ @account.destroy unless keep_account_record?
+ end
+
+ def purge_statuses!
@account.statuses.reorder(nil).find_in_batches do |statuses|
- statuses.reject! { |status| reported_status_ids.include?(status.id) } if @options[:reserve_username]
- BatchedRemoveStatusService.new.call(statuses, skip_side_effects: @options[:skip_side_effects])
+ statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record?
+
+ BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
end
+ end
+ def purge_media_attachments!
@account.media_attachments.reorder(nil).find_each do |media_attachment|
- next if @options[:reserve_username] && reported_status_ids.include?(media_attachment.status_id)
+ next if keep_account_record? && reported_status_ids.include?(media_attachment.status_id)
media_attachment.destroy
end
+ end
+ def purge_polls!
@account.polls.reorder(nil).find_each do |poll|
- next if @options[:reserve_username] && reported_status_ids.include?(poll.status_id)
+ next if keep_account_record? && reported_status_ids.include?(poll.status_id)
- # We can safely delete the poll rather than destroy it, as any non-reported
- # status should have been deleted already, as long as we take care of
- # notifications.
- Notification.where(poll: poll).delete_all
poll.delete
end
+ end
+ def purge_generated_notifications!
+ # By deleting polls and statuses without callbacks, we've left behind
+ # polymorphically associated notifications generated by this account
+
+ Notification.where(from_account: @account).in_batches.delete_all
+ end
+
+ def purge_other_associations!
associations_for_destruction.each do |association_name|
- destroy_all(@account.public_send(association_name))
+ purge_association(association_name)
end
-
- @account.destroy unless @options[:reserve_username]
end
def purge_profile!
@@ -141,7 +192,7 @@ class DeleteAccountService < BaseService
# there is no point wasting time updating
# its values first
- return unless @options[:reserve_username]
+ return unless keep_account_record?
@account.silenced_at = nil
@account.suspended_at = @options[:suspended_at] || Time.now.utc
@@ -156,6 +207,7 @@ class DeleteAccountService < BaseService
@account.followers_count = 0
@account.following_count = 0
@account.moved_to_account = nil
+ @account.also_known_as = []
@account.trust_level = :untrusted
@account.avatar.destroy
@account.header.destroy
@@ -166,11 +218,17 @@ class DeleteAccountService < BaseService
@account.deletion_request&.destroy
end
- def destroy_all(association)
- association.in_batches.destroy_all
+ def purge_association(association_name)
+ association = @account.public_send(association_name)
+
+ if ASSOCIATIONS_WITHOUT_SIDE_EFFECTS.include?(association_name)
+ association.in_batches.delete_all
+ else
+ association.in_batches.destroy_all
+ end
end
- def distribute_delete_actor!
+ def delete_actor!
ActivityPub::DeliveryWorker.push_bulk(delivery_inboxes) do |inbox_url|
[delete_actor_json, @account.id, inbox_url]
end
@@ -197,10 +255,26 @@ class DeleteAccountService < BaseService
end
def associations_for_destruction
- if @options[:reserve_username]
+ if keep_account_record?
ASSOCIATIONS_ON_SUSPEND
else
ASSOCIATIONS_ON_SUSPEND + ASSOCIATIONS_ON_DESTROY
end
end
+
+ def keep_user_record?
+ @options[:reserve_email]
+ end
+
+ def keep_account_record?
+ @options[:reserve_username]
+ end
+
+ def skip_side_effects?
+ @options[:skip_side_effects]
+ end
+
+ def skip_activitypub?
+ @options[:skip_activitypub]
+ end
end
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index 8f54abf7789..9fc9b2f1a59 100644
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb
@@ -12,6 +12,10 @@ Chewy.settings = {
sidekiq: { queue: 'pull' },
}
+# We use our own async strategy even outside the request-response
+# cycle, which takes care of checking if ElasticSearch is enabled
+# or not. However, mind that for the Rails console, the :urgent
+# strategy is set automatically with no way to override it.
Chewy.root_strategy = :custom_sidekiq
Chewy.request_strategy = :custom_sidekiq
Chewy.use_after_commit_callbacks = false
@@ -37,6 +41,7 @@ Elasticsearch::Transport::Client.prepend Module.new {
super arguments
end
}
+
Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
def create(arguments = {})
arguments[:include_type_name] = true
diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb
index 3e54326ba8b..794ae4ed44d 100644
--- a/lib/chewy/strategy/custom_sidekiq.rb
+++ b/lib/chewy/strategy/custom_sidekiq.rb
@@ -2,29 +2,10 @@
module Chewy
class Strategy
- class CustomSidekiq < Base
- class Worker
- include ::Sidekiq::Worker
-
- sidekiq_options queue: 'pull'
-
- def perform(type, ids, options = {})
- options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
- type.constantize.import!(ids, options)
- end
- end
-
- def update(type, objects, _options = {})
- return unless Chewy.enabled?
-
- ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)
-
- return if ids.empty?
-
- Worker.perform_async(type.name, ids)
+ class CustomSidekiq < Sidekiq
+ def update(_type, _objects, _options = {})
+ super if Chewy.enabled?
end
-
- def leave; end
end
end
end
diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb
index f84256f187e..239859f0668 100644
--- a/spec/services/batched_remove_status_service_spec.rb
+++ b/spec/services/batched_remove_status_service_spec.rb
@@ -26,6 +26,11 @@ RSpec.describe BatchedRemoveStatusService, type: :service do
subject.call([status1, status2])
end
+ it 'removes statuses' do
+ expect { Status.find(status1.id) }.to raise_error ActiveRecord::RecordNotFound
+ expect { Status.find(status2.id) }.to raise_error ActiveRecord::RecordNotFound
+ end
+
it 'removes statuses from author\'s home feed' do
expect(HomeFeed.new(alice).get(10)).to_not include([status1.id, status2.id])
end