summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Claire <claire.github-309c@sitedethib.com> 2023-04-21 18:14:19 +0200
committer: Claire <claire.github-309c@sitedethib.com> 2023-07-06 13:45:40 +0200
commit: 0e139e3c4d8faa94fe0357d235f84a3f4c2abb50 (patch)
tree: 86ecf9955f73e38f656585a09fcdf5bb5044e079
parent: 23e7b4d28dc94ef927f6db4e5832a45e333b252e (diff)
Change automatic post deletion thresholds and load detection (#24614)
-rw-r--r-- app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb 49
1 files changed, 26 insertions, 23 deletions
diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
index d245f6bbdc1..26dd79e5262 100644
--- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
+++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
@@ -7,28 +7,30 @@ class Scheduler::AccountsStatusesCleanupScheduler
# This limit is mostly to be nice to the fediverse at large and not
# generate too much traffic.
# This also helps limit the running time of the scheduler itself.
- MAX_BUDGET = 150
+ MAX_BUDGET = 300
- # This is an attempt to spread the load across instances, as various
- # accounts are likely to have various followers.
+ # This is an attempt to spread the load across remote servers, as
+ # spreading deletions across diverse accounts is likely to spread
+ # the deletion across diverse followers. It also helps each individual
+ # user see some effect sooner.
PER_ACCOUNT_BUDGET = 5
# This is an attempt to limit the workload generated by status removal
- # jobs to something the particular instance can handle.
- PER_THREAD_BUDGET = 6
-
- # Those avoid loading an instance that is already under load
- MAX_DEFAULT_SIZE = 200
- MAX_DEFAULT_LATENCY = 5
- MAX_PUSH_SIZE = 500
- MAX_PUSH_LATENCY = 10
-
- # 'pull' queue has lower priority jobs, and it's unlikely that pushing
- # deletes would cause much issues with this queue if it didn't cause issues
- # with default and push. Yet, do not enqueue deletes if the instance is
- # lagging behind too much.
- MAX_PULL_SIZE = 10_000
- MAX_PULL_LATENCY = 5.minutes.to_i
+ # jobs to something the particular server can handle.
+ PER_THREAD_BUDGET = 5
+
+ # These are latency limits on various queues above which a server is
+ # considered to be under load, causing the auto-deletion to be entirely
+ # skipped for that run.
+ LOAD_LATENCY_THRESHOLDS = {
+ default: 5,
+ push: 10,
+ # The `pull` queue has lower-priority jobs, and it's unlikely that
+ # pushing deletes would cause many issues with this queue if it didn't
+ # cause issues with `default` and `push`. Yet, do not enqueue deletes
+ # if the server is lagging behind too much.
+ pull: 5.minutes.to_i,
+ }.freeze
sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i
@@ -61,19 +63,20 @@ class Scheduler::AccountsStatusesCleanupScheduler
end
def compute_budget
- threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
+ # Each post deletion is a `RemovalWorker` job (on `default` queue), each
+ # potentially spawning many `ActivityPub::DeliveryWorker` jobs (on the `push` queue).
+ threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.pluck('concurrency').sum
[PER_THREAD_BUDGET * threads, MAX_BUDGET].min
end
def under_load?
- queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
+ LOAD_LATENCY_THRESHOLDS.any? { |queue, max_latency| queue_under_load?(queue, max_latency) }
end
private
- def queue_under_load?(name, max_size, max_latency)
- queue = Sidekiq::Queue.new(name)
- queue.size > max_size || queue.latency > max_latency
+ def queue_under_load?(name, max_latency)
+ Sidekiq::Queue.new(name).latency > max_latency
end
def last_processed_id