1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# frozen_string_literal: true
class Scheduler::AccountsStatusesCleanupScheduler
include Sidekiq::Worker
include Redisable
# This limit is mostly to be nice to the fediverse at large and not
# generate too much traffic.
# This also helps limiting the running time of the scheduler itself.
MAX_BUDGET = 300
# This is an attempt to spread the load across remote servers, as
# spreading deletions across diverse accounts is likely to spread
# the deletion across diverse followers. It also helps each individual
# user see some effect sooner.
PER_ACCOUNT_BUDGET = 5
# This is an attempt to limit the workload generated by status removal
# jobs to something the particular server can handle.
PER_THREAD_BUDGET = 5
# These are latency limits on various queues above which a server is
# considered to be under load, causing the auto-deletion to be entirely
# skipped for that run.
LOAD_LATENCY_THRESHOLDS = {
default: 5,
push: 10,
# The `pull` queue has lower priority jobs, and it's unlikely that
# pushing deletes would cause much issues with this queue if it didn't
# cause issues with `default` and `push`. Yet, do not enqueue deletes
# if the instance is lagging behind too much.
pull: 5.minutes.to_i,
}.freeze
sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i
def perform
return if under_load?
budget = compute_budget
# If the budget allows it, we want to consider all accounts with enabled
# auto cleanup at least once.
#
# We start from `first_policy_id` (the last processed id in the previous
# run) and process each policy until we loop to `first_policy_id`,
# recording into `affected_policies` any policy that caused posts to be
# deleted.
#
# After that, we set `full_iteration` to `false` and continue looping on
# policies from `affected_policies`.
first_policy_id = last_processed_id || 0
first_iteration = true
full_iteration = true
affected_policies = []
loop do
num_processed_accounts = 0
scope = cleanup_policies(first_policy_id, affected_policies, first_iteration, full_iteration)
scope.find_each(order: :asc) do |policy|
num_deleted = AccountStatusesCleanupService.new.call(policy, [budget, PER_ACCOUNT_BUDGET].min)
budget -= num_deleted
unless num_deleted.zero?
num_processed_accounts += 1
affected_policies << policy.id if full_iteration
end
full_iteration = false if !first_iteration && policy.id >= first_policy_id
if budget.zero?
save_last_processed_id(policy.id)
break
end
end
# The idea here is to loop through all policies at least once until the budget is exhausted
# and start back after the last processed account otherwise
break if budget.zero? || (num_processed_accounts.zero? && !full_iteration)
full_iteration = false unless first_iteration
first_iteration = false
end
end
def compute_budget
# Each post deletion is a `RemovalWorker` job (on `default` queue), each
# potentially spawning many `ActivityPub::DeliveryWorker` jobs (on the `push` queue).
threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.pluck('concurrency').sum
[PER_THREAD_BUDGET * threads, MAX_BUDGET].min
end
def under_load?
LOAD_LATENCY_THRESHOLDS.any? { |queue, max_latency| queue_under_load?(queue, max_latency) }
end
private
def cleanup_policies(first_policy_id, affected_policies, first_iteration, full_iteration)
scope = AccountStatusesCleanupPolicy.where(enabled: true)
if full_iteration
# If we are doing a full iteration, examine all policies we have not examined yet
if first_iteration
scope.where(id: first_policy_id...)
else
scope.where(id: ..first_policy_id).or(scope.where(id: affected_policies))
end
else
# Otherwise, examine only policies that previously yielded posts to delete
scope.where(id: affected_policies)
end
end
def queue_under_load?(name, max_latency)
Sidekiq::Queue.new(name).latency > max_latency
end
def last_processed_id
redis.get('account_statuses_cleanup_scheduler:last_policy_id')&.to_i
end
def save_last_processed_id(id)
if id.nil?
redis.del('account_statuses_cleanup_scheduler:last_policy_id')
else
redis.set('account_statuses_cleanup_scheduler:last_policy_id', id, ex: 1.hour.seconds)
end
end
end
|