summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2022-09-27 03:08:19 +0200
committer GitHub <noreply@github.com> 2022-09-27 03:08:19 +0200
commit 5c9abdeff1d0cf3e14d84c5ae298e6a5beccaf18 (patch)
tree b4bad153eec9f2a39d96a9da342e1618ac43740b
parent 3e0999cd1139d638332d62129dbf0b37263802fd (diff)
Add retention policy for cached content and media (#19232)
-rw-r--r-- app/lib/redis_configuration.rb 6
-rw-r--r-- app/lib/vacuum.rb 3
-rw-r--r-- app/lib/vacuum/access_tokens_vacuum.rb (renamed from app/workers/scheduler/doorkeeper_cleanup_scheduler.rb) 15
-rw-r--r-- app/lib/vacuum/backups_vacuum.rb 25
-rw-r--r-- app/lib/vacuum/feeds_vacuum.rb 34
-rw-r--r-- app/lib/vacuum/media_attachments_vacuum.rb 40
-rw-r--r-- app/lib/vacuum/preview_cards_vacuum.rb 39
-rw-r--r-- app/lib/vacuum/statuses_vacuum.rb 54
-rw-r--r-- app/lib/vacuum/system_keys_vacuum.rb 13
-rw-r--r-- app/models/content_retention_policy.rb 25
-rw-r--r-- app/models/form/admin_settings.rb 4
-rw-r--r-- app/views/admin/settings/edit.html.haml 8
-rw-r--r-- app/workers/scheduler/backup_cleanup_scheduler.rb 17
-rw-r--r-- app/workers/scheduler/feed_cleanup_scheduler.rb 35
-rw-r--r-- app/workers/scheduler/media_cleanup_scheduler.rb 17
-rw-r--r-- app/workers/scheduler/vacuum_scheduler.rb 56
-rw-r--r-- config/locales/simple_form.en.yml 8
-rw-r--r-- config/settings.yml 1
-rw-r--r-- config/sidekiq.yml 16
-rw-r--r-- spec/fabricators/access_grant_fabricator.rb 6
-rw-r--r-- spec/fabricators/preview_card_fabricator.rb 1
-rw-r--r-- spec/lib/vacuum/access_tokens_vacuum_spec.rb 33
-rw-r--r-- spec/lib/vacuum/backups_vacuum_spec.rb 24
-rw-r--r-- spec/lib/vacuum/feeds_vacuum_spec.rb 30
-rw-r--r-- spec/lib/vacuum/media_attachments_vacuum_spec.rb 47
-rw-r--r-- spec/lib/vacuum/preview_cards_vacuum_spec.rb 36
-rw-r--r-- spec/lib/vacuum/statuses_vacuum_spec.rb 36
-rw-r--r-- spec/lib/vacuum/system_keys_vacuum_spec.rb 22
-rw-r--r-- spec/workers/scheduler/feed_cleanup_scheduler_spec.rb 26
-rw-r--r-- spec/workers/scheduler/media_cleanup_scheduler_spec.rb 15
30 files changed, 558 insertions, 134 deletions
diff --git a/app/lib/redis_configuration.rb b/app/lib/redis_configuration.rb
index e14d6c8b670..f0e86d985b8 100644
--- a/app/lib/redis_configuration.rb
+++ b/app/lib/redis_configuration.rb
@@ -7,9 +7,7 @@ class RedisConfiguration
@pool = ConnectionPool.new(size: new_pool_size) { new.connection }
end
- def with
- pool.with { |redis| yield redis }
- end
+ delegate :with, to: :pool
def pool
@pool ||= establish_pool(pool_size)
@@ -17,7 +15,7 @@ class RedisConfiguration
def pool_size
if Sidekiq.server?
- Sidekiq.options[:concurrency]
+ Sidekiq[:concurrency]
else
ENV['MAX_THREADS'] || 5
end
diff --git a/app/lib/vacuum.rb b/app/lib/vacuum.rb
new file mode 100644
index 00000000000..9db1ec90bed
--- /dev/null
+++ b/app/lib/vacuum.rb
@@ -0,0 +1,3 @@
+# frozen_string_literal: true
+
+module Vacuum; end
diff --git a/app/workers/scheduler/doorkeeper_cleanup_scheduler.rb b/app/lib/vacuum/access_tokens_vacuum.rb
index 9303a352fb7..4f3878027ab 100644
--- a/app/workers/scheduler/doorkeeper_cleanup_scheduler.rb
+++ b/app/lib/vacuum/access_tokens_vacuum.rb
@@ -1,13 +1,18 @@
# frozen_string_literal: true
-class Scheduler::DoorkeeperCleanupScheduler
- include Sidekiq::Worker
+class Vacuum::AccessTokensVacuum
+ def perform
+ vacuum_revoked_access_tokens!
+ vacuum_revoked_access_grants!
+ end
- sidekiq_options retry: 0
+ private
- def perform
+ def vacuum_revoked_access_tokens!
Doorkeeper::AccessToken.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
+ end
+
+ def vacuum_revoked_access_grants!
Doorkeeper::AccessGrant.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
- SystemKey.expired.delete_all
end
end
diff --git a/app/lib/vacuum/backups_vacuum.rb b/app/lib/vacuum/backups_vacuum.rb
new file mode 100644
index 00000000000..3b83072f369
--- /dev/null
+++ b/app/lib/vacuum/backups_vacuum.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class Vacuum::BackupsVacuum
+ def initialize(retention_period)
+ @retention_period = retention_period
+ end
+
+ def perform
+ vacuum_expired_backups! if retention_period?
+ end
+
+ private
+
+ def vacuum_expired_backups!
+ backups_past_retention_period.in_batches.destroy_all
+ end
+
+ def backups_past_retention_period
+ Backup.unscoped.where(Backup.arel_table[:created_at].lt(@retention_period.ago))
+ end
+
+ def retention_period?
+ @retention_period.present?
+ end
+end
diff --git a/app/lib/vacuum/feeds_vacuum.rb b/app/lib/vacuum/feeds_vacuum.rb
new file mode 100644
index 00000000000..f46bcf75f7f
--- /dev/null
+++ b/app/lib/vacuum/feeds_vacuum.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+class Vacuum::FeedsVacuum
+ def perform
+ vacuum_inactive_home_feeds!
+ vacuum_inactive_list_feeds!
+ end
+
+ private
+
+ def vacuum_inactive_home_feeds!
+ inactive_users.select(:id, :account_id).find_in_batches do |users|
+ feed_manager.clean_feeds!(:home, users.map(&:account_id))
+ end
+ end
+
+ def vacuum_inactive_list_feeds!
+ inactive_users_lists.select(:id).find_in_batches do |lists|
+ feed_manager.clean_feeds!(:list, lists.map(&:id))
+ end
+ end
+
+ def inactive_users
+ User.confirmed.inactive
+ end
+
+ def inactive_users_lists
+ List.where(account_id: inactive_users.select(:account_id))
+ end
+
+ def feed_manager
+ FeedManager.instance
+ end
+end
diff --git a/app/lib/vacuum/media_attachments_vacuum.rb b/app/lib/vacuum/media_attachments_vacuum.rb
new file mode 100644
index 00000000000..7fb347ce4d5
--- /dev/null
+++ b/app/lib/vacuum/media_attachments_vacuum.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+class Vacuum::MediaAttachmentsVacuum
+ TTL = 1.day.freeze
+
+ def initialize(retention_period)
+ @retention_period = retention_period
+ end
+
+ def perform
+ vacuum_cached_files! if retention_period?
+ vacuum_orphaned_records!
+ end
+
+ private
+
+ def vacuum_cached_files!
+ media_attachments_past_retention_period.find_each do |media_attachment|
+ media_attachment.file.destroy
+ media_attachment.thumbnail.destroy
+ media_attachment.save
+ end
+ end
+
+ def vacuum_orphaned_records!
+ orphaned_media_attachments.in_batches.destroy_all
+ end
+
+ def media_attachments_past_retention_period
+ MediaAttachment.unscoped.remote.cached.where(MediaAttachment.arel_table[:created_at].lt(@retention_period.ago)).where(MediaAttachment.arel_table[:updated_at].lt(@retention_period.ago))
+ end
+
+ def orphaned_media_attachments
+ MediaAttachment.unscoped.unattached.where(MediaAttachment.arel_table[:created_at].lt(TTL.ago))
+ end
+
+ def retention_period?
+ @retention_period.present?
+ end
+end
diff --git a/app/lib/vacuum/preview_cards_vacuum.rb b/app/lib/vacuum/preview_cards_vacuum.rb
new file mode 100644
index 00000000000..84ef100ed94
--- /dev/null
+++ b/app/lib/vacuum/preview_cards_vacuum.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+class Vacuum::PreviewCardsVacuum
+ TTL = 1.day.freeze
+
+ def initialize(retention_period)
+ @retention_period = retention_period
+ end
+
+ def perform
+ vacuum_cached_images! if retention_period?
+ vacuum_orphaned_records!
+ end
+
+ private
+
+ def vacuum_cached_images!
+ preview_cards_past_retention_period.find_each do |preview_card|
+ preview_card.image.destroy
+ preview_card.save
+ end
+ end
+
+ def vacuum_orphaned_records!
+ orphaned_preview_cards.in_batches.destroy_all
+ end
+
+ def preview_cards_past_retention_period
+ PreviewCard.cached.where(PreviewCard.arel_table[:updated_at].lt(@retention_period.ago))
+ end
+
+ def orphaned_preview_cards
+ PreviewCard.where('NOT EXISTS (SELECT 1 FROM preview_cards_statuses WHERE preview_cards_statuses.preview_card_id = preview_cards.id)').where(PreviewCard.arel_table[:created_at].lt(TTL.ago))
+ end
+
+ def retention_period?
+ @retention_period.present?
+ end
+end
diff --git a/app/lib/vacuum/statuses_vacuum.rb b/app/lib/vacuum/statuses_vacuum.rb
new file mode 100644
index 00000000000..41d6ba270cc
--- /dev/null
+++ b/app/lib/vacuum/statuses_vacuum.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+class Vacuum::StatusesVacuum
+ include Redisable
+
+ def initialize(retention_period)
+ @retention_period = retention_period
+ end
+
+ def perform
+ vacuum_statuses! if retention_period?
+ end
+
+ private
+
+ def vacuum_statuses!
+ statuses_scope.find_in_batches do |statuses|
+ # Side-effects not covered by foreign keys, such
+ # as the search index, must be handled first.
+
+ remove_from_account_conversations(statuses)
+ remove_from_search_index(statuses)
+
+ # Foreign keys take care of most associated records
+ # for us. Media attachments will be orphaned.
+
+ Status.where(id: statuses.map(&:id)).delete_all
+ end
+ end
+
+ def statuses_scope
+ Status.unscoped.kept.where(account: Account.remote).where(Status.arel_table[:id].lt(retention_period_as_id)).select(:id, :visibility)
+ end
+
+ def retention_period_as_id
+ Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false)
+ end
+
+ def analyze_statuses!
+ ActiveRecord::Base.connection.execute('ANALYZE statuses')
+ end
+
+ def remove_from_account_conversations(statuses)
+ Status.where(id: statuses.select(&:direct_visibility?).map(&:id)).includes(:account, mentions: :account).each(&:unlink_from_conversations)
+ end
+
+ def remove_from_search_index(statuses)
+ with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', statuses.map(&:id)) } if Chewy.enabled?
+ end
+
+ def retention_period?
+ @retention_period.present?
+ end
+end
diff --git a/app/lib/vacuum/system_keys_vacuum.rb b/app/lib/vacuum/system_keys_vacuum.rb
new file mode 100644
index 00000000000..ceee2fd164b
--- /dev/null
+++ b/app/lib/vacuum/system_keys_vacuum.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+class Vacuum::SystemKeysVacuum
+ def perform
+ vacuum_expired_system_keys!
+ end
+
+ private
+
+ def vacuum_expired_system_keys!
+ SystemKey.expired.delete_all
+ end
+end
diff --git a/app/models/content_retention_policy.rb b/app/models/content_retention_policy.rb
new file mode 100644
index 00000000000..b5e922c8c5f
--- /dev/null
+++ b/app/models/content_retention_policy.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class ContentRetentionPolicy
+ def self.current
+ new
+ end
+
+ def media_cache_retention_period
+ retention_period Setting.media_cache_retention_period
+ end
+
+ def content_cache_retention_period
+ retention_period Setting.content_cache_retention_period
+ end
+
+ def backups_retention_period
+ retention_period Setting.backups_retention_period
+ end
+
+ private
+
+ def retention_period(value)
+ value.days if value.is_a?(Integer) && value.positive?
+ end
+end
diff --git a/app/models/form/admin_settings.rb b/app/models/form/admin_settings.rb
index 97fabc6ac97..3a71509162c 100644
--- a/app/models/form/admin_settings.rb
+++ b/app/models/form/admin_settings.rb
@@ -32,6 +32,9 @@ class Form::AdminSettings
show_domain_blocks_rationale
noindex
require_invite_text
+ media_cache_retention_period
+ content_cache_retention_period
+ backups_retention_period
).freeze
BOOLEAN_KEYS = %i(
@@ -64,6 +67,7 @@ class Form::AdminSettings
validates :bootstrap_timeline_accounts, existing_username: { multiple: true }
validates :show_domain_blocks, inclusion: { in: %w(disabled users all) }
validates :show_domain_blocks_rationale, inclusion: { in: %w(disabled users all) }
+ validates :media_cache_retention_period, :content_cache_retention_period, :backups_retention_period, numericality: { only_integer: true }
def initialize(_attributes = {})
super
diff --git a/app/views/admin/settings/edit.html.haml b/app/views/admin/settings/edit.html.haml
index 64687b7a60f..1dfd216439d 100644
--- a/app/views/admin/settings/edit.html.haml
+++ b/app/views/admin/settings/edit.html.haml
@@ -45,7 +45,6 @@
.fields-group
= f.input :require_invite_text, as: :boolean, wrapper: :with_label, label: t('admin.settings.registrations.require_invite_text.title'), hint: t('admin.settings.registrations.require_invite_text.desc_html'), disabled: !approved_registrations?
- .fields-group
%hr.spacer/
@@ -100,5 +99,12 @@
= f.input :site_terms, wrapper: :with_block_label, as: :text, label: t('admin.settings.site_terms.title'), hint: t('admin.settings.site_terms.desc_html'), input_html: { rows: 8 }
= f.input :custom_css, wrapper: :with_block_label, as: :text, input_html: { rows: 8 }, label: t('admin.settings.custom_css.title'), hint: t('admin.settings.custom_css.desc_html')
+ %hr.spacer/
+
+ .fields-group
+ = f.input :media_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+ = f.input :content_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+ = f.input :backups_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+
.actions
= f.button :button, t('generic.save_changes'), type: :submit
diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb
deleted file mode 100644
index 85d5312c0ad..00000000000
--- a/app/workers/scheduler/backup_cleanup_scheduler.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::BackupCleanupScheduler
- include Sidekiq::Worker
-
- sidekiq_options retry: 0
-
- def perform
- old_backups.reorder(nil).find_each(&:destroy!)
- end
-
- private
-
- def old_backups
- Backup.where('created_at < ?', 7.days.ago)
- end
-end
diff --git a/app/workers/scheduler/feed_cleanup_scheduler.rb b/app/workers/scheduler/feed_cleanup_scheduler.rb
deleted file mode 100644
index aa0cc8b8df3..00000000000
--- a/app/workers/scheduler/feed_cleanup_scheduler.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::FeedCleanupScheduler
- include Sidekiq::Worker
- include Redisable
-
- sidekiq_options retry: 0
-
- def perform
- clean_home_feeds!
- clean_list_feeds!
- end
-
- private
-
- def clean_home_feeds!
- feed_manager.clean_feeds!(:home, inactive_account_ids)
- end
-
- def clean_list_feeds!
- feed_manager.clean_feeds!(:list, inactive_list_ids)
- end
-
- def inactive_account_ids
- @inactive_account_ids ||= User.confirmed.inactive.pluck(:account_id)
- end
-
- def inactive_list_ids
- List.where(account_id: inactive_account_ids).pluck(:id)
- end
-
- def feed_manager
- FeedManager.instance
- end
-end
diff --git a/app/workers/scheduler/media_cleanup_scheduler.rb b/app/workers/scheduler/media_cleanup_scheduler.rb
deleted file mode 100644
index 24d30a6be59..00000000000
--- a/app/workers/scheduler/media_cleanup_scheduler.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::MediaCleanupScheduler
- include Sidekiq::Worker
-
- sidekiq_options retry: 0
-
- def perform
- unattached_media.find_each(&:destroy)
- end
-
- private
-
- def unattached_media
- MediaAttachment.reorder(nil).unattached.where('created_at < ?', 1.day.ago)
- end
-end
diff --git a/app/workers/scheduler/vacuum_scheduler.rb b/app/workers/scheduler/vacuum_scheduler.rb
new file mode 100644
index 00000000000..ce88ff204d3
--- /dev/null
+++ b/app/workers/scheduler/vacuum_scheduler.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+class Scheduler::VacuumScheduler
+ include Sidekiq::Worker
+
+ sidekiq_options retry: 0
+
+ def perform
+ vacuum_operations.each do |operation|
+ operation.perform
+ rescue => e
+ Rails.logger.error("Error while running #{operation.class.name}: #{e}")
+ end
+ end
+
+ private
+
+ def vacuum_operations
+ [
+ statuses_vacuum,
+ media_attachments_vacuum,
+ preview_cards_vacuum,
+ backups_vacuum,
+ access_tokens_vacuum,
+ feeds_vacuum,
+ ]
+ end
+
+ def statuses_vacuum
+ Vacuum::StatusesVacuum.new(content_retention_policy.content_cache_retention_period)
+ end
+
+ def media_attachments_vacuum
+ Vacuum::MediaAttachmentsVacuum.new(content_retention_policy.media_cache_retention_period)
+ end
+
+ def preview_cards_vacuum
+ Vacuum::PreviewCardsVacuum.new(content_retention_policy.media_cache_retention_period)
+ end
+
+ def backups_vacuum
+ Vacuum::BackupsVacuum.new(content_retention_policy.backups_retention_period)
+ end
+
+ def access_tokens_vacuum
+ Vacuum::AccessTokensVacuum.new
+ end
+
+ def feeds_vacuum
+ Vacuum::FeedsVacuum.new
+ end
+
+ def content_retention_policy
+ ContentRetentionPolicy.current
+ end
+end
diff --git a/config/locales/simple_form.en.yml b/config/locales/simple_form.en.yml
index ddc83e8965c..db5b45e4103 100644
--- a/config/locales/simple_form.en.yml
+++ b/config/locales/simple_form.en.yml
@@ -73,6 +73,10 @@ en:
actions:
hide: Completely hide the filtered content, behaving as if it did not exist
warn: Hide the filtered content behind a warning mentioning the filter's title
+ form_admin_settings:
+ backups_retention_period: Keep generated user archives for the specified number of days.
+ content_cache_retention_period: Posts from other servers will be deleted after the specified number of days when set to a positive value. This may be irreversible.
+ media_cache_retention_period: Downloaded media files will be deleted after the specified number of days when set to a positive value, and re-downloaded on demand.
form_challenge:
current_password: You are entering a secure area
imports:
@@ -207,6 +211,10 @@ en:
actions:
hide: Hide completely
warn: Hide with a warning
+ form_admin_settings:
+ backups_retention_period: User archive retention period
+ content_cache_retention_period: Content cache retention period
+ media_cache_retention_period: Media cache retention period
interactions:
must_be_follower: Block notifications from non-followers
must_be_following: Block notifications from people you don't follow
diff --git a/config/settings.yml b/config/settings.yml
index eaa05071e11..41742118bc8 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -70,6 +70,7 @@ defaults: &defaults
show_domain_blocks: 'disabled'
show_domain_blocks_rationale: 'disabled'
require_invite_text: false
+ backups_retention_period: 7
development:
<<: *defaults
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 9ec6eb5eca7..e3156aa346c 100644
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml
@@ -25,22 +25,14 @@
every: '5m'
class: Scheduler::IndexingScheduler
queue: scheduler
- media_cleanup_scheduler:
+ vacuum_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
- class: Scheduler::MediaCleanupScheduler
- queue: scheduler
- feed_cleanup_scheduler:
- cron: '<%= Random.rand(0..59) %> <%= Random.rand(0..2) %> * * *'
- class: Scheduler