diff options
author | Claire <claire.github-309c@sitedethib.com> | 2023-08-30 17:36:16 +0200 |
---|---|---|
committer | Claire <claire.github-309c@sitedethib.com> | 2023-09-05 18:51:01 +0200 |
commit | a62d9a9a78cb1f3307470b6c4f4398c547ed5816 (patch) | |
tree | aa04ede540cc1c21e39585cfd0f808a4a8777a62 | |
parent | 2b0cabe0d7e07db54d12145a12a0d8b2a22e2939 (diff) |
Change text extraction in `PlainTextFormatter` to be faster (#26727)
-rw-r--r-- | app/lib/plain_text_formatter.rb | 13 |
1 file changed, 5 insertions, 8 deletions
```diff
diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb
index 6fa2bc5d2cc..d1ff6808b2a 100644
--- a/app/lib/plain_text_formatter.rb
+++ b/app/lib/plain_text_formatter.rb
@@ -1,9 +1,7 @@
 # frozen_string_literal: true
 
 class PlainTextFormatter
-  include ActionView::Helpers::TextHelper
-
-  NEWLINE_TAGS_RE = /(<br \/>|<br>|<\/p>)+/.freeze
+  NEWLINE_TAGS_RE = %r{(<br />|<br>|</p>)+}
 
   attr_reader :text, :local
 
@@ -18,7 +16,10 @@ class PlainTextFormatter
     if local?
       text
     else
-      html_entities.decode(strip_tags(insert_newlines)).chomp
+      node = Nokogiri::HTML.fragment(insert_newlines)
+      # Elements that are entirely removed with our Sanitize config
+      node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove
+      node.text.chomp
     end
   end
 
@@ -27,8 +28,4 @@ class PlainTextFormatter
   def insert_newlines
     text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" }
   end
-
-  def html_entities
-    HTMLEntities.new
-  end
 end
```