diff options
author | Alexander Batischev <eual.jp@gmail.com> | 2021-04-30 22:47:41 +0300 |
---|---|---|
committer | Alexander Batischev <eual.jp@gmail.com> | 2021-05-17 15:23:14 +0300 |
commit | 76efdc0dca000eefde4abd46fccb5f02705e2db7 (patch) | |
tree | dae3d2b80412d3287292b9e2629c97bd821454b0 | |
parent | a78a58c77ec160a24286d798281fb8c4b0130cf4 (diff) |
Switch RssFeed's storage to Utf8String
I had to delete a test which ensured that the "feedtitle" attribute of the
feed is encoded to the locale charset. This property shouldn't hold
anymore, because the attribute is only used internally (by filters), so
it should be in UTF-8.
-rw-r--r-- | include/rssfeed.h | 41 | ||||
-rw-r--r-- | src/rssfeed.cpp | 37 | ||||
-rw-r--r-- | test/rssfeed.cpp | 14 |
3 files changed, 40 insertions, 52 deletions
diff --git a/include/rssfeed.h b/include/rssfeed.h index 7c7aa5a6..833fb013 100644 --- a/include/rssfeed.h +++ b/include/rssfeed.h @@ -9,6 +9,7 @@ #include "matchable.h" #include "rssitem.h" +#include "utf8string.h" #include "utils.h" namespace newsboat { @@ -23,31 +24,32 @@ public: ~RssFeed() override; std::string title_raw() const { - return title_; + return title_.to_utf8(); } std::string title() const; void set_title(const std::string& t) { - title_ = t; - utils::trim(title_); + std::string tmp(t); + utils::trim(tmp); + title_ = Utf8String::from_utf8(tmp); } std::string description() const { - return description_; + return description_.to_utf8(); } void set_description(const std::string& d) { - description_ = d; + description_ = Utf8String::from_utf8(d); } const std::string& link() const { - return link_; + return link_.to_utf8(); } void set_link(const std::string& l) { - link_ = l; + link_ = Utf8String::from_utf8(l); } std::string pubDate() const @@ -68,13 +70,13 @@ public: void add_item(std::shared_ptr<RssItem> item) { items_.push_back(item); - items_guid_map[item->guid()] = item; + items_guid_map[Utf8String::from_utf8(item->guid())] = item; } void add_items(const std::vector<std::shared_ptr<RssItem>>& items) { for (const auto& item : items) { items_.push_back(item); - items_guid_map[item->guid()] = item; + items_guid_map[Utf8String::from_utf8(item->guid())] = item; } } void set_items(std::vector<std::shared_ptr<RssItem>>& items) @@ -87,13 +89,13 @@ public: std::vector<std::shared_ptr<RssItem>>::iterator end) { for (auto it = begin; it != end; ++it) { - items_guid_map.erase((*it)->guid()); + items_guid_map.erase(Utf8String::from_utf8((*it)->guid())); } items_.erase(begin, end); } void erase_item(std::vector<std::shared_ptr<RssItem>>::iterator pos) { - items_guid_map.erase((*pos)->guid()); + items_guid_map.erase(Utf8String::from_utf8((*pos)->guid())); items_.erase(pos); } @@ -104,7 +106,7 @@ public: /// \brief User-specified feed URL. 
const std::string& rssurl() const { - return rssurl_; + return rssurl_.to_utf8(); } unsigned int unread_item_count() const; @@ -194,16 +196,15 @@ public: mutable std::mutex item_mutex; private: - std::string title_; - std::string description_; - std::string link_; + Utf8String title_; + Utf8String description_; + Utf8String link_; time_t pubDate_; - const std::string rssurl_; + const Utf8String rssurl_; std::vector<std::shared_ptr<RssItem>> items_; - std::unordered_map<std::string, std::shared_ptr<RssItem>> - items_guid_map; - std::vector<std::string> tags_; - std::string query; + std::unordered_map<Utf8String, std::shared_ptr<RssItem>> items_guid_map; + std::vector<Utf8String> tags_; + Utf8String query; Cache* ch; diff --git a/src/rssfeed.cpp b/src/rssfeed.cpp index 1b5c6628..0451ccf4 100644 --- a/src/rssfeed.cpp +++ b/src/rssfeed.cpp @@ -29,7 +29,7 @@ namespace newsboat { RssFeed::RssFeed(Cache* c, const std::string& rssurl) : pubDate_(0) - , rssurl_(rssurl) + , rssurl_(Utf8String::from_utf8(rssurl)) , ch(c) , search_feed(false) , is_rtl_(false) @@ -37,13 +37,13 @@ RssFeed::RssFeed(Cache* c, const std::string& rssurl) , order(0) , status_(DlStatus::SUCCESS) { - if (utils::is_query_url(rssurl_)) { + if (utils::is_query_url(rssurl_.to_utf8())) { /* Query string looks like this: * * query:Title:unread = "yes" and age between 0:7 * * So we split by colons to get title and the query itself. 
*/ - const auto tokens = utils::tokenize(rssurl_, ":"); + const auto tokens = utils::tokenize(rssurl_.to_utf8(), ":"); if (tokens.size() < 3) { throw _s("too few arguments"); @@ -71,7 +71,7 @@ RssFeed::RssFeed(Cache* c, const std::string& rssurl) query); set_title(tokens[1]); - this->query = query; + this->query = Utf8String::from_utf8(query); } } @@ -92,8 +92,8 @@ unsigned int RssFeed::unread_item_count() const bool RssFeed::matches_tag(const std::string& tag) { return std::find_if( - tags_.begin(), tags_.end(), [&](const std::string& t) { - return tag == t; + tags_.begin(), tags_.end(), [&](const Utf8String& t) { + return tag == t.to_utf8(); }) != tags_.end(); } @@ -101,7 +101,7 @@ std::string RssFeed::get_firsttag() { for (const auto& t : tags_) { if (t.substr(0, 1) != "~") { - return t; + return t.to_utf8(); } } return ""; @@ -112,7 +112,7 @@ std::string RssFeed::get_tags() const std::string tags; for (const auto& t : tags_) { if (t.substr(0, 1) != "~" && t.substr(0, 1) != "!") { - tags.append(t); + tags.append(t.to_utf8()); tags.append(" "); } } @@ -121,7 +121,10 @@ std::string RssFeed::get_tags() const void RssFeed::set_tags(const std::vector<std::string>& tags) { - tags_ = tags; + tags_.clear(); + for (const auto& tag : tags) { + tags_.push_back(Utf8String::from_utf8(tag)); + } } std::string RssFeed::title() const @@ -131,20 +134,18 @@ std::string RssFeed::title() const for (const auto& tag : tags_) { if (tag.substr(0, 1) == "~") { found_title = true; - alt_title = tag.substr(1, tag.length() - 1); + alt_title = tag.substr(1, tag.length() - 1).to_utf8(); break; } } - return found_title - ? alt_title - : utils::utf8_to_locale(title_); + return found_title ? 
alt_title : title_.to_utf8(); } bool RssFeed::hidden() const { return std::any_of(tags_.begin(), tags_.end(), - [](const std::string& tag) { + [](const Utf8String& tag) { return tag.substr(0, 1) == "!"; }); } @@ -158,7 +159,7 @@ std::shared_ptr<RssItem> RssFeed::get_item_by_guid(const std::string& guid) std::shared_ptr<RssItem> RssFeed::get_item_by_guid_unlocked( const std::string& guid) { - auto it = items_guid_map.find(guid); + auto it = items_guid_map.find(Utf8String::from_utf8(guid)); if (it != items_guid_map.end()) { return it->second; } @@ -208,7 +209,7 @@ void RssFeed::update_items(std::vector<std::shared_ptr<RssFeed>> feeds) ScopeMeasure sm("RssFeed::update_items"); - Matcher m(query); + Matcher m(query.to_utf8()); items_.clear(); items_guid_map.clear(); @@ -223,7 +224,7 @@ void RssFeed::update_items(std::vector<std::shared_ptr<RssFeed>> feeds) LOG(Level::DEBUG, "RssFeed::update_items: Matcher matches!"); item->set_feedptr(feed); items_.push_back(item); - items_guid_map[item->guid()] = item; + items_guid_map[Utf8String::from_utf8(item->guid())] = item; } } } @@ -334,7 +335,7 @@ void RssFeed::purge_deleted_items() std::lock_guard<std::mutex> lock2(items_guid_map_mutex); for (const auto& item : items_) { if (item->deleted()) { - items_guid_map.erase(item->guid()); + items_guid_map.erase(Utf8String::from_utf8(item->guid())); } } } diff --git a/test/rssfeed.cpp b/test/rssfeed.cpp index d9d29bb4..7334f511 100644 --- a/test/rssfeed.cpp +++ b/test/rssfeed.cpp @@ -416,20 +416,6 @@ TEST_CASE("RssFeed contains a number of matchable attributes", "[RssFeed]") const auto attr = "feedtitle"; REQUIRE(f.attribute_value(attr) == title); - - SECTION("it is encoded to the locale's charset") { - // Due to differences in how platforms handle //TRANSLIT in iconv, - // we can't compare results to a known-good value. Instead, we - // merely check that the result is *not* UTF-8. 
- - TestHelpers::LcCtypeEnvVar lc_ctype; - lc_ctype.set("C"); // This means ASCII - - const auto title = "こんにちは";// "good afternoon" in Japanese - f.set_title(title); - - REQUIRE_FALSE(f.attribute_value(attr) == title); - } } SECTION("description") { |