From 42ea24f2f41ce04588aa929e5ffdaf1dbeb1a700 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Wed, 9 Dec 2020 22:13:09 +0100 Subject: Remove LastModified-based cursor when updating feeds We remove the call to readSince() as some feeds push new articles with pubDate prior to the lastModified time stored for these feeds (e.g. lemonde.fr). As we go through all items of a feed again and again, we prevent the constant update of an item's lastModified timestamp by keeping the previous one if its fingerprint does not change. Fixes #921 Signed-off-by: Kevin Decherf --- CHANGELOG.md | 1 + lib/Db/NewsMapperV2.php | 4 +++- lib/Fetcher/FeedFetcher.php | 16 +------------ lib/Service/ItemServiceV2.php | 7 ++++++ tests/Unit/Fetcher/FeedFetcherTest.php | 43 ++++++---------------------------- 5 files changed, 19 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ac0fb893..244e7aae9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ## Changed - Remove outdated folder DB code - Fix #963 +- Fix #921 ## 15.1.0 diff --git a/lib/Db/NewsMapperV2.php b/lib/Db/NewsMapperV2.php index 708ef4f73..93500c7c9 100644 --- a/lib/Db/NewsMapperV2.php +++ b/lib/Db/NewsMapperV2.php @@ -58,7 +58,9 @@ abstract class NewsMapperV2 extends QBMapper public function update(Entity $entity): Entity { - $entity->setLastModified($this->time->getMicroTime()); + if ([] !== $entity->getUpdatedFields()) { + $entity->setLastModified($this->time->getMicroTime()); + } return parent::update($entity); } diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php index fa6d9a346..b9526165d 100755 --- a/lib/Fetcher/FeedFetcher.php +++ b/lib/Fetcher/FeedFetcher.php @@ -111,21 +111,7 @@ class FeedFetcher implements IFeedFetcher } $url = $url2->getNormalizedURL(); $this->reader->resetFilters(); - if (empty($lastModified) || !is_string($lastModified)) { - $resource = $this->reader->read($url); - } else { - $resource = $this->reader->readSince($url, new DateTime($lastModified)); - } - - $response = $resource->getResponse(); - if (!$response->isModified()) { - $this->logger->debug('Feed {url} was not modified since last fetch. old: {old}, new: {new}', [ - 'url' => $url, - 'old' => print_r($lastModified, true), - 'new' => print_r($response->getLastModified(), true), - ]); - return [null, []]; - } + $resource = $this->reader->read($url); $location = $resource->getUrl(); $parsedFeed = $resource->getFeed(); diff --git a/lib/Service/ItemServiceV2.php b/lib/Service/ItemServiceV2.php index d0093a07e..0a04fd8ad 100644 --- a/lib/Service/ItemServiceV2.php +++ b/lib/Service/ItemServiceV2.php @@ -87,6 +87,13 @@ class ItemServiceV2 extends Service ->setStarred($db_item->isStarred()) ->setId($db_item->getId()); + $item->generateSearchIndex(); + // We don't want to update the database record if there is no + // change in the fetched item + if ($db_item->getFingerprint() === $item->getFingerprint()) { + $item->resetUpdatedFields(); + } + $this->mapper->update($item); } catch (DoesNotExistException $exception) { $this->mapper->insert($item); diff --git a/tests/Unit/Fetcher/FeedFetcherTest.php b/tests/Unit/Fetcher/FeedFetcherTest.php index 58504f3f1..6a1153b83 100644 --- a/tests/Unit/Fetcher/FeedFetcherTest.php +++ b/tests/Unit/Fetcher/FeedFetcherTest.php @@ -222,28 +222,12 @@ class FeedFetcherTest extends TestCase $this->assertTrue($this->fetcher->canHandle($url)); } - /** - * Test if empty is logged when the feed remain the same. - */ - public function testNoFetchIfNotModified() - { - $this->setUpReader($this->url, '@0', false); - $this->logger->expects($this->once()) - ->method('debug') - ->with( - 'Feed {url} was not modified since last fetch. old: {old}, new: {new}' - ); - $result = $this->fetcher->fetch($this->url, false, '@0', false, null, null); - - $this->assertSame([null, []], $result); - } - /** * Test if empty is logged when the feed remain the same. */ public function testFetchIfNoModifiedExists() { - $this->setUpReader($this->url, null, true); + $this->setUpReader($this->url, true); $item = $this->createItem(); $feed = $this->createFeed(); $this->mockIterator($this->feed_mock, [$this->item_mock]); @@ -294,7 +278,7 @@ class FeedFetcherTest extends TestCase $this->body = $body; $this->parsed_body = $parsed_body; - $this->setUpReader($this->url, null, true); + $this->setUpReader($this->url, true); $item = $this->createItem(); $feed = $this->createFeed(); $this->mockIterator($this->feed_mock, [$this->item_mock]); @@ -552,26 +536,13 @@ class FeedFetcherTest extends TestCase * @param string|null $modifiedDate Date of last fetch * @param bool $modified If the feed will be modified */ - private function setUpReader(string $url = '', ?string $modifiedDate = '@1553118393', bool $modified = true) + private function setUpReader(string $url = '', bool $modified = true) { - if (is_null($modifiedDate)) { - $this->reader->expects($this->once()) - ->method('read') - ->with($url) - ->will($this->returnValue($this->result)); - } else { - $this->reader->expects($this->once()) - ->method('readSince') - ->with($url, new DateTime($modifiedDate)) - ->will($this->returnValue($this->result)); - } + $this->reader->expects($this->once()) + ->method('read') + ->with($url) + ->will($this->returnValue($this->result)); - $this->result->expects($this->once()) - ->method('getResponse') - ->will($this->returnValue($this->response)); - $this->response->expects($this->once()) - ->method('isModified') - ->will($this->returnValue($modified !== false)); $this->location = $url; if (!$modified) { -- cgit v1.2.3