diff options
author | Kevin Decherf <kevin@kdecherf.com> | 2020-12-09 22:13:09 +0100 |
---|---|---|
committer | Benjamin Brahmer <info@b-brahmer.de> | 2020-12-15 20:02:21 +0100 |
commit | 42ea24f2f41ce04588aa929e5ffdaf1dbeb1a700 (patch) | |
tree | 657089d619d1ee76d8f86dbf8553f9ce86c6867d | |
parent | 1345cedd7e226c23a9fff6275a0fb71e444aa1b3 (diff) |
Remove LastModified-based cursor when updating feeds
We remove the call to readSince() as some feeds push new articles with
pubDate prior to the lastModified time stored for these feeds (e.g.
lemonde.fr). As we go through all items of a feed again and again, we
prevent the constant update of an item's lastModified timestamp by
keeping the previous one if its fingerprint does not change.
Fixes #921
Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | lib/Db/NewsMapperV2.php | 4 | ||||
-rwxr-xr-x | lib/Fetcher/FeedFetcher.php | 16 | ||||
-rw-r--r-- | lib/Service/ItemServiceV2.php | 7 | ||||
-rw-r--r-- | tests/Unit/Fetcher/FeedFetcherTest.php | 43 |
5 files changed, 19 insertions, 52 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ac0fb893..244e7aae9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ## Changed - Remove outdated folder DB code - Fix #963 +- Fix #921 ## 15.1.0 diff --git a/lib/Db/NewsMapperV2.php b/lib/Db/NewsMapperV2.php index 708ef4f73..93500c7c9 100644 --- a/lib/Db/NewsMapperV2.php +++ b/lib/Db/NewsMapperV2.php @@ -58,7 +58,9 @@ abstract class NewsMapperV2 extends QBMapper public function update(Entity $entity): Entity { - $entity->setLastModified($this->time->getMicroTime()); + if ([] !== $entity->getUpdatedFields()) { + $entity->setLastModified($this->time->getMicroTime()); + } return parent::update($entity); } diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php index fa6d9a346..b9526165d 100755 --- a/lib/Fetcher/FeedFetcher.php +++ b/lib/Fetcher/FeedFetcher.php @@ -111,21 +111,7 @@ class FeedFetcher implements IFeedFetcher } $url = $url2->getNormalizedURL(); $this->reader->resetFilters(); - if (empty($lastModified) || !is_string($lastModified)) { - $resource = $this->reader->read($url); - } else { - $resource = $this->reader->readSince($url, new DateTime($lastModified)); - } - - $response = $resource->getResponse(); - if (!$response->isModified()) { - $this->logger->debug('Feed {url} was not modified since last fetch. old: {old}, new: {new}', [ - 'url' => $url, - 'old' => print_r($lastModified, true), - 'new' => print_r($response->getLastModified(), true), - ]); - return [null, []]; - } + $resource = $this->reader->read($url); $location = $resource->getUrl(); $parsedFeed = $resource->getFeed(); diff --git a/lib/Service/ItemServiceV2.php b/lib/Service/ItemServiceV2.php index d0093a07e..0a04fd8ad 100644 --- a/lib/Service/ItemServiceV2.php +++ b/lib/Service/ItemServiceV2.php @@ -87,6 +87,13 @@ class ItemServiceV2 extends Service ->setStarred($db_item->isStarred()) ->setId($db_item->getId()); + $item->generateSearchIndex(); + // We don't want to update the database record if there is no + // change in the fetched item + if ($db_item->getFingerprint() === $item->getFingerprint()) { + $item->resetUpdatedFields(); + } + $this->mapper->update($item); } catch (DoesNotExistException $exception) { $this->mapper->insert($item); diff --git a/tests/Unit/Fetcher/FeedFetcherTest.php b/tests/Unit/Fetcher/FeedFetcherTest.php index 58504f3f1..6a1153b83 100644 --- a/tests/Unit/Fetcher/FeedFetcherTest.php +++ b/tests/Unit/Fetcher/FeedFetcherTest.php @@ -225,25 +225,9 @@ class FeedFetcherTest extends TestCase /** * Test if empty is logged when the feed remain the same. */ - public function testNoFetchIfNotModified() - { - $this->setUpReader($this->url, '@0', false); - $this->logger->expects($this->once()) - ->method('debug') - ->with( - 'Feed {url} was not modified since last fetch. old: {old}, new: {new}' - ); - $result = $this->fetcher->fetch($this->url, false, '@0', false, null, null); - - $this->assertSame([null, []], $result); - } - - /** - * Test if empty is logged when the feed remain the same. - */ public function testFetchIfNoModifiedExists() { - $this->setUpReader($this->url, null, true); + $this->setUpReader($this->url, true); $item = $this->createItem(); $feed = $this->createFeed(); $this->mockIterator($this->feed_mock, [$this->item_mock]); @@ -294,7 +278,7 @@ class FeedFetcherTest extends TestCase $this->body = $body; $this->parsed_body = $parsed_body; - $this->setUpReader($this->url, null, true); + $this->setUpReader($this->url, true); $item = $this->createItem(); $feed = $this->createFeed(); $this->mockIterator($this->feed_mock, [$this->item_mock]); @@ -552,26 +536,13 @@ class FeedFetcherTest extends TestCase * @param string|null $modifiedDate Date of last fetch * @param bool $modified If the feed will be modified */ - private function setUpReader(string $url = '', ?string $modifiedDate = '@1553118393', bool $modified = true) + private function setUpReader(string $url = '', bool $modified = true) { - if (is_null($modifiedDate)) { - $this->reader->expects($this->once()) - ->method('read') - ->with($url) - ->will($this->returnValue($this->result)); - } else { - $this->reader->expects($this->once()) - ->method('readSince') - ->with($url, new DateTime($modifiedDate)) - ->will($this->returnValue($this->result)); - } + $this->reader->expects($this->once()) + ->method('read') + ->with($url) + ->will($this->returnValue($this->result)); - $this->result->expects($this->once()) - ->method('getResponse') - ->will($this->returnValue($this->response)); - $this->response->expects($this->once()) - ->method('isModified') - ->will($this->returnValue($modified !== false)); $this->location = $url; if (!$modified) { |