summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kevin Decherf <kevin@kdecherf.com> 2020-12-09 22:13:09 +0100
committer Benjamin Brahmer <info@b-brahmer.de> 2020-12-15 20:02:21 +0100
commit 42ea24f2f41ce04588aa929e5ffdaf1dbeb1a700 (patch)
tree 657089d619d1ee76d8f86dbf8553f9ce86c6867d
parent 1345cedd7e226c23a9fff6275a0fb71e444aa1b3 (diff)
Remove LastModified-based cursor when updating feeds
We remove the call to readSince() because some feeds publish new articles whose pubDate is earlier than the lastModified time stored for the feed (e.g. lemonde.fr). Since every item of a feed is now processed again on each update, we avoid constantly refreshing an item's lastModified timestamp by keeping the previous value when the item's fingerprint has not changed.
Fixes #921
Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- lib/Db/NewsMapperV2.php 4
-rwxr-xr-x lib/Fetcher/FeedFetcher.php 16
-rw-r--r-- lib/Service/ItemServiceV2.php 7
-rw-r--r-- tests/Unit/Fetcher/FeedFetcherTest.php 43
5 files changed, 19 insertions, 52 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ac0fb893..244e7aae9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
## Changed
- Remove outdated folder DB code
- Fix #963
+- Fix #921
## 15.1.0
diff --git a/lib/Db/NewsMapperV2.php b/lib/Db/NewsMapperV2.php
index 708ef4f73..93500c7c9 100644
--- a/lib/Db/NewsMapperV2.php
+++ b/lib/Db/NewsMapperV2.php
@@ -58,7 +58,9 @@ abstract class NewsMapperV2 extends QBMapper
public function update(Entity $entity): Entity
{
- $entity->setLastModified($this->time->getMicroTime());
+ if ([] !== $entity->getUpdatedFields()) {
+ $entity->setLastModified($this->time->getMicroTime());
+ }
return parent::update($entity);
}
diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php
index fa6d9a346..b9526165d 100755
--- a/lib/Fetcher/FeedFetcher.php
+++ b/lib/Fetcher/FeedFetcher.php
@@ -111,21 +111,7 @@ class FeedFetcher implements IFeedFetcher
}
$url = $url2->getNormalizedURL();
$this->reader->resetFilters();
- if (empty($lastModified) || !is_string($lastModified)) {
- $resource = $this->reader->read($url);
- } else {
- $resource = $this->reader->readSince($url, new DateTime($lastModified));
- }
-
- $response = $resource->getResponse();
- if (!$response->isModified()) {
- $this->logger->debug('Feed {url} was not modified since last fetch. old: {old}, new: {new}', [
- 'url' => $url,
- 'old' => print_r($lastModified, true),
- 'new' => print_r($response->getLastModified(), true),
- ]);
- return [null, []];
- }
+ $resource = $this->reader->read($url);
$location = $resource->getUrl();
$parsedFeed = $resource->getFeed();
diff --git a/lib/Service/ItemServiceV2.php b/lib/Service/ItemServiceV2.php
index d0093a07e..0a04fd8ad 100644
--- a/lib/Service/ItemServiceV2.php
+++ b/lib/Service/ItemServiceV2.php
@@ -87,6 +87,13 @@ class ItemServiceV2 extends Service
->setStarred($db_item->isStarred())
->setId($db_item->getId());
+ $item->generateSearchIndex();
+ // We don't want to update the database record if there is no
+ // change in the fetched item
+ if ($db_item->getFingerprint() === $item->getFingerprint()) {
+ $item->resetUpdatedFields();
+ }
+
$this->mapper->update($item);
} catch (DoesNotExistException $exception) {
$this->mapper->insert($item);
diff --git a/tests/Unit/Fetcher/FeedFetcherTest.php b/tests/Unit/Fetcher/FeedFetcherTest.php
index 58504f3f1..6a1153b83 100644
--- a/tests/Unit/Fetcher/FeedFetcherTest.php
+++ b/tests/Unit/Fetcher/FeedFetcherTest.php
@@ -225,25 +225,9 @@ class FeedFetcherTest extends TestCase
/**
* Test if empty is logged when the feed remain the same.
*/
- public function testNoFetchIfNotModified()
- {
- $this->setUpReader($this->url, '@0', false);
- $this->logger->expects($this->once())
- ->method('debug')
- ->with(
- 'Feed {url} was not modified since last fetch. old: {old}, new: {new}'
- );
- $result = $this->fetcher->fetch($this->url, false, '@0', false, null, null);
-
- $this->assertSame([null, []], $result);
- }
-
- /**
- * Test if empty is logged when the feed remain the same.
- */
public function testFetchIfNoModifiedExists()
{
- $this->setUpReader($this->url, null, true);
+ $this->setUpReader($this->url, true);
$item = $this->createItem();
$feed = $this->createFeed();
$this->mockIterator($this->feed_mock, [$this->item_mock]);
@@ -294,7 +278,7 @@ class FeedFetcherTest extends TestCase
$this->body = $body;
$this->parsed_body = $parsed_body;
- $this->setUpReader($this->url, null, true);
+ $this->setUpReader($this->url, true);
$item = $this->createItem();
$feed = $this->createFeed();
$this->mockIterator($this->feed_mock, [$this->item_mock]);
@@ -552,26 +536,13 @@ class FeedFetcherTest extends TestCase
* @param string|null $modifiedDate Date of last fetch
* @param bool $modified If the feed will be modified
*/
- private function setUpReader(string $url = '', ?string $modifiedDate = '@1553118393', bool $modified = true)
+ private function setUpReader(string $url = '', bool $modified = true)
{
- if (is_null($modifiedDate)) {
- $this->reader->expects($this->once())
- ->method('read')
- ->with($url)
- ->will($this->returnValue($this->result));
- } else {
- $this->reader->expects($this->once())
- ->method('readSince')
- ->with($url, new DateTime($modifiedDate))
- ->will($this->returnValue($this->result));
- }
+ $this->reader->expects($this->once())
+ ->method('read')
+ ->with($url)
+ ->will($this->returnValue($this->result));
- $this->result->expects($this->once())
- ->method('getResponse')
- ->will($this->returnValue($this->response));
- $this->response->expects($this->once())
- ->method('isModified')
- ->will($this->returnValue($modified !== false));
$this->location = $url;
if (!$modified) {