diff options
author | Accalia Elementia <accalia@elementia.me> | 2022-05-24 11:07:50 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-24 17:07:50 +0200 |
commit | fdf037e7287616b5211f8f8f7c8ce85b260d781e (patch) | |
tree | 6371e0fce359c48f40450929bd0dade97cc7aa1e | |
parent | 684af84ea957ff160d49d44826525d1684b6f975 (diff) |
Use Feed Link as GUID when Feed omits Guid. (#1785)
* Use Feed Link as GUID when Feed omits Guid.
As noted in nextcloud/news#1702 some feeds omit the GUID and are
therefore not a valid RSS feed.
nextcloud/news#1738 resolved the issue to allow valid feeds to
update correctly when an invalid feed is present.
This commit allows parsing of the invalid feed as well by assuming
that the item link of the feed is unique to the feed and using
it in place of the GUID when the feed omits the GUID.
This will allow NextCloud News to accept and behave like many other
popular feed aggregators when presented with such an invalid feed.
Signed-off-by: Accalia <Accalia@Elementia.me>
* Add basic Logging when using fallback guid
Signed-off-by: Accalia <Accalia@Elementia.me>
* Add basic Logging when using fallback guid - Fix Fatfinger Typo
Signed-off-by: Accalia <Accalia@Elementia.me>
* Add basic Logging when using fallback guid - Update tests to account for additional logging
Signed-off-by: Accalia <Accalia@Elementia.me>
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rwxr-xr-x | lib/Fetcher/FeedFetcher.php | 24 | ||||
-rw-r--r-- | tests/Unit/Fetcher/FeedFetcherTest.php | 44 |
3 files changed, 61 insertions, 8 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index a48f7a531..4bf2eda5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is mostly based on [Keep a Changelog](https://keepachangelog.com/en/1 ### Changed - Add routes for starring/unstarring items by id - Improve styling of tables in articles +- Allow fetching feeds that omit guid by using link as stand-in ### Fixed - Fix updated api not returning any item after marking item as read (#1713) diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php index a6ddbacb5..27421f525 100755 --- a/lib/Fetcher/FeedFetcher.php +++ b/lib/Fetcher/FeedFetcher.php @@ -233,11 +233,25 @@ class FeedFetcher implements IFeedFetcher { $item = new Item(); $item->setUnread(true); - $item->setUrl($parsedItem->getLink()); - if ($parsedItem->getPublicId() == null) { + $itemLink = $parsedItem->getLink(); + $itemTitle = $parsedItem->getTitle(); + $item->setUrl($itemLink); + $publicId = $parsedItem->getPublicId(); + if ($publicId == null) { + // Fallback on using the URL as the guid for the feed item if no guid provided by feed + $this->logger->debug( + "Feed item {title} with link {link} did not expose a guid, falling back to using link as guid", + [ + 'title' => $itemTitle, + 'link' => $itemLink + ] + ); + $publicId = $itemLink; + } + if ($publicId == null) { throw new ReadErrorException("Malformed feed: item has no GUID"); } - $item->setGuid($parsedItem->getPublicId()); + $item->setGuid($publicId); $item->setGuidHash(md5($item->getGuid())); $lastModified = $parsedItem->getLastModified() ?? new DateTime(); @@ -255,8 +269,8 @@ class FeedFetcher implements IFeedFetcher $item->setRtl($RTL); // unescape content because angularjs helps against XSS - if ($parsedItem->getTitle() !== null) { - $item->setTitle($this->decodeTwice($parsedItem->getTitle())); + if ($itemTitle !== null) { + $item->setTitle($this->decodeTwice($itemTitle)); } $author = $parsedItem->getAuthor(); if ($author !== null && $author->getName() !== null) { diff --git a/tests/Unit/Fetcher/FeedFetcherTest.php b/tests/Unit/Fetcher/FeedFetcherTest.php index 42f518453..af8066171 100644 --- a/tests/Unit/Fetcher/FeedFetcherTest.php +++ b/tests/Unit/Fetcher/FeedFetcherTest.php @@ -459,6 +459,40 @@ class FeedFetcherTest extends TestCase } /** + * Test if the fetch function fetches a feed that specifies a guid. + */ + public function testFetchWithGuid() + { + $this->setUpReader($this->url); + $this->createItem(); + $feed = $this->createFeed(); + $this->mockIterator($this->feed_mock, [$this->item_mock]); + $result = $this->fetcher->fetch($this->url, false, null, null); + //Explicitly assert GUID value + $this->assertEquals(2, count($result)); + $this->assertEquals(1, count($result[1])); + $resultItem = $result[1][0]; + $this->assertEquals($this->guid, $resultItem->getGuid()); + } + + /** + * Test if the fetch function fetches a feed that does not specify a guid. + */ + public function testFetchWithoutGuid() + { + $this->setUpReader($this->url); + $this->guid = null; + $this->createItem(); + $feed = $this->createFeed(); + $this->mockIterator($this->feed_mock, [$this->item_mock]); + $result = $this->fetcher->fetch($this->url, false, null, null); + //Explicitly assert GUID value + $this->assertEquals(2, count($result)); + $this->assertEquals(1, count($result[1])); + $resultItem = $result[1][0]; + $this->assertEquals($this->permalink, $resultItem->getGuid()); + } + /** * Mock an iteration option on an existing mock * * @param object $iteratorMock The mock to enhance @@ -576,10 +610,10 @@ class FeedFetcherTest extends TestCase $this->item_mock->expects($this->exactly(1)) ->method('getLink') ->will($this->returnValue($this->permalink)); - $this->item_mock->expects($this->exactly(2)) + $this->item_mock->expects($this->exactly(1)) ->method('getTitle') ->will($this->returnValue($this->title)); - $this->item_mock->expects($this->exactly(2)) + $this->item_mock->expects($this->exactly(1)) ->method('getPublicId') ->will($this->returnValue($this->guid)); $this->item_mock->expects($this->exactly(1)) @@ -600,7 +634,6 @@ class FeedFetcherTest extends TestCase $item->setUnread(true) ->setUrl($this->permalink) ->setTitle('my<\' title') - ->setGuid($this->guid) ->setGuidHash($this->guid_hash) ->setBody($this->parsed_body) ->setRtl(false) @@ -609,6 +642,11 @@ class FeedFetcherTest extends TestCase ->setAuthor(html_entity_decode($this->author->getName())) ->setCategoriesJson($this->categoriesJson); + // some tests deliberately omit this, so leave default value if the guid is to be ignored + if ($this->guid !== null) { + $item->setGuid($this->guid); + } + if ($enclosureType === 'audio/ogg' || $enclosureType === 'video/ogg') { $media = $this->getMockbuilder(MediaInterface::class)->getMock(); $media->expects($this->once()) |