diff --git a/CHANGELOG.md b/CHANGELOG.md index b5f6254b2..4c252ab02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org). -## [Unreleased] +## 1.16.0 - 2019-02-23 #### Added @@ -61,6 +61,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this #### Fixed * [#317](https://github.com/shlinkio/shlink/issues/317) Fixed error while trying to generate previews because of global config file being deleted by mistake by build script. +* [#307](https://github.com/shlinkio/shlink/issues/307) Fixed memory leak while trying to process huge amounts of visits due to the query not being properly paginated. ## 1.15.1 - 2018-12-16 diff --git a/module/Common/test/Paginator/PaginableRepositoryAdapterTest.php b/module/Common/test/Paginator/Adapter/PaginableRepositoryAdapterTest.php similarity index 95% rename from module/Common/test/Paginator/PaginableRepositoryAdapterTest.php rename to module/Common/test/Paginator/Adapter/PaginableRepositoryAdapterTest.php index 4e63ba784..4f5afb23e 100644 --- a/module/Common/test/Paginator/PaginableRepositoryAdapterTest.php +++ b/module/Common/test/Paginator/Adapter/PaginableRepositoryAdapterTest.php @@ -1,7 +1,7 @@ getEntityManager()->createQuery($dql); + $dql = <<getEntityManager()->createQuery($dql) + ->setMaxResults($blockSize); + $remainingVisitsToProcess = $this->count(['visitLocation' => null]); + $offset = 0; - return $query->iterate(); + while ($remainingVisitsToProcess > 0) { + $iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate(); + foreach ($iterator as $key => [$value]) { + yield $key => $value; + } + + $remainingVisitsToProcess -= $blockSize; + $offset += $blockSize; + } } /** diff --git a/module/Core/src/Repository/VisitRepositoryInterface.php b/module/Core/src/Repository/VisitRepositoryInterface.php index 4de9024e3..585bb3ef5 100644 --- a/module/Core/src/Repository/VisitRepositoryInterface.php +++ b/module/Core/src/Repository/VisitRepositoryInterface.php @@ -9,7 +9,18 @@ interface VisitRepositoryInterface extends ObjectRepository { - public function findUnlocatedVisits(): iterable; + public const DEFAULT_BLOCK_SIZE = 10000; + + /** + * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in + * smaller blocks of a specific size. + * This will have side effects if you update those rows while you iterate them. + * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the + * dataset + * + * @return iterable|Visit[] + */ + public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable; /** * @return Visit[] diff --git a/module/Core/src/Service/VisitService.php b/module/Core/src/Service/VisitService.php index 705dab59c..4abbf4a2d 100644 --- a/module/Core/src/Service/VisitService.php +++ b/module/Core/src/Service/VisitService.php @@ -24,9 +24,12 @@ public function locateUnlocatedVisits(callable $geolocateVisit, ?callable $notif { /** @var VisitRepository $repo */ $repo = $this->em->getRepository(Visit::class); - $results = $repo->findUnlocatedVisits(); + $results = $repo->findUnlocatedVisits(false); + $count = 0; + $persistBlock = 200; - foreach ($results as [$visit]) { + foreach ($results as $visit) { + $count++; try { /** @var Location $location */ $location = $geolocateVisit($visit); @@ -37,20 +40,25 @@ public function locateUnlocatedVisits(callable $geolocateVisit, ?callable $notif $location = new VisitLocation($location); $this->locateVisit($visit, $location, $notifyVisitWithLocation); + + // Flush and clear after X iterations + if ($count % $persistBlock === 0) { + $this->em->flush(); + $this->em->clear(); + } } + + $this->em->flush(); + $this->em->clear(); } private function locateVisit(Visit $visit, VisitLocation $location, ?callable $notifyVisitWithLocation): void { $visit->locate($location); - $this->em->persist($visit); - $this->em->flush(); if ($notifyVisitWithLocation !== null) { $notifyVisitWithLocation($location, $visit); } - - $this->em->clear(); } } diff --git a/module/Core/test-db/Repository/VisitRepositoryTest.php b/module/Core/test-db/Repository/VisitRepositoryTest.php index db7979209..46bda8172 100644 --- a/module/Core/test-db/Repository/VisitRepositoryTest.php +++ b/module/Core/test-db/Repository/VisitRepositoryTest.php @@ -12,6 +12,8 @@ use Shlinkio\Shlink\Core\Model\Visitor; use Shlinkio\Shlink\Core\Repository\VisitRepository; use ShlinkioTest\Shlink\Common\DbTest\DatabaseTestCase; +use function Functional\map; +use function range; use function sprintf; class VisitRepositoryTest extends DatabaseTestCase @@ -30,8 +32,11 @@ protected function setUp(): void $this->repo = $this->getEntityManager()->getRepository(Visit::class); } - /** @test */ - public function findUnlocatedVisitsReturnsProperVisits(): void + /** + * @test + * @dataProvider provideBlockSize + */ + public function findUnlocatedVisitsReturnsProperVisits(int $blockSize): void { $shortUrl = new ShortUrl(''); $this->getEntityManager()->persist($shortUrl); @@ -50,7 +55,7 @@ public function findUnlocatedVisitsReturnsProperVisits(): void $this->getEntityManager()->flush(); $resultsCount = 0; - $results = $this->repo->findUnlocatedVisits(); + $results = $this->repo->findUnlocatedVisits(true, $blockSize); foreach ($results as $value) { $resultsCount++; } @@ -58,6 +63,13 @@ public function findUnlocatedVisitsReturnsProperVisits(): void $this->assertEquals(3, $resultsCount); } + public function provideBlockSize(): iterable + { + return map(range(1, 5), function (int $value) { + return [$value]; + }); + } + /** @test */ public function findVisitsByShortCodeReturnsProperData(): void { diff --git a/module/Core/test/Service/VisitServiceTest.php b/module/Core/test/Service/VisitServiceTest.php index 618e47817..be2bf8bab 100644 --- a/module/Core/test/Service/VisitServiceTest.php +++ b/module/Core/test/Service/VisitServiceTest.php @@ -17,7 +17,11 @@ use Shlinkio\Shlink\Core\Service\VisitService; use function array_shift; use function count; +use function floor; use function func_get_args; +use function Functional\map; +use function range; +use function sprintf; class VisitServiceTest extends TestCase { @@ -35,13 +39,12 @@ public function setUp(): void /** @test */ public function locateVisitsIteratesAndLocatesUnlocatedVisits(): void { - $unlocatedVisits = [ - [new Visit(new ShortUrl('foo'), Visitor::emptyInstance())], - [new Visit(new ShortUrl('bar'), Visitor::emptyInstance())], - ]; + $unlocatedVisits = map(range(1, 200), function (int $i) { + return new Visit(new ShortUrl(sprintf('short_code_%s', $i)), Visitor::emptyInstance()); + }); $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function () { @@ -63,19 +66,19 @@ public function locateVisitsIteratesAndLocatesUnlocatedVisits(): void $findUnlocatedVisits->shouldHaveBeenCalledOnce(); $getRepo->shouldHaveBeenCalledOnce(); $persist->shouldHaveBeenCalledTimes(count($unlocatedVisits)); - $flush->shouldHaveBeenCalledTimes(count($unlocatedVisits)); - $clear->shouldHaveBeenCalledTimes(count($unlocatedVisits)); + $flush->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1); + $clear->shouldHaveBeenCalledTimes(floor(count($unlocatedVisits) / 200) + 1); } /** @test */ - public function visitsWhichCannotBeLocatedAreIgnored() + public function visitsWhichCannotBeLocatedAreIgnored(): void { $unlocatedVisits = [ - [new Visit(new ShortUrl('foo'), Visitor::emptyInstance())], + new Visit(new ShortUrl('foo'), Visitor::emptyInstance()), ]; $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function () { @@ -92,7 +95,7 @@ public function visitsWhichCannotBeLocatedAreIgnored() $findUnlocatedVisits->shouldHaveBeenCalledOnce(); $getRepo->shouldHaveBeenCalledOnce(); $persist->shouldNotHaveBeenCalled(); - $flush->shouldNotHaveBeenCalled(); - $clear->shouldNotHaveBeenCalled(); + $flush->shouldHaveBeenCalledOnce(); + $clear->shouldHaveBeenCalledOnce(); } }