From 5eb91bcf01d31322df8639231f11692c8d424e36 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Thu, 6 May 2021 15:07:39 -0400 Subject: [PATCH] Cache histograms. This caches them in memory in the core library and to attached files in Girder. --- CHANGELOG.md | 5 + .../girder_large_image/models/image_item.py | 117 +++++++++++------- .../rest/large_image_resource.py | 34 ++++- girder/test_girder/test_large_image.py | 27 ++++ girder/test_girder/test_tiles_rest.py | 21 ++++ large_image/tilesource/base.py | 1 + tox.ini | 6 + 7 files changed, 164 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7a10e018..c031e64a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## Unreleased + +### Improvements +- Cache histogram requests + ## Version 1.5.0 ### Features diff --git a/girder/girder_large_image/models/image_item.py b/girder/girder_large_image/models/image_item.py index b9fa77069..1ace124e2 100644 --- a/girder/girder_large_image/models/image_item.py +++ b/girder/girder_large_image/models/image_item.py @@ -16,6 +16,7 @@ import io import json +import pickle import pymongo from girder import logger @@ -43,11 +44,18 @@ class ImageItem(Item): def initialize(self): super().initialize() self.ensureIndices(['largeImage.fileId']) - File().ensureIndices([([ - ('isLargeImageThumbnail', pymongo.ASCENDING), - ('attachedToType', pymongo.ASCENDING), - ('attachedToId', pymongo.ASCENDING), - ], {})]) + File().ensureIndices([ + ([ + ('isLargeImageThumbnail', pymongo.ASCENDING), + ('attachedToType', pymongo.ASCENDING), + ('attachedToId', pymongo.ASCENDING), + ], {}), + ([ + ('isLargeImageData', pymongo.ASCENDING), + ('attachedToType', pymongo.ASCENDING), + ('attachedToId', pymongo.ASCENDING), + ], {}), + ]) def createImageItem(self, item, fileObj, user=None, token=None, createJob=True, notify=False, **kwargs): @@ -302,10 +310,11 @@ def getThumbnail(self, item, checkAndCreate=False, width=None, height=None, **kw """ # check if a thumbnail file exists with a particular key keydict = dict(kwargs, width=width, height=height) - return self._getAndCacheImage( + return self._getAndCacheImageOrData( item, 'getThumbnail', checkAndCreate, keydict, width=width, height=height, **kwargs) - def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs): + def _getAndCacheImageOrData( + self, item, imageFunc, checkAndCreate, keydict, pickleCache=False, **kwargs): if 'fill' in keydict and (keydict['fill']).lower() == 'none': del keydict['fill'] keydict = {k: v for k, v in keydict.items() if v is not None} @@ -313,7 +322,7 @@ def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs): existing = File().findOne({ 'attachedToType': 'item', 'attachedToId': item['_id'], - 'isLargeImageThumbnail': True, + 'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True, 'thumbnailKey': key }) if existing: @@ -323,38 +332,45 @@ def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs): contentDisposition = 'inline' else: contentDisposition = kwargs['contentDisposition'] + if pickleCache: + data = File().open(existing).read() + return pickle.loads(data), 'application/octet-stream' return File().download(existing, contentDisposition=contentDisposition) tileSource = self._loadTileSource(item, **kwargs) result = getattr(tileSource, imageFunc)(**kwargs) if result is None: - thumbData, thumbMime = b'', 'application/octet-stream' + imageData, imageMime = b'', 'application/octet-stream' + elif pickleCache: + imageData, imageMime = result, 'application/octet-stream' else: - thumbData, thumbMime = result - # The logic on which files to save could be more sophisticated. - maxThumbnailFiles = int(Setting().get( - constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES)) - saveFile = maxThumbnailFiles > 0 - if saveFile: + imageData, imageMime = result + saveFile = True + if not pickleCache: + # The logic on which files to save could be more sophisticated. + maxThumbnailFiles = int(Setting().get( + constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES)) + saveFile = maxThumbnailFiles > 0 # Make sure we don't exceed the desired number of thumbnails self.removeThumbnailFiles(item, maxThumbnailFiles - 1) - # Save the thumbnail as a file - thumbfile = Upload().uploadFromFile( - io.BytesIO(thumbData), size=len(thumbData), + if saveFile: + dataStored = imageData if not pickleCache else pickle.dumps(imageData, protocol=4) + # Save the data as a file + datafile = Upload().uploadFromFile( + io.BytesIO(dataStored), size=len(dataStored), name='_largeImageThumbnail', parentType='item', parent=item, - user=None, mimeType=thumbMime, attachParent=True) - if not len(thumbData) and 'received' in thumbfile: - thumbfile = Upload().finalizeUpload( - thumbfile, Assetstore().load(thumbfile['assetstoreId'])) - thumbfile.update({ - 'isLargeImageThumbnail': True, + user=None, mimeType=imageMime, attachParent=True) + if not len(dataStored) and 'received' in datafile: + datafile = Upload().finalizeUpload( + datafile, Assetstore().load(datafile['assetstoreId'])) + datafile.update({ + 'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True, 'thumbnailKey': key, }) # Ideally, we would check that the file is still wanted before we # save it. This is probably impossible without true transactions in # Mongo. - File().save(thumbfile) - # Return the data - return thumbData, thumbMime + File().save(datafile) + return imageData, imageMime def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs): """ @@ -369,23 +385,27 @@ def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs): :returns: a tuple of (the number of files before removal, the number of files removed). """ + keys = ['isLargeImageThumbnail'] + if not keep: + keys.append('isLargeImageData') if not sort: sort = [('_id', SortDir.DESCENDING)] - query = { - 'attachedToType': 'item', - 'attachedToId': item['_id'], - 'isLargeImageThumbnail': True, - } - query.update(kwargs) - present = 0 - removed = 0 - for file in File().find(query, sort=sort): - present += 1 - if keep > 0: - keep -= 1 - continue - File().remove(file) - removed += 1 + for key in keys: + query = { + 'attachedToType': 'item', + 'attachedToId': item['_id'], + key: True, + } + query.update(kwargs) + present = 0 + removed = 0 + for file in File().find(query, sort=sort): + present += 1 + if keep > 0: + keep -= 1 + continue + File().remove(file) + removed += 1 return (present, removed) def getRegion(self, item, **kwargs): @@ -425,8 +445,15 @@ def histogram(self, item, **kwargs): method. :returns: histogram object. """ - tileSource = self._loadTileSource(item, **kwargs) - return tileSource.histogram(**kwargs) + if kwargs.get('range') is not None: + tileSource = self._loadTileSource(item, **kwargs) + result = tileSource.histogram(**kwargs) + else: + imageKey = 'histogram' + result = self._getAndCacheImageOrData( + item, 'histogram', False, dict(kwargs, imageKey=imageKey), + imageKey=imageKey, pickleCache=True, **kwargs)[0] + return result def tileSource(self, item, **kwargs): """ @@ -459,5 +486,5 @@ def getAssociatedImage(self, item, imageKey, checkAndCreate=False, *args, **kwar None if the associated image doesn't exist. """ keydict = dict(kwargs, imageKey=imageKey) - return self._getAndCacheImage( + return self._getAndCacheImageOrData( item, 'getAssociatedImage', checkAndCreate, keydict, imageKey=imageKey, **kwargs) diff --git a/girder/girder_large_image/rest/large_image_resource.py b/girder/girder_large_image/rest/large_image_resource.py index c49a80643..21c014f3f 100644 --- a/girder/girder_large_image/rest/large_image_resource.py +++ b/girder/girder_large_image/rest/large_image_resource.py @@ -226,8 +226,9 @@ def __init__(self): self.route('DELETE', ('thumbnails',), self.deleteThumbnails) self.route('GET', ('associated_images',), self.countAssociatedImages) self.route('DELETE', ('associated_images',), self.deleteAssociatedImages) - self.route('DELETE', ('tiles', 'incomplete'), - self.deleteIncompleteTiles) + self.route('GET', ('histograms',), self.countHistograms) + self.route('DELETE', ('histograms',), self.deleteHistograms) + self.route('DELETE', ('tiles', 'incomplete'), self.deleteIncompleteTiles) @describeRoute( Description('Clear tile source caches to release resources and file handles.') @@ -438,3 +439,32 @@ def listSources(self, params): except Exception: pass return results + + @describeRoute( + Description('Count the number of cached histograms for large_image items.') + ) + @access.admin + def countHistograms(self, params): + query = { + 'isLargeImageData': True, + 'attachedToType': 'item', + 'thumbnailKey': {'$regex': '"imageKey":"histogram"'}, + } + count = File().find(query).count() + return count + + @describeRoute( + Description('Delete cached histograms from large_image items.') + ) + @access.admin + def deleteHistograms(self, params): + query = { + 'isLargeImageData': True, + 'attachedToType': 'item', + 'thumbnailKey': {'$regex': '"imageKey":"histogram"'}, + } + removed = 0 + for file in File().find(query): + File().remove(file) + removed += 1 + return removed diff --git a/girder/test_girder/test_large_image.py b/girder/test_girder/test_large_image.py index 23a230325..6495b6151 100644 --- a/girder/test_girder/test_large_image.py +++ b/girder/test_girder/test_large_image.py @@ -354,3 +354,30 @@ def testGetLargeImagePath(server, admin, fsAssetstore): path = ts._getLargeImagePath() assert path == abspath file = File().save(origFile) + + +@pytest.mark.usefixtures('unbindLargeImage') +@pytest.mark.plugin('large_image') +def testHistogramCaching(server, admin, user, fsAssetstore): + file = utilities.uploadExternalFile('sample_image.ptif', admin, fsAssetstore) + itemId = str(file['itemId']) + resp = server.request(path='/item/%s/tiles/histogram' % itemId, + user=admin, isJson=False) + assert utilities.respStatus(resp) == 200 + # Test GET histograms + resp = server.request(path='/large_image/histograms', user=user) + assert utilities.respStatus(resp) == 403 + resp = server.request(path='/large_image/histograms', user=admin) + assert utilities.respStatus(resp) == 200 + assert resp.json == 1 + # Test DELETE histograms + resp = server.request( + method='DELETE', path='/large_image/histograms', user=user) + assert utilities.respStatus(resp) == 403 + resp = server.request( + method='DELETE', path='/large_image/histograms', user=admin) + assert utilities.respStatus(resp) == 200 + assert resp.json == 1 + resp = server.request(path='/large_image/histograms', user=admin) + assert utilities.respStatus(resp) == 200 + assert resp.json == 0 diff --git a/girder/test_girder/test_tiles_rest.py b/girder/test_girder/test_tiles_rest.py index 1f8dcdd9f..45b737637 100644 --- a/girder/test_girder/test_tiles_rest.py +++ b/girder/test_girder/test_tiles_rest.py @@ -1130,6 +1130,27 @@ def testTilesHistogram(server, admin, fsAssetstore): assert len(resp.json[0]['hist']) == 256 assert resp.json[1]['samples'] == 2801664 assert resp.json[1]['hist'][128] == 176 + # A second query will fetch it from cache + resp = server.request( + path='/item/%s/tiles/histogram' % itemId, + params={'width': 2048, 'height': 2048, 'resample': False}) + assert len(resp.json) == 3 + assert len(resp.json[0]['hist']) == 256 + + +@pytest.mark.usefixtures('unbindLargeImage') +@pytest.mark.plugin('large_image') +def testTilesHistogramWithRange(server, admin, fsAssetstore): + file = utilities.uploadExternalFile( + 'sample_image.ptif', admin, fsAssetstore) + itemId = str(file['itemId']) + resp = server.request( + path='/item/%s/tiles/histogram' % itemId, + params={'width': 2048, 'height': 2048, 'resample': False, 'rangeMin': 10, 'rangeMax': 240}) + assert len(resp.json) == 3 + assert len(resp.json[0]['hist']) == 256 + assert resp.json[1]['samples'] == 685979 + assert resp.json[1]['hist'][128] == 186 @pytest.mark.usefixtures('unbindLargeImage') diff --git a/large_image/tilesource/base.py b/large_image/tilesource/base.py index 678f4b963..7e0eb14a1 100644 --- a/large_image/tilesource/base.py +++ b/large_image/tilesource/base.py @@ -1219,6 +1219,7 @@ def _pilFormatMatches(self, image, match=True, **kwargs): # compatibility could be an issue. return False + @methodcache() def histogram(self, dtype=None, onlyMinMax=False, bins=256, density=False, format=None, *args, **kwargs): """ diff --git a/tox.ini b/tox.ini index 1599333be..8b35283e8 100644 --- a/tox.ini +++ b/tox.ini @@ -43,6 +43,12 @@ setenv = GDAL_PAM_ENABLED=no PIP_FIND_LINKS=https://girder.github.io/large_image_wheels +# To run just some non-web-client tests, do tox -e server -- -k +# Don't use this for CI or full tests. +[testenv:server] +commands = + pytest {posargs} + [testenv:flake8] skipsdist = true skip_install = true