Merge pull request #1645 from girder/cancel-plot

Allow canceling plottable data requests
manthey authored Sep 20, 2024
2 parents 9e0ad45 + 284d519 commit 9b42a83

Showing 3 changed files with 60 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Improvements
 
 - Only list computable plot columns if there are other numeric columns ([#1634](../../pull/1634))
+- Allow canceling plottable data requests ([#1645](../../pull/1645))
 - List official yaml mime type for the multi source ([#1636](../../pull/1636))
 - Speed up correlating data files with annotations ([#1642](../../pull/1642))
 - Support dict with MultiFileTileSource ([#1641](../../pull/1641))
17 changes: 13 additions & 4 deletions girder_annotation/girder_large_image_annotation/rest/annotation.py
@@ -631,18 +631,22 @@ def deleteItemAnnotations(self, item):
         .param('sources', 'An optional comma separated list that can contain '
                'folder, item, annotation, annotationelement, datafile.',
                required=False)
+        .param('uuid', 'An optional uuid to allow cancelling a previous '
+               'request. If specified and there are any outstanding requests '
+               'with the same uuid, they may be cancelled to save resources.',
+               required=False)
         .errorResponse('ID was invalid.')
         .errorResponse('Read access was denied for the item.', 403),
     )
     @access.public(cookie=True, scope=TokenScope.DATA_READ)
-    def getItemPlottableElements(self, item, annotations, adjacentItems, sources=None):
+    def getItemPlottableElements(self, item, annotations, adjacentItems, sources=None, uuid=None):
         user = self.getCurrentUser()
         if adjacentItems != '__all__':
             adjacentItems = str(adjacentItems).lower() == 'true'
         sources = sources or None
         data = utils.PlottableItemData(
             user, item, annotations=annotations, adjacentItems=adjacentItems,
-            sources=sources)
+            sources=sources, uuid=uuid)
         return [col for col in data.columns if col.get('count')]

     @autoDescribeRoute(
@@ -671,19 +675,24 @@ def getItemPlottableElements(self, item, annotations, adjacentItems, sources=None, uuid=None):
                'rows which have all selected columns present will be included in '
                'the computation.',
                paramType='formData', requireObject=True, required=False)
+        .param('uuid', 'An optional uuid to allow cancelling a previous '
+               'request. If specified and there are any outstanding requests '
+               'with the same uuid, they may be cancelled to save resources.',
+               required=False)
         .errorResponse('ID was invalid.')
         .errorResponse('Read access was denied for the item.', 403),
     )
     @access.public(cookie=True, scope=TokenScope.DATA_READ)
     def getItemPlottableData(
-            self, item, keys, adjacentItems, annotations, requiredKeys, sources=None, compute=None):
+            self, item, keys, adjacentItems, annotations, requiredKeys,
+            sources=None, compute=None, uuid=None):
         user = self.getCurrentUser()
         if adjacentItems != '__all__':
             adjacentItems = str(adjacentItems).lower() == 'true'
         sources = sources or None
         data = utils.PlottableItemData(
             user, item, annotations=annotations, adjacentItems=adjacentItems,
-            sources=sources, compute=compute)
+            sources=sources, compute=compute, uuid=uuid)
         return data.data(keys, requiredKeys)

     def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
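In client terms, the new uuid parameter works like this: a UI tags every plot request for a given widget with the same uuid, so the server may cancel a superseded request that is still running instead of computing both. A rough sketch using requests (the annotation/item/{id}/plot/list route path, API root, token, and parameter encoding are assumptions, not shown in this diff):

import uuid

import requests

API = 'https://girder.example.com/api/v1'  # assumed Girder API root
ITEM_ID = '5f0c2d2b...'                    # placeholder item id
HEADERS = {'Girder-Token': 'REPLACE_ME'}   # placeholder auth token

# One uuid per plot widget; reusing it on every refresh lets the server
# cancel the still-running predecessor rather than finishing both.
plot_uuid = str(uuid.uuid4())

resp = requests.get(
    f'{API}/annotation/item/{ITEM_ID}/plot/list',
    params={
        'annotations': '["__all__"]',  # assumed encoding for this parameter
        'uuid': plot_uuid,
    },
    headers=HEADERS, timeout=300)
resp.raise_for_status()
columns = resp.json()  # plottable columns that have a nonzero count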
48 changes: 46 additions & 2 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
@@ -4,6 +4,7 @@
 import os
 import re
 import threading
+import weakref
 
 from bson.objectid import ObjectId
 
@@ -31,8 +32,11 @@
 scanDatafileRecords = 50
 scanAnnotationElements = 5000
 
+_recentPlottableItemDataLock = threading.RLock()
+_recentPlottableItemData = {}
+
 
-@functools.lru_cache(maxsize=100)
+@functools.lru_cache(maxsize=250)
 def _dfFromFile(fileid, full=False):
     import pandas as pd
 
@@ -405,14 +409,29 @@ def isGeoJSON(annotation):
         'MultiPolygon'}
 
 
+def _cancelPlottableItemData(uuid, newRecord):
+    if uuid is None:
+        return
+    with _recentPlottableItemDataLock:
+        if uuid in _recentPlottableItemData:
+            old = _recentPlottableItemData.pop(uuid)
+            try:
+                old().cancel = True
+            except Exception:
+                pass
+        if len(_recentPlottableItemData) > 7:
+            _recentPlottableItemData.pop(next(iter(_recentPlottableItemData)))
+        _recentPlottableItemData[uuid] = weakref.ref(newRecord)
+
+
 class PlottableItemData:
     maxItems = 1000
     maxAnnotationElements = 5000
     maxDistinct = 20
     allowedTypes = (str, bool, int, float)
 
     def __init__(self, user, item, annotations=None, adjacentItems=False,
-                 sources=None, compute=None):
+                 sources=None, compute=None, uuid=None):
         """
         Get plottable data associated with an item.
@@ -435,12 +454,17 @@ def __init__(self, user, item, annotations=None, adjacentItems=False,
             compute.(x|y|z), the computation will not be performed. Only rows
             which have all selected columns present will be included in the
             computation.
+        :param uuid: An optional uuid to allow cancelling a previous request.
+            If specified and there are any outstanding requests with the same
+            uuid, they may be cancelled to save resources.
         """
+        _cancelPlottableItemData(uuid, self)
         self.user = user
         self._columns = None
         self._datacolumns = None
         self._data = None
         self._compute = None
+        self.cancel = False
         try:
             if len(compute['columns']):
                 self._compute = {'function': 'umap', 'params': {
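Within server-side code, cancellation support is just the extra constructor argument. A hedged sketch of how a caller might use it (user and item stand for real Girder documents; the annotation list and column keys shown are illustrative, not taken from this diff):

# Hypothetical server-side usage; 'widget-1' plays the role of the uuid
# that a client would send with each related request.
data = PlottableItemData(user, item, annotations=['__all__'],
                         adjacentItems=True, uuid='widget-1')
# If another PlottableItemData is constructed with uuid='widget-1' while
# this one is still collecting rows, data.cancel flips to True and the
# collection loops below return early.
result = data.data(['data.x', 'data.y'])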
@@ -524,9 +548,13 @@ def _findAnnotations(self, annotations):
                 if annot['annotation']['name'] not in names:
                     names[annot['annotation']['name']] = idx
             for adjitem in self.items[1:]:
+                if self.cancel:
+                    return
                 query = {'_active': {'$ne': False}, 'itemId': adjitem['_id']}
                 annotList = [None] * len(self.annotations[0])
                 for annot in Annotation().find(query, limit=0, sort=[('_version', -1)]):
+                    if self.cancel:
+                        return
                     if annot['annotation']['name'] in names and annotList[
                             names[annot['annotation']['name']]] is None:
                         annotList[names[annot['annotation']['name']]] = annot
@@ -553,6 +581,8 @@ def _findDataFiles(self):  # noqa
                 self._itemfilelist[iidx] = [None] * len(self._itemfilelist[0])
             names = {}
             for file in Item().childFiles(item):
+                if self.cancel:
+                    return
                 try:
                     if (file['_id'] == self.item['largeImage']['fileId'] or
                             file['_id'] == self.item['largeImage'].get('originalId')):
@@ -945,6 +975,8 @@ def _collectRecordRows(
         """
         count = 0
         for rowidx in range(rows):
+            if self.cancel:
+                return 0
             try:
                 value = selector(record, data, rowidx)
             except Exception:
@@ -996,6 +1028,8 @@ def _collectRecords(self, columns, recordlist, doctype, iid='', aid=''):
             if self._datacolumns and colkey not in self._datacolumns:
                 continue
             for where, (getData, selector, length) in col['where'].items():
+                if self.cancel:
+                    return 0
                 if doctype != where and not where.startswith(doctype + '.'):
                     continue
                 for recidx, record in enumerate(recordlist):
@@ -1089,6 +1123,8 @@ def _getColumnsFromAnnotations(self, columns):
         for iidx, annotList in enumerate(self.annotations or []):
             iid = str(self.items[iidx]['_id'])
             for anidx, annot in enumerate(annotList):
+                if self.cancel:
+                    return 0
                 # This had been checking if the first item's annotation didn't
                 # contribute any required data to the data set, skip subsequent
                 # items' annotations; they are likely to be discarded. This
@@ -1124,6 +1160,8 @@ def _getColumnsFromDataFiles(self, columns):
         for iidx, dfList in enumerate(self._itemfilelist or []):
             iid = str(self.items[iidx]['_id'])
             for dfidx, file in enumerate(dfList):
+                if self.cancel:
+                    return 0
                 # If the first item's data file didn't contribute any required
                 # data to the data set, skip subsequent items' data files;
                 # they are likely to be discarded.
@@ -1207,6 +1245,8 @@ def computeSelectorAxis(record, data, row):
         if not len(rows):
             return 0
         rows = {k: row[:lencols] for k, row in rows.items()}
+        if self.cancel:
+            return 0
         if not self._computeFunction(rows):
             return 0
         for key in self.computeColumns:
@@ -1355,6 +1395,8 @@ def data(self, columns, requiredColumns=None):  # noqa
         rows = set()
         # collects data as a side effect
         collist = self._getColumns()
+        if self.cancel:
+            return
         for coldata in self._datacolumns.values():
             rows |= set(coldata.keys())
         rows = sorted(rows)
@@ -1382,6 +1424,8 @@ def data(self, columns, requiredColumns=None):  # noqa
                 logger.info(f'Reduced row count from {len(data)} to {len(subdata)} '
                             f'because of None values in implied columns')
                 data = subdata
+        if self.cancel:
+            return
         # Refresh our count, distinct, distinctcount, min, max for each column
         for cidx, col in enumerate(colsout):
             col['count'] = len([row[cidx] for row in data if row[cidx] is not None])
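Taken together, the mechanism is cooperative: _cancelPlottableItemData keeps a small uuid-keyed registry of weak references to recent PlottableItemData objects, a new request bearing a previously seen uuid flips the old object's cancel flag, and the long-running loops above poll that flag to bail out early. A minimal standalone illustration of the registry behavior, using a hypothetical stand-in class rather than a real PlottableItemData:

class _StubRequest:  # stand-in for PlottableItemData; illustration only
    cancel = False

first = _StubRequest()
_cancelPlottableItemData('abc-123', first)   # registers a weakref to first

second = _StubRequest()
_cancelPlottableItemData('abc-123', second)  # pops first, sets first.cancel

assert first.cancel is True    # superseded; stops at its next cancel check
assert second.cancel is False  # the new request proceeds normally

Because the registry holds weak references, it never keeps a finished request alive just to support cancellation: if the old object has already been garbage collected, old() returns None, the attribute assignment raises, and the except Exception: pass absorbs it. The dict is also capped at a handful of entries, evicting the oldest uuid first.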
