Skip to content

Commit

Permalink
Merge branch 'master' into qa
Browse files Browse the repository at this point in the history
  • Loading branch information
rcreasi committed Jan 9, 2025
2 parents 949381f + cd53521 commit ba4ea17
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 49 deletions.
34 changes: 24 additions & 10 deletions portal-backend/depmap/breadbox_shim/breadbox_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
from breadbox_client.models.compute_response import ComputeResponse
from breadbox_client.models import (
MatrixDatasetResponse,
MatrixDatasetResponseFormat,
TabularDatasetResponse,
FeatureResponse,
ValueType,
)

from depmap.data_access.response_parsing import (
format_breadbox_task_status,
get_breadbox_slice_id,
parse_breadbox_slice_id,
remove_breadbox_prefix,
)
from depmap.interactive.config.categories import CustomCellLinesConfig
from depmap.vector_catalog.models import Node, NodeType
from depmap.vector_catalog.trees import Trees
from depmap.partials.matrix.models import CellLineSeries
from depmap import extensions
from depmap_compute.slice import SliceQuery

# Since breadbox and the legacy backend contain different datasets, we need to combine
# values from each of their responses before returning a value.
Expand Down Expand Up @@ -56,7 +56,7 @@ def __init__(


class BreadboxVectorCatalogNodeInfo:
"""
"""
Vector Catalog endpoints return a specific dictionary structure for each parent node
in the vector catalog tree. This class reflects that same structure and contains
some defaults specific to breadbox.
Expand Down Expand Up @@ -222,7 +222,7 @@ def _get_feature_node_info_with_siblings(
def run_custom_analysis(
analysis_type: str,
dataset_slice_id: str,
query_feature_slice_id: str,
slice_query: Optional[SliceQuery],
vector_variable_type: str,
query_cell_lines: Optional[list[str]],
query_values: Optional[list[Any]],
Expand All @@ -233,19 +233,33 @@ def run_custom_analysis(
or a legacy portal feature (specified with the given feature_data).
Return a task status.
"""
dataset_uuid = parse_breadbox_slice_id(dataset_slice_id).dataset_id
if query_feature_slice_id:
feature_id = parse_breadbox_slice_id(query_feature_slice_id).feature_id
query_dataset_id = parse_breadbox_slice_id(query_feature_slice_id).dataset_id
bb_dataset_id = remove_breadbox_prefix(dataset_slice_id)
if slice_query:
# Temporary hack: for now, this slice query ALWAYS specifies a feature by label.
# This should always be true for our current feature selection component.
# Soon, this "breadbox_shim" should be removed entirely, along with this hack.
assert slice_query.identifier_type == "feature_label"

# Hack part 2: Custom analysis in Breadbox was set up to take a feature's given ID.
# We have the label here and need to use that to load the given ID.
query_dataset_id = parse_breadbox_slice_id(slice_query.dataset_id).dataset_id
all_dataset_features = extensions.breadbox.client.get_dataset_features(
query_dataset_id
)
feature_id = None
for bb_feature in all_dataset_features:
if bb_feature["label"] == slice_query.identifier:
feature_id = bb_feature["id"]
assert (
feature_id is not None
), "query_feature_slice_id must contain a feature ID"
), f"Unexpected feature label passed to breadbox custom analysis: '{slice_query.identifier}'"

else:
feature_id = ""
query_dataset_id = ""
bb_task_status = extensions.breadbox.client.compute_univariate_associations(
analysis_type=analysis_type,
dataset_id=dataset_uuid,
dataset_id=bb_dataset_id,
query_feature_id=feature_id,
query_dataset_id=query_dataset_id,
vector_variable_type=vector_variable_type,
Expand Down
37 changes: 20 additions & 17 deletions portal-backend/depmap/compute/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from depmap.extensions import csrf_protect, restplus_handle_exception
from depmap.compute import analysis_tasks
from depmap_compute.models import AnalysisType
from depmap_compute.slice import slice_id_to_slice_query
from depmap.user_uploads.utils.task_utils import get_current_result_dir

blueprint = Blueprint(
Expand Down Expand Up @@ -113,35 +114,41 @@ def post(self):
else:
raise ValueError("Unexpected analysis type {}".format(analysis_type))

# Parse the slice ID if one was provided
if query_id:
slice_query = slice_id_to_slice_query(query_id)
else:
slice_query = None

# Forward requests to breadbox if a breadbox dataset is requested
if dataset_id.startswith("breadbox/"):
# If the query feature is from a legacy dataset, load it now and pass the values to breadbox
# If the query slice is from a legacy dataset, load it now and pass the values to breadbox
# The query_cell_lines parameter needs to be the same order/length as the query_values when passed to breadbox.
if query_id and not query_id.startswith("breadbox/"):
legacy_feature_series: pd.Series = interactive_utils.get_row_of_values_from_slice_id(
query_id
)
if slice_query and not slice_query.dataset_id.startswith("breadbox/"):
legacy_data_slice = data_access.get_slice_data(slice_query)
if query_cell_lines is not None:
# When the cell lines have been filtered by the user,
# the legacy feature series also needs to be filtered before being passed to breadbox.
feature_cell_lines = legacy_feature_series.index.tolist()
feature_cell_lines = legacy_data_slice.index.tolist()
unordered_cell_lines_interesection = list(
set(query_cell_lines).intersection(set(feature_cell_lines))
)
if len(unordered_cell_lines_interesection) == 0:
return format_taskless_error_message(
"No cell lines in common between query and dataset searched"
)
legacy_feature_series = legacy_feature_series.loc[
legacy_data_slice = legacy_data_slice.loc[
unordered_cell_lines_interesection
]
query_values = legacy_feature_series.tolist()
query_cell_lines = legacy_feature_series.index.tolist()
query_id = None
query_values = legacy_data_slice.tolist()
query_cell_lines: list[
str
] = legacy_data_slice.index.tolist() # pyright: ignore
slice_query = None
return breadbox_shim.run_custom_analysis(
analysis_type=analysis_type,
dataset_slice_id=dataset_id,
query_feature_slice_id=query_id,
slice_query=slice_query,
vector_variable_type=vector_variable_type,
query_cell_lines=query_cell_lines,
query_values=query_values,
Expand All @@ -164,12 +171,8 @@ def post(self):
# 1. main query vector
# 2. which is dependent/independent, the matrix or the vector
# 3. optionally, a list of cell line depmap ids
if query_id.startswith("breadbox/"):
query_series = breadbox_shim.get_feature_data_slice(slice_id=query_id)
else:
query_series = interactive_utils.get_row_of_values_from_slice_id(
query_id
)
assert slice_query is not None
query_series = data_access.get_slice_data(slice_query)

# cl_query_vector is the intersection of cell lines in both data tracts plus the cell line subset
(
Expand Down
2 changes: 1 addition & 1 deletion portal-backend/depmap/data_access/response_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def parse_breadbox_slice_id(slice_id: str) -> ParsedBreadboxSliceId:
"""
Parse the breadbox dataset ID and feature ID from the given slice ID. If the given
slice ID is malformed, throw a Bad Request error. Slice IDs should be formatted like
'breadbox/<dataset-uuid>/<feature-uuid>' or 'breadbox/<dataset-uuid>'.
'breadbox/<dataset-uuid>/<feature-given-id>' or 'breadbox/<dataset-uuid>'.
"""
match = re.match(BREADBOX_SLICE_ID_REGEX, slice_id)
assert match, f"Breadbox slice id '{slice_id}' does not match the expected format."
Expand Down
2 changes: 0 additions & 2 deletions portal-backend/pyright-ratchet-errors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -667,15 +667,13 @@ views.py: error: Argument of type "Literal['url']" cannot be assigned to paramet
views.py: error: Argument of type "Literal['user_id']" cannot be assigned to parameter "__s" of type "slice" in function "__getitem__"
views.py: error: Argument of type "Literal['value']" cannot be assigned to parameter "__s" of type "slice" in function "__getitem__"
views.py: error: Argument of type "Literal[DependencyEnum.Avana]" cannot be assigned to parameter "dependency_dataset_name" of type "str" in function "get_dataset_by_name"
views.py: error: Argument of type "Unknown | Any | None" cannot be assigned to parameter "query_feature_slice_id" of type "str" in function "run_custom_analysis"
views.py: error: Argument of type "Unknown | FileStorage" cannot be assigned to parameter "filepath_or_buffer" of type "FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]" in function "read_csv" (reportArgumentType)
views.py: error: Argument of type "Unknown | Hashable" cannot be assigned to parameter "group_name" of type "str" in function "__init__"
views.py: error: Argument of type "Unknown | None" cannot be assigned to parameter "cell_line_col_index" of type "int" in function "get_all_cell_line_compound_sensitivity"
views.py: error: Argument of type "Unknown | None" cannot be assigned to parameter "cell_line_col_index" of type "int" in function "get_all_cell_line_gene_effects"
views.py: error: Argument of type "Unknown | int | list[Unknown]" cannot be assigned to parameter "color_num" of type "str | int" in function "__init__" (reportArgumentType)
views.py: error: Argument of type "list[BreadboxVectorCatalogNodeInfo]" cannot be assigned to parameter "__iterable" of type "Iterable[dict[Unknown, Unknown]]" in function "extend"
views.py: error: Argument of type "list[CellLineSeries]" cannot be assigned to parameter "breadbox_feature_data" of type "list[Series]" in function "get_df_from_feature_list"
views.py: error: Argument of type "list[Unknown] | Any | list[int] | Unknown" cannot be assigned to parameter "query_cell_lines" of type "list[str] | None" in function "run_custom_analysis" (reportArgumentType)
views.py: error: Argument of type "list[list[Unknown]] | None" cannot be assigned to parameter "compound_experiment_and_datasets" of type "List[Tuple[CompoundExperiment, DependencyDataset]]" in function "format_dep_dist_caption"
views.py: error: Argument of type "str | Any" cannot be assigned to parameter "__s" of type "slice" in function "__getitem__"
views.py: error: Argument of type "str | None" cannot be assigned to parameter "s" of type "str | bytes | bytearray" in function "loads" (reportArgumentType)
Expand Down
28 changes: 9 additions & 19 deletions portal-backend/tests/depmap/compute/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,31 +376,21 @@ def test_compute_univariate_associations_with_breadbox_feature(
empty_db_mock_downloads.session.flush()
interactive_test_utils.reload_interactive_config()

# Mock the breadbox client response
mock_breadbox_feature_data = [
FeatureResponse(
feature_id="foo",
dataset_id="bar",
values=FeatureResponseValues.from_dict(
{
cell_lines[0].depmap_id: 0.1,
cell_lines[1].depmap_id: 0.2,
cell_lines[2].depmap_id: 0.3,
}
),
label="feature_foo",
units="inches",
dataset_label="dataset_bar",
mock_breadbox_client.get_dataset_data = MagicMock(
return_value=pd.DataFrame(
data={"foo": [0.1, 0.2, 0.3]},
index=[
cell_lines[0].depmap_id,
cell_lines[1].depmap_id,
cell_lines[2].depmap_id,
],
)
]
mock_breadbox_client.get_feature_data = MagicMock(
return_value=mock_breadbox_feature_data
)

with app.test_client() as c:
# assemble query parameters
dataset_id = gene_dataset.name.name
breadbox_slice_id = "breadbox/foo/bar"
breadbox_slice_id = "slice/breadbox%2Ffoo/feature_foo/label"

parameters = {
"analysisType": "pearson",
Expand Down

0 comments on commit ba4ea17

Please sign in to comment.