Skip to content

Commit

Permalink
Merge pull request #234 from unipept/feature/reference-proteomes
Browse files Browse the repository at this point in the history
Add ability to build custom databases from UniProt reference proteomes
  • Loading branch information
pverscha authored Oct 7, 2022
2 parents 48a0132 + 3c0e1f0 commit 359eea0
Show file tree
Hide file tree
Showing 17 changed files with 697 additions and 169 deletions.
386 changes: 330 additions & 56 deletions src/components/custom-database/CreateCustomDatabase.vue

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/components/pages/CustomDatabasePage.vue
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ export default class CustomDatabasePage extends Vue {
this.selectedTaxaDefault.splice(0, this.selectedTaxaDefault.length);
const ncbiOntologyProcessor = new NcbiOntologyProcessor(new CachedNcbiResponseCommunicator());
const ontology = await ncbiOntologyProcessor.getOntologyByIds(db.taxa);
this.selectedTaxaDefault.push(...(db.taxa.map(id => ontology.getDefinition(id))));
this.selectedTaxaDefault.push(...(db.taxa.filter(id => id !== 1).map(id => ontology.getDefinition(id))));
this.createDatabaseDialog = true;
}
Expand Down
158 changes: 95 additions & 63 deletions src/components/taxon/TaxaBrowser.vue
Original file line number Diff line number Diff line change
Expand Up @@ -25,66 +25,79 @@
{{ failedImports.join(", ") }}.
</v-alert>
</div>
<div v-if="selectedItems.length === 0" style="text-align: center">
<div>No taxa selected yet. No filtering will be applied.</div>
<div class="text-caption">
Use the search bar below to find taxa that can be used for filtering.
</div>
</div>
<v-chip-group column v-else>
<v-chip
v-for="taxon in selectedItems"
close
:key="taxon.id"
@click:close="selectItem(taxon)"
:color="getRankColor(taxon.rank)"
dark>
{{ taxon.name }}
</v-chip>
</v-chip-group>
<div class="d-flex flex-row justify-center my-2">
<v-tooltip bottom open-delay="500">
<template v-slot:activator="{ on, attrs }">
<v-btn
color="primary"
outlined
v-on="on"
@click="importTaxaFromFile"
:loading="importLoading"
class="mr-2">
Import taxa from file
</v-btn>
</template>
<span>Import a selection of taxa for filtering from a file.</span>
</v-tooltip>
<v-tooltip bottom open-delay="500">
<template v-slot:activator="{ on, attrs }">
<v-btn outlined v-on="on" @click="clearSelection" color="red" class="mr-2">
Clear selection
</v-btn>
</template>
<span>Clear selection</span>
</v-tooltip>
</div>
<div>
<span v-if="uniprotRecordsLoading">Computing database size...</span>
<span v-else>Resulting database will contain {{ formattedUniprotRecords }} UniProtKB records.</span>
</div>

<v-container fluid>
<v-row class="d-flex align-center">
<div class="flex-grow-1">
<div v-if="selectedItems.length === 0" style="text-align: center">
<div>No taxa selected yet. No filtering will be applied.</div>
<div class="text-caption">
Use the table and search bar below to find taxa that can be used for filtering.
</div>
</div>
<v-chip-group column v-else>
<v-chip
v-for="taxon in selectedItems"
close
:key="taxon.id"
@click:close="selectItem(taxon)"
:color="getRankColor(taxon.rank)"
dark>
{{ taxon.name }}
</v-chip>
</v-chip-group>
</div>
<div class="d-flex flex-column justify-center">
<v-tooltip bottom open-delay="500">
<template v-slot:activator="{ on, attrs }">
<v-btn
color="primary"
outlined
v-on="on"
@click="importTaxaFromFile"
:loading="importLoading"
class="mb-1"
small>
Import taxa from file
</v-btn>
</template>
<span>Import a selection of taxa for filtering from a file.</span>
</v-tooltip>
<v-tooltip bottom open-delay="500">
<template v-slot:activator="{ on, attrs }">
<v-btn outlined v-on="on" @click="clearSelection" color="red" small>
Clear selection
</v-btn>
</template>
<span>Clear all selected taxa.</span>
</v-tooltip>
</div>
</v-row>
<v-row>
<div>
<span v-if="uniprotRecordsHelper.isExecuting()">Computing database size...</span>
<span v-else>
Resulting database will contain {{ formattedUniprotRecords }} UniProtKB records.
</span>
</div>
</v-row>
</v-container>

</div>
</div>

<div>
<div>
<!-- Must use append-icon instead of clearable here, otherwise model is set to null -->
<v-text-field
prepend-icon="mdi-magnify"
label="Search"
v-model="search"
append-icon="mdi-close"
@keydown.enter="filterByName()"
@click:append="clearFilter()">
</v-text-field>
</div>
<!-- <div>-->
<!-- &lt;!&ndash; Must use append-icon instead of clearable here, otherwise model is set to null &ndash;&gt;-->
<!-- <v-text-field-->
<!-- prepend-icon="mdi-magnify"-->
<!-- label="Search"-->
<!-- v-model="search"-->
<!-- append-icon="mdi-close"-->
<!-- @keydown.enter="filterByName()"-->
<!-- @click:append="clearFilter()">-->
<!-- </v-text-field>-->
<!-- </div>-->
<v-data-table
:headers="headers"
:items="taxa"
Expand All @@ -93,6 +106,19 @@
:loading="loading"
:options.sync="options"
dense>
<template v-slot:footer.prepend>
<v-text-field
prepend-icon="mdi-magnify"
label="Search"
v-model="search"
append-icon="mdi-close"
dense
hide-details
class="mr-6"
@keydown.enter="filterByName()"
@click:append="clearFilter()">
</v-text-field>
</template>
<template v-slot:item.action="{ item }">
<v-btn
color="primary"
Expand Down Expand Up @@ -172,6 +198,7 @@ import { Prop, Watch } from "vue-property-decorator";
import { DataOptions } from "vuetify";
import { promises as fs } from "fs";
import MetadataCommunicator from "@/logic/communication/metadata/MetadataCommunicator";
import AsyncHelper from "@/logic/AsyncHelper";
const { dialog } = require("@electron/remote");
Expand Down Expand Up @@ -272,7 +299,7 @@ export default class TaxaBrowser extends Vue {
private showSearchHintActive = false;
private uniprotRecords = 0;
private uniprotRecordsLoading = false;
private uniprotRecordsHelper = new AsyncHelper<number>();
private get formattedUniprotRecords(): string {
return StringUtils.toHumanReadableNumber(this.uniprotRecords);
Expand Down Expand Up @@ -338,19 +365,24 @@ export default class TaxaBrowser extends Vue {
}
@Watch("selectedItems")
@Watch("swissprotSelected")
@Watch("tremblSelected")
private onSelectedItemsChanged(): void {
this.$emit("input", this.selectedItems);
this.computeUniprotRecords();
}
private async computeUniprotRecords(): Promise<void> {
this.uniprotRecordsLoading = true;
this.uniprotRecords = await MetadataCommunicator.getUniProtRecordCount(
this.selectedItems.map(taxon => taxon.id),
this.swissprotSelected,
this.tremblSelected
this.uniprotRecordsHelper.performIfLast(
() => MetadataCommunicator.getUniProtRecordCount(
this.selectedItems.map(taxon => taxon.id),
this.swissprotSelected,
this.tremblSelected
),
(count) => {
this.uniprotRecords = count;
}
);
this.uniprotRecordsLoading = false;
}
private getRankColor(rank: string): string {
Expand Down
21 changes: 21 additions & 0 deletions src/db/migrations/v5_to_v6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
 * Migration v5 -> v6: rebuild the analysis_source table with the new "sources" column.
 *
 * All existing analysis sources are discarded by this migration. The assays and storage_metadata tables both keep an
 * analysis_source_id that references analysis_source(id), so those references are cleared first:
 *   - with foreign key enforcement enabled, dropping a parent table that is still referenced can fail;
 *   - with enforcement disabled, the stale ids would linger and could silently match unrelated rows once the new
 *     table starts handing out ids again.
 */
UPDATE assays SET analysis_source_id = NULL;
UPDATE storage_metadata SET analysis_source_id = NULL;

DROP TABLE analysis_source;

CREATE TABLE analysis_source (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    /* Kind of analysis source; restricted to 'online' or 'custom_db' by the CHECK constraint. */
    type TEXT CHECK( type in ('online', 'custom_db') ) NOT NULL,
    /**
     * If the type of the AnalysisSource is online, this field indicates which API endpoint was used for the analysis.
     */
    endpoint TEXT,
    /* UniProt version that was used to process the underlying data (e.g. 2022.02). */
    uniprot_version TEXT NOT NULL,
    /**
     * Comma-delimited list of NCBI taxon IDs that are used for filtering in this database. This field will only be
     * used if the type of this analysis source is "custom_db".
     */
    selected_taxa TEXT,
    /**
     * Comma-delimited list of databases (or database sources) that are used to construct this database. This list
     * could include things like swissprot, trembl or reference proteome identifiers.
     */
    sources TEXT
);
76 changes: 76 additions & 0 deletions src/db/schemas/schema_v6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/* One row per study. Assays point back to a study through assays.study_id. */
CREATE TABLE studies (
/* Text identifier of the study (primary key). */
id TEXT PRIMARY KEY,
/* Name of the study; required. */
name TEXT NOT NULL
);

/* One row per assay. Every assay belongs to exactly one study and has exactly one search configuration. */
CREATE TABLE assays (
/* Text identifier of the assay (primary key). */
id TEXT PRIMARY KEY,
/* Name of the assay; required. */
name TEXT NOT NULL,
/* Study this assay belongs to (references studies.id). */
study_id TEXT NOT NULL,
/* Search configuration used for this assay (references search_configuration.id). */
configuration_id INT NOT NULL,
/*
AnalysisSource that was last selected for this assay. This source does not necessarily need to be the same as the
one recorded in the storage_metadata table. It is only used to detect whether the assay needs to be recomputed or
not. May be NULL.
*/
analysis_source_id INTEGER,
FOREIGN KEY(study_id) REFERENCES studies(id),
FOREIGN KEY(configuration_id) REFERENCES search_configuration(id),
FOREIGN KEY(analysis_source_id) REFERENCES analysis_source(id)
);

/*
Search settings that are referenced by both assays.configuration_id and storage_metadata.configuration_id.
NOTE(review): the three settings are stored as INT; presumably they are 0/1 flags, confirm against the code that
writes them.
*/
CREATE TABLE search_configuration (
id INTEGER PRIMARY KEY,
equate_il INT NOT NULL,
filter_duplicates INT NOT NULL,
missing_cleavage_handling INT NOT NULL
);

/*
Peptide matching statistics, at most one row per assay (assay_id is the primary key).
NOTE(review): no FOREIGN KEY to assays(id) is declared for assay_id, so the link is not enforced by the database.
*/
CREATE TABLE peptide_trust (
assay_id TEXT NOT NULL,
/* NOTE(review): stored as TEXT; presumably a serialized list of the peptides that were not matched, confirm. */
missed_peptides TEXT NOT NULL,
matched_peptides INT NOT NULL,
searched_peptides INT NOT NULL,
PRIMARY KEY(assay_id)
);

/*
Metadata about the analysis results that are stored on the local filesystem, at most one row per assay (assay_id
is the primary key).
*/
CREATE TABLE storage_metadata (
assay_id TEXT NOT NULL,
/* Search configuration that belongs to the stored results (references search_configuration.id). */
configuration_id INT NOT NULL,
/*
Hash of the files that are stored on the local filesystem. This hash can be used to verify the integrity of the
files containing the offline result data on the filesystem. Value for this column is the concatenation of
the hash for both the data buffer and index buffer files.
*/
data_hash TEXT,
/* NOTE(review): stored as TEXT; the exact date format is not visible here, confirm against the writing code. */
analysis_date TEXT,
/* Analysis source that belongs to the stored results (references analysis_source.id). May be NULL. */
analysis_source_id INTEGER,
PRIMARY KEY(assay_id),
FOREIGN KEY(configuration_id) REFERENCES search_configuration(id),
FOREIGN KEY(analysis_source_id) REFERENCES analysis_source(id)
);

/*
Describes the source that was used to analyse an assay. Referenced by assays.analysis_source_id and
storage_metadata.analysis_source_id.
*/
CREATE TABLE analysis_source (
id INTEGER PRIMARY KEY AUTOINCREMENT,
/* Kind of analysis source; restricted to 'online' or 'custom_db' by the CHECK constraint. */
type TEXT CHECK( type in ('online', 'custom_db') ) NOT NULL,
/**
* If the type of the AnalysisSource is online, this field indicates which API endpoint was used for the analysis.
*/
endpoint TEXT,
/* UniProt version that was used to process the underlying data (e.g. 2022.02). */
uniprot_version TEXT NOT NULL,
/**
* Comma-delimited list of NCBI taxon IDs that are used for filtering in this database. This field will only be
* used if the type of this analysis source is "custom_db".
*/
selected_taxa TEXT,
/**
* Comma-delimited list of databases (or database sources) that are used to construct this database. This list
* could include things like swissprot, trembl or reference proteome identifiers.
*/
sources TEXT
);

/*
Bookkeeping about the database file itself.
NOTE(review): presumably application_version is the version of the application that last wrote or migrated this
database; confirm against the code that maintains this table.
*/
CREATE TABLE database_metadata (
application_version TEXT NOT NULL
);
41 changes: 41 additions & 0 deletions src/logic/AsyncHelper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
 * This class is a helper utility for async operations. It provides a method that can be used if multiple async
 * requests can be made simultaneously, but only the last result should actually be used.
 */
export default class AsyncHelper<T> {
    // Sequence number of the most recently started invocation of performIfLast.
    private asyncCount = 0;
    // True from the moment an invocation starts until the most recently started invocation has settled.
    private executing = false;

    /**
     * Use this method if a specific async operation could be executed more than once, but only the result of the final
     * call should be retained and used. Note that the final call is defined as the one that was last called by this
     * method, it is not necessarily the one that finishes last.
     *
     * If the async operation rejects (or the mutation throws), the returned promise rejects with that error. When
     * this happens for the final invocation, the helper is still marked as no longer executing, so that
     * `isExecuting()` cannot remain stuck on `true` after a failure.
     *
     * @param asyncResult The function that can be executed multiple times and for which only the results of the final
     * invocation should be retained.
     * @param mutation A mutation that should be performed upon receiving the results of the async function. This will
     * only be executed if the results of the final invocation of this method are available.
     */
    public async performIfLast(
        asyncResult: () => Promise<T>,
        mutation: (result: T) => void
    ): Promise<void> {
        this.executing = true;
        this.asyncCount++;
        // Remember which invocation this is, so we can tell afterwards whether a newer one has been started.
        const count = this.asyncCount;

        try {
            const result = await asyncResult();

            if (count === this.asyncCount) {
                mutation(result);
            }
        } finally {
            // Only the most recently started invocation may clear the flag; an older invocation settling while a
            // newer one is still in flight must leave it untouched.
            if (count === this.asyncCount) {
                this.executing = false;
            }
        }
    }

    /**
     * Is this helper currently waiting for a call to finish or not?
     */
    public isExecuting(): boolean {
        return this.executing;
    }
}
4 changes: 2 additions & 2 deletions src/logic/communication/docker/DockerCommunicator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ export default class DockerCommunicator {
public static readonly WEB_COMPONENT_PUBLIC_URL = "http://localhost";
public static readonly WEB_COMPONENT_PUBLIC_PORT = "3000";

public static readonly UNIPEPT_DB_IMAGE_NAME = "ghcr.io/unipept/unipept-database:1.0";
public static readonly UNIPEPT_WEB_IMAGE_NAME = "ghcr.io/unipept/unipept-web:1.0";
public static readonly UNIPEPT_DB_IMAGE_NAME = "ghcr.io/unipept/unipept-database:1.1";
public static readonly UNIPEPT_WEB_IMAGE_NAME = "ghcr.io/unipept/unipept-web:1.1";

public static connection: Dockerode;

Expand Down
6 changes: 4 additions & 2 deletions src/logic/communication/metadata/MetadataCommunicator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ export default class MetadataCommunicator {
swissprotSelected: boolean,
tremblSelected: boolean
): Promise<number> {
let idQuery: string;
if (taxa.length === 0) {
return 0;
idQuery = "*";
} else {
idQuery = taxa.map(taxon => `taxonomy_id:${taxon}`).join("+OR+");
}

const idQuery = taxa.map(taxon => `taxonomy_id:${taxon}`).join("+OR+");
const result = await NetworkUtils.getJSON(
`${MetadataCommunicator.UNIPROT_API_URL}?facets=reviewed&query=${idQuery}&size=0`
);
Expand Down
14 changes: 14 additions & 0 deletions src/logic/communication/proteomes/Proteome.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { NcbiId } from "unipept-web-components";

/**
 * Immutable value object that describes a single reference proteome.
 */
export default class Proteome {
    /** String that uniquely identifies this reference proteome. */
    public readonly id: string;

    /** Name of the organism to which this proteome is associated. */
    public readonly organismName: string;

    /** NCBI ID of the organism to which this proteome is associated. */
    public readonly organismId: NcbiId;

    /** The amount of proteins that are present in this reference proteome. */
    public readonly proteinCount: number;

    constructor(id: string, organismName: string, organismId: NcbiId, proteinCount: number) {
        this.id = id;
        this.organismName = organismName;
        this.organismId = organismId;
        this.proteinCount = proteinCount;
    }
}
Loading

0 comments on commit 359eea0

Please sign in to comment.