-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Release-As: 0.6.1
- Loading branch information
Showing
24 changed files
with
4,257 additions
and
88,895 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Protobuf messages for the server. | ||
|
||
syntax = "proto3"; | ||
|
||
package mehari.server; | ||
|
||
import "mehari/txs.proto"; | ||
|
||
// Query for the transcripts of a single gene, with optional pagination. | ||
// | ||
// All fields are declared `optional`, so a server can distinguish an unset | ||
// field from one explicitly set to its default value. | ||
message GeneTranscriptsQuery { | ||
// HGNC identifier of the gene to query for. | ||
// NOTE(review): presumably uses the same format as | ||
// `mehari.txs.Transcript.gene_id` -- confirm. | ||
optional string hgnc_id = 1; | ||
// Genome build to use, one of the `mehari.txs.GenomeBuild` values. | ||
optional mehari.txs.GenomeBuild genome_build = 2; | ||
|
||
// The number of entries to return per page. | ||
// NOTE(review): the behavior for an unset or non-positive value is not | ||
// defined in this file -- confirm the server-side default. | ||
optional int32 page_size = 3; | ||
// The token to continue from a previous query. | ||
// NOTE(review): presumably the `next_page_token` returned in the previous | ||
// `GeneTranscriptsResponse`; leave unset for the first page -- confirm. | ||
optional string next_page_token = 4; | ||
} | ||
|
||
// Container for a response to `GeneTranscriptsQuery`; holds one page of | ||
// results. | ||
message GeneTranscriptsResponse { | ||
// The transcripts for the gene. | ||
repeated mehari.txs.Transcript transcripts = 1; | ||
|
||
// The token to use for requesting the next page of results. | ||
// NOTE(review): presumably to be passed as | ||
// `GeneTranscriptsQuery.next_page_token` and left unset when there are no | ||
// further pages -- confirm against the server implementation. | ||
optional string next_page_token = 2; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
syntax = "proto3"; | ||
|
||
package mehari.txs; | ||
|
||
// Stores a long array of sequences together with an "index" that maps | ||
// sequence names (aliases) to positions in that array. | ||
// | ||
// The fields `aliases` and `aliases_idx` have the same length and `aliases_idx[i]` | ||
// stores the index into `seqs` for the sequence `aliases[i]`. In other words, | ||
// `seqs[aliases_idx[i]]` stores the sequence for `aliases[i]`. | ||
// | ||
// NOTE(review): `aliases` and `aliases_idx` are parallel arrays; protobuf does | ||
// not enforce that their lengths match, so writers and readers must check this. | ||
message SequenceDb { | ||
// The sequence aliases, cf. `aliases_idx`. | ||
repeated string aliases = 1; | ||
// The corresponding index in `seqs`, cf. `aliases`. | ||
repeated uint32 aliases_idx = 2; | ||
// The sequences themselves, addressed through `aliases_idx`. | ||
repeated string seqs = 3; | ||
} | ||
|
||
// Mapping from one gene to the IDs of its transcripts. | ||
message GeneToTxId { | ||
// Gene HGNC ID; serves as gene identifier. | ||
string gene_id = 1; | ||
// Vector of all transcript IDs for this gene. | ||
// NOTE(review): presumably these match `Transcript.id` -- confirm. | ||
repeated string tx_ids = 2; | ||
} | ||
|
||
// Container for the transcript-related database. | ||
message TranscriptDb { | ||
// Vector of all transcripts. | ||
repeated Transcript transcripts = 1; | ||
// Mapping from gene ID to vector of all transcript IDs; stored as a list of | ||
// `GeneToTxId` entries rather than as a protobuf `map`. | ||
repeated GeneToTxId gene_to_tx = 2; | ||
} | ||
|
||
// Enumeration for `Transcript::biotype`. | ||
enum TranscriptBiotype { | ||
// Unknown biotype; as the zero value this is the proto3 default for an | ||
// unset field. | ||
TRANSCRIPT_BIOTYPE_UNKNOWN = 0; | ||
// Coding transcript. | ||
TRANSCRIPT_BIOTYPE_CODING = 1; | ||
// Non-coding transcript. | ||
TRANSCRIPT_BIOTYPE_NON_CODING = 2; | ||
} | ||
|
||
// Tags that can be attached to a transcript, cf. `Transcript::tags`. | ||
// | ||
// NOTE(review): the previous comment called these "bit values", but the | ||
// numbers are sequential (0..6) rather than powers of two and the enum is used | ||
// in a `repeated` field, so they cannot be OR-ed together as a bit mask. | ||
enum TranscriptTag { | ||
// Unknown tag; as the zero value this is the proto3 default. | ||
TRANSCRIPT_TAG_UNKNOWN = 0; | ||
// Member of Ensembl basic. | ||
TRANSCRIPT_TAG_BASIC = 1; | ||
// Member of Ensembl canonical. | ||
TRANSCRIPT_TAG_ENSEMBL_CANONICAL = 2; | ||
// Member of MANE Select. | ||
TRANSCRIPT_TAG_MANE_SELECT = 3; | ||
// Member of MANE Plus Clinical. | ||
TRANSCRIPT_TAG_MANE_PLUS_CLINICAL = 4; | ||
// Member of RefSeq Select. | ||
TRANSCRIPT_TAG_REF_SEQ_SELECT = 5; | ||
// Flagged as being a selenoprotein (UGA => selenon). | ||
// NOTE(review): "selenon" presumably refers to selenocysteine -- confirm. | ||
TRANSCRIPT_TAG_SELENOPROTEIN = 6; | ||
} | ||
|
||
// Store information about a transcript, including its alignments to the | ||
// supported genome builds. | ||
message Transcript { | ||
// Transcript accession with version, e.g., `"NM_007294.3"` or `"ENST00000461574.1"` for BRCA1. | ||
string id = 1; | ||
// HGNC symbol, e.g., `"BRCA1"` | ||
string gene_symbol = 2; | ||
// HGNC gene identifier, e.g., `"1100"` for BRCA1. | ||
string gene_id = 3; | ||
// Transcript biotype. | ||
TranscriptBiotype biotype = 4; | ||
// Transcript flags; each entry is one `TranscriptTag` value. | ||
repeated TranscriptTag tags = 5; | ||
// Identifier of the corresponding protein; `optional`, so it may be absent. | ||
optional string protein = 6; | ||
// CDS start codon; `optional`, so it may be absent. | ||
// NOTE(review): presumably a position on the transcript sequence; the | ||
// coordinate convention (0- or 1-based) is not visible here -- confirm. | ||
optional int32 start_codon = 7; | ||
// CDS stop codon; `optional`, so it may be absent. | ||
// NOTE(review): same coordinate convention caveat as `start_codon`. | ||
optional int32 stop_codon = 8; | ||
// Alignments on the different genome builds. | ||
repeated GenomeAlignment genome_alignments = 9; | ||
} | ||
|
||
// Enumeration for the known genome builds. | ||
enum GenomeBuild { | ||
// Unknown genome build; as the zero value this is the proto3 default. | ||
GENOME_BUILD_UNKNOWN = 0; | ||
// GRCh37. | ||
GENOME_BUILD_GRCH37 = 1; | ||
// GRCh38. | ||
GENOME_BUILD_GRCH38 = 2; | ||
} | ||
|
||
// Enumeration for the two strands of the genome. | ||
enum Strand { | ||
// Unknown strand; as the zero value this is the proto3 default. | ||
STRAND_UNKNOWN = 0; | ||
// Forward / plus strand. | ||
STRAND_PLUS = 1; | ||
// Reverse / minus strand. | ||
STRAND_MINUS = 2; | ||
} | ||
|
||
// Store information about a transcript aligning to a genome. | ||
message GenomeAlignment { | ||
// The genome build identifier. | ||
GenomeBuild genome_build = 1; | ||
// Accession of the contig sequence. | ||
string contig = 2; | ||
// CDS start position. | ||
// NOTE(review): the previous comment said `-1` indicates `None`, but the | ||
// field is `optional` and can simply be left unset -- confirm which | ||
// convention writers use. | ||
optional int32 cds_start = 3; | ||
// CDS end position. | ||
// NOTE(review): same `-1` vs. unset caveat as for `cds_start`. | ||
optional int32 cds_end = 4; | ||
// The strand. | ||
Strand strand = 5; | ||
// Exons of the alignment. | ||
repeated ExonAlignment exons = 6; | ||
} | ||
|
||
// Store the alignment of one exon to the reference. | ||
// | ||
// NOTE(review): the coordinate convention of the `*_i` fields (0- or 1-based, | ||
// inclusive or exclusive end) is not stated in this file -- confirm. | ||
message ExonAlignment { | ||
// Start position on reference. | ||
int32 alt_start_i = 1; | ||
// End position on reference. | ||
int32 alt_end_i = 2; | ||
// Exon number. | ||
int32 ord = 3; | ||
// CDS start coordinate; `optional`, so it may be absent. | ||
optional int32 alt_cds_start_i = 4; | ||
// CDS end coordinate; `optional`, so it may be absent. | ||
optional int32 alt_cds_end_i = 5; | ||
// CIGAR string of alignment, empty indicates full matches. | ||
string cigar = 6; | ||
} | ||
|
||
// Database of transcripts with sequences; combines a `TranscriptDb` and a | ||
// `SequenceDb` with optional version metadata. | ||
message TxSeqDatabase { | ||
// Store transcripts with their aliases. | ||
TranscriptDb tx_db = 1; | ||
// Store sequence with their aliases. | ||
SequenceDb seq_db = 2; | ||
// The version of the database; `optional`, so it may be absent. | ||
optional string version = 3; | ||
// The reference assembly that this database refers to; `optional`, so it | ||
// may be absent. | ||
// NOTE(review): this is a free-form string rather than a `GenomeBuild` | ||
// value; the expected format is not visible here -- confirm. | ||
optional string genome_release = 4; | ||
} |
Oops, something went wrong.