Skip to content

Commit

Permalink
Merge pull request #26 from unipept/improvement/cleanup-database
Browse files Browse the repository at this point in the history
Optimise database schema
  • Loading branch information
pverscha authored Feb 22, 2023
2 parents 202b147 + 22da202 commit d25529f
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 449 deletions.
70 changes: 29 additions & 41 deletions schemas/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,21 @@ DEFAULT CHARACTER SET = utf8
COLLATE = utf8_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`interpro_entries`
-- -----------------------------------------------------
-- InterPro entry records: a code plus its category and name.
-- All text columns use the table's ascii character set.
CREATE TABLE IF NOT EXISTS `unipept`.`interpro_entries` (
    -- Auto-generated surrogate key.
    `id`       INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `code`     VARCHAR(9)   NOT NULL,
    `category` VARCHAR(32)  NOT NULL,
    `name`     VARCHAR(160) NOT NULL,

    PRIMARY KEY (`id`),
    -- Enforces at most one row per `code`.
    UNIQUE INDEX `idx_interpro_code` (`code` ASC)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`lineages`
-- -----------------------------------------------------
Expand Down Expand Up @@ -185,27 +200,6 @@ DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`embl_cross_references`
-- -----------------------------------------------------
-- Cross references attached to a `uniprot_entries` row.
-- `protein_id` and `sequence_id` are both optional (nullable).
CREATE TABLE IF NOT EXISTS `unipept`.`embl_cross_references` (
    -- Auto-generated surrogate key.
    `id`               INT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- Owning row in `unipept`.`uniprot_entries` (see the constraint below).
    `uniprot_entry_id` INT UNSIGNED NOT NULL,
    `protein_id`       VARCHAR(25)  NULL,
    `sequence_id`      VARCHAR(25)  NULL,

    PRIMARY KEY (`id`),
    -- Index backing the foreign key column.
    INDEX `fk_embl_reference_uniprot_entries` (`uniprot_entry_id` ASC),
    -- Secondary index on `sequence_id`.
    INDEX `idx_sequence_id` (`sequence_id` ASC),

    -- NO ACTION on both events: deleting or re-keying a referenced
    -- `uniprot_entries` row is rejected while rows here still point at it.
    CONSTRAINT `fk_uniprot_cross_reference_uniprot_entries`
        FOREIGN KEY (`uniprot_entry_id`)
        REFERENCES `unipept`.`uniprot_entries` (`id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`datasets`
-- -----------------------------------------------------
Expand Down Expand Up @@ -256,26 +250,6 @@ DEFAULT CHARACTER SET = utf8
COLLATE = utf8_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`refseq_cross_references`
-- -----------------------------------------------------
-- Cross references attached to a `uniprot_entries` row.
-- `protein_id` and `sequence_id` are both optional (nullable).
CREATE TABLE IF NOT EXISTS `unipept`.`refseq_cross_references` (
    -- Auto-generated surrogate key.
    `id`               INT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- Owning row in `unipept`.`uniprot_entries` (see the constraint below).
    `uniprot_entry_id` INT UNSIGNED NOT NULL,
    `protein_id`       VARCHAR(25)  NULL,
    `sequence_id`      VARCHAR(25)  NULL,

    PRIMARY KEY (`id`),
    -- Index backing the foreign key column.
    INDEX `fk_refseq_reference_uniprot_entries` (`uniprot_entry_id` ASC),

    -- NO ACTION on both events: deleting or re-keying a referenced
    -- `uniprot_entries` row is rejected while rows here still point at it.
    CONSTRAINT `fk_refseq_cross_reference_uniprot_entries`
        FOREIGN KEY (`uniprot_entry_id`)
        REFERENCES `unipept`.`uniprot_entries` (`id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`go_cross_references`
-- -----------------------------------------------------
Expand Down Expand Up @@ -326,6 +300,20 @@ DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`interpro_cross_references`
-- -----------------------------------------------------
-- Pairs a `uniprot_entry_id` with an `interpro_entry_code`.
-- NOTE(review): no FOREIGN KEY constraint is declared on either column in
-- this statement; referential integrity is presumably handled elsewhere
-- (confirm).
CREATE TABLE IF NOT EXISTS `unipept`.`interpro_cross_references` (
    -- Auto-generated surrogate key.
    `id`                  INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `uniprot_entry_id`    INT UNSIGNED NOT NULL,
    -- Same width as `interpro_entries`.`code` (VARCHAR(9)).
    `interpro_entry_code` VARCHAR(9)   NOT NULL,

    PRIMARY KEY (`id`),
    -- Lookup index on the entry id column.
    INDEX `fk_interpro_reference_uniprot_entries` (`uniprot_entry_id` ASC)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`users`
-- -----------------------------------------------------
Expand Down
13 changes: 0 additions & 13 deletions schemas/structure_index_only.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,6 @@ ALTER TABLE sequences ADD INDEX fk_sequences_taxons (lca ASC), ADD INDEX fk_sequ
-- Adds three single-column indexes to `peptides` in one ALTER TABLE
-- statement.
ALTER TABLE peptides
    ADD INDEX fk_peptides_sequences (sequence_id ASC),
    ADD INDEX fk_peptides_uniprot_entries (uniprot_entry_id ASC),
    ADD INDEX fk_peptides_original_sequences (original_sequence_id ASC);


-- -----------------------------------------------------
-- Table `unipept`.`embl_cross_references`
-- -----------------------------------------------------
-- Adds the index on `uniprot_entry_id`.
ALTER TABLE embl_cross_references
    ADD INDEX fk_embl_reference_uniprot_entries (uniprot_entry_id ASC);
-- The `sequence_id` index is left disabled here, as in the original file.
-- NOTE(review): the reason is not recorded in this file; confirm before
-- enabling it.
-- ALTER TABLE embl_cross_references ADD INDEX idx_sequence_id (sequence_id ASC);


-- -----------------------------------------------------
-- Table `unipept`.`refseq_cross_references`
-- -----------------------------------------------------
-- Adds the index on `uniprot_entry_id`.
ALTER TABLE refseq_cross_references
    ADD INDEX fk_refseq_reference_uniprot_entries (uniprot_entry_id ASC);


-- -----------------------------------------------------
-- Table `unipept`.`go_cross_references`
-- -----------------------------------------------------
Expand Down
76 changes: 9 additions & 67 deletions schemas/structure_no_index.sql
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,17 @@ ENGINE = InnoDB;


-- -----------------------------------------------------
-- Table `unipept`.`uniprot_entries`
-- Table `unipept`.`interpro_entries`
-- -----------------------------------------------------
-- NOTE(review): this span contains two CREATE TABLE statements
-- (`uniprot_entries`, then `interpro_entries`) followed by two
-- CHARACTER SET / COLLATE clauses (utf8, then ascii). There is no statement
-- terminator between the two CREATE TABLE statements, so the text is not
-- executable as written. It presumably interleaves the removed and added
-- sides of a diff hunk whose +/- markers were lost; confirm against the
-- real schema file before running or editing it.
CREATE TABLE IF NOT EXISTS `unipept`.`uniprot_entries` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT ,
`uniprot_accession_number` CHAR(10) ASCII NOT NULL ,
`version` SMALLINT UNSIGNED NOT NULL ,
`taxon_id` MEDIUMINT UNSIGNED NOT NULL ,
`type` ENUM('swissprot', 'trembl') NOT NULL ,
`name`VARCHAR(150) NOT NULL ,
`protein` TEXT NOT NULL ,
PRIMARY KEY (`id`))
-- Second CREATE TABLE begins here, without a preceding `;`.
CREATE TABLE IF NOT EXISTS `unipept`.`interpro_entries` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT ,
`code` VARCHAR(9) NOT NULL,
`category` VARCHAR(32) NOT NULL,
`name` VARCHAR(160) NOT NULL,
PRIMARY KEY (`id`))
ENGINE = InnoDB
-- Two charset/collation pairs follow; the first one ends with `;`.
DEFAULT CHARACTER SET = utf8
COLLATE = utf8_general_ci;
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
Expand Down Expand Up @@ -153,20 +150,6 @@ DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`embl_cross_references`
-- -----------------------------------------------------
-- Variant of `embl_cross_references` that declares only the primary key:
-- no secondary indexes and no foreign key constraint.
CREATE TABLE IF NOT EXISTS `unipept`.`embl_cross_references` (
    -- Auto-generated surrogate key.
    `id`               INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `uniprot_entry_id` INT UNSIGNED NOT NULL,
    -- Both identifiers below are optional (nullable).
    `protein_id`       VARCHAR(25)  NULL,
    `sequence_id`      VARCHAR(25)  NULL,

    PRIMARY KEY (`id`)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`datasets`
-- -----------------------------------------------------
Expand Down Expand Up @@ -217,20 +200,6 @@ DEFAULT CHARACTER SET = utf8
COLLATE = utf8_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`refseq_cross_references`
-- -----------------------------------------------------
-- Variant of `refseq_cross_references` that declares only the primary key:
-- no secondary indexes and no foreign key constraint.
CREATE TABLE IF NOT EXISTS `unipept`.`refseq_cross_references` (
    -- Auto-generated surrogate key.
    `id`               INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `uniprot_entry_id` INT UNSIGNED NOT NULL,
    -- Both identifiers below are optional (nullable).
    `protein_id`       VARCHAR(25)  NULL,
    `sequence_id`      VARCHAR(25)  NULL,

    PRIMARY KEY (`id`)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`go_cross_references`
-- -----------------------------------------------------
Expand All @@ -244,19 +213,6 @@ DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`go_terms`
-- -----------------------------------------------------
-- GO term records: a term code and its name. This variant declares only
-- the primary key and no secondary indexes.
CREATE TABLE IF NOT EXISTS `unipept`.`go_terms` (
    -- Auto-generated surrogate key.
    `id`           INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `go_term_code` VARCHAR(15)  NOT NULL,
    `name`         VARCHAR(160) NOT NULL,

    PRIMARY KEY (`id`)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`ec_cross_references`
-- -----------------------------------------------------
Expand All @@ -282,20 +238,6 @@ DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`interpro_entries`
-- -----------------------------------------------------
-- InterPro entry records: a code plus its category and name. This variant
-- declares only the primary key; `code` has no index or uniqueness
-- constraint here.
CREATE TABLE IF NOT EXISTS `unipept`.`interpro_entries` (
    -- Auto-generated surrogate key.
    `id`       INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `code`     VARCHAR(9)   NOT NULL,
    `category` VARCHAR(32)  NOT NULL,
    `name`     VARCHAR(160) NOT NULL,

    PRIMARY KEY (`id`)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = ascii
COLLATE = ascii_general_ci;


-- -----------------------------------------------------
-- Table `unipept`.`users`
-- -----------------------------------------------------
Expand Down
Loading

0 comments on commit d25529f

Please sign in to comment.