From b5cda5693bb2742e663effd73afb8c6669fda248 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:07:58 +0200 Subject: [PATCH 01/11] Move to psql --- Gemfile | 4 ++-- Gemfile.lock | 6 +++++- config/database.yml | 11 ++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 757c4fa..e80f2e9 100644 --- a/Gemfile +++ b/Gemfile @@ -6,8 +6,8 @@ ruby "3.1.2" # Bundle edge Rails instead: gem "rails", github: "rails/rails", branch: "main" gem "rails", "~> 7.0.3", ">= 7.0.3.1" -# Use mysql as the database for Active Record -gem "mysql2", "~> 0.5", "> 0.5.5" +# Use Postgres as the database for Active Record +gem "pg", "~> 1.5.6" # Use the Puma web server [https://github.com/puma/puma] gem "puma", "~> 5.0" diff --git a/Gemfile.lock b/Gemfile.lock index c1fe9b1..902feb1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -119,7 +119,10 @@ GEM minitest (5.18.0) msgpack (1.5.6) multi_json (1.15.0) +<<<<<<< HEAD mysql2 (0.5.6) +======= +>>>>>>> b4f1356 (Move to psql) net-imap (0.2.3) digest net-protocol @@ -150,6 +153,7 @@ GEM parallel (1.22.1) parser (3.1.2.1) ast (~> 2.4.1) + pg (1.5.6) public_suffix (5.0.0) puma (5.6.5) nio4r (~> 2.0) @@ -253,9 +257,9 @@ DEPENDENCIES debug jbuilder multi_json (~> 1.15.0) - mysql2 (~> 0.5, > 0.5.5) octokit (~> 5.4) oj (~> 3.13) + pg (~> 1.5.6) puma (~> 5.0) rails (~> 7.0.3, >= 7.0.3.1) rails-controller-testing (~> 1.0) diff --git a/config/database.yml b/config/database.yml index 06f9ee6..85ef32e 100644 --- a/config/database.yml +++ b/config/database.yml @@ -1,16 +1,17 @@ default: &default - adapter: mysql2 + adapter: postgresql username: root - password: unipept + password: root pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> development: <<: *default - username: unipept - database: unipept + username: root + database: postgres # setup local port forwarding for this to work host: 127.0.0.1 - port: 3306 + port: 5432 + schema_search_path: "unipept" # Warning: The database defined as "test" will be erased and # re-generated from your development database when you run "rake". From 9b052442d6fe1bc9d71035d08642c3298300d646 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:15:45 +0200 Subject: [PATCH 02/11] Fix conflict --- Gemfile.lock | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 902feb1..95cbf2c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -119,10 +119,6 @@ GEM minitest (5.18.0) msgpack (1.5.6) multi_json (1.15.0) -<<<<<<< HEAD - mysql2 (0.5.6) -======= ->>>>>>> b4f1356 (Move to psql) net-imap (0.2.3) digest net-protocol From 0f825cc7bd413f6699cb3f93e3d51f485208eba1 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:16:59 +0200 Subject: [PATCH 03/11] Update credentials --- config/database.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/database.yml b/config/database.yml index 85ef32e..e494dcf 100644 --- a/config/database.yml +++ b/config/database.yml @@ -18,9 +18,7 @@ development: # Do not set this db to the same as development or production. test: <<: *default - database: unipept_test url: <%= ENV['TEST_DATABASE_URL'] || ENV['DATABASE_URL'] %> production: <<: *default - database: unipept From c0f0db83dd12eb4ad5cd35eea64b7f7a9ca85804 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:29:40 +0200 Subject: [PATCH 04/11] Update CI --- .github/workflows/test.yml | 16 ++++++++-------- config/database.yml | 9 +++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3eca0da..20a2342 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,18 +6,18 @@ jobs: test: env: RAILS_ENV: "test" - TEST_DATABASE_URL: "mysql2://root:unipept@127.0.0.1:3306/unipept_test" + TEST_DATABASE_URL: "pg://unipept:unipept@127.0.0.1:5432/unipept_test" runs-on: ubuntu-latest services: - mysql: - image: mariadb:10.9 + psql: + image: postgres:16.2 env: - MYSQL_DATABASE: "unipept_test" - MYSQL_ROOT_PASSWORD: "unipept" - MYSQL_HOST: "localhost" + POSTGRES_DB: "unipept_test" + POSTGRES_PASSWORD: "unipept" + POSTGRES_USER: "unipept" ports: - - 3306:3306 - options: --health-cmd "mysqladmin ping -h localhost" --health-interval 10s --health-timeout 5s --health-retries 5 + - 5432:5432 + options: --health-cmd "pg_isready" --health-interval 10s --health-timeout 5s --health-retries 5 steps: - uses: actions/checkout@v4 - name: Use ruby from .ruby-version diff --git a/config/database.yml b/config/database.yml index e494dcf..4994e36 100644 --- a/config/database.yml +++ b/config/database.yml @@ -1,13 +1,12 @@ default: &default adapter: postgresql - username: root - password: root + username: unipept + password: unipept pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> development: <<: *default - username: root - database: postgres + database: unipept # setup local port forwarding for this to work host: 127.0.0.1 port: 5432 @@ -18,7 +17,9 @@ development: # Do not set this db to the same as development or production. test: <<: *default + database: unipept_test url: <%= ENV['TEST_DATABASE_URL'] || ENV['DATABASE_URL'] %> production: <<: *default + database: unipept From 3e1e63a482298be858a18d104ce30425ab75278f Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:31:13 +0200 Subject: [PATCH 05/11] Update CI --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 20a2342..9e56507 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ jobs: test: env: RAILS_ENV: "test" - TEST_DATABASE_URL: "pg://unipept:unipept@127.0.0.1:5432/unipept_test" + TEST_DATABASE_URL: "postgresql://unipept:unipept@127.0.0.1:5432/unipept_test" runs-on: ubuntu-latest services: psql: From 7f98aec2eb4d31e976d8a1730cb5b63fd53d3df2 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 15:38:02 +0200 Subject: [PATCH 06/11] Migrate tests --- db/schema.rb | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 9bc0487..73831b0 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -12,7 +12,7 @@ ActiveRecord::Schema.define(version: 0) do - create_table "dataset_items", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "dataset_items", id: :integer, unsigned: true, force: :cascade do |t| t.integer "dataset_id", unsigned: true t.string "name", limit: 160 t.text "data", limit: 16777215, null: false, collation: "ascii_general_ci" @@ -20,52 +20,52 @@ t.index ["dataset_id"], name: "fk_dataset_items_datasets" end - create_table "datasets", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "datasets", id: :integer, unsigned: true, force: :cascade do |t| t.string "environment", limit: 160 t.string "reference", limit: 500 t.string "url", limit: 200 t.string "project_website", limit: 200 end - create_table "ec_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "ec_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.string "ec_number_code", limit: 15, null: false end - create_table "ec_numbers", id: :integer, limit: 2, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "ec_numbers", id: :integer, limit: 2, unsigned: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.string "name", limit: 155, null: false end - create_table "embl_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "embl_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.string "protein_id", limit: 25 t.string "sequence_id", limit: 25 end - create_table "go_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "go_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.string "go_term_code", limit: 15, null: false end - create_table "go_terms", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| + create_table "go_terms", id: :integer, unsigned: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.string "namespace", limit: 18, null: false t.string "name", limit: 200, null: false end - create_table "interpro_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "interpro_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.string "interpro_entry_code", limit: 9, null: false end - create_table "interpro_entries", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "interpro_entries", id: :integer, unsigned: true, force: :cascade do |t| t.string "code", limit: 9, null: false t.string "category", limit: 32, null: false t.string "name", limit: 160, null: false end - create_table "lineages", primary_key: "taxon_id", id: :integer, limit: 3, unsigned: true, default: nil, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "lineages", primary_key: "taxon_id", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| t.integer "superkingdom", limit: 3 t.integer "kingdom", limit: 3 t.integer "subkingdom", limit: 3 @@ -95,28 +95,28 @@ t.integer "forma", limit: 3 end - create_table "peptides", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "peptides", id: :integer, unsigned: true, force: :cascade do |t| t.integer "sequence_id", null: false, unsigned: true t.integer "original_sequence_id", null: false, unsigned: true t.integer "uniprot_entry_id", null: false, unsigned: true end - create_table "posts", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "posts", id: :integer, unsigned: true, force: :cascade do |t| t.string "title", limit: 100, null: false t.text "content", null: false t.date "date", null: false end - create_table "proteome_caches", primary_key: "proteome_id", id: :integer, limit: 3, unsigned: true, default: nil, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "proteome_caches", primary_key: "proteome_id", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| t.text "json_sequences", limit: 16777215, null: false end - create_table "proteome_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| + create_table "proteome_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.integer "proteome_id", limit: 3, null: false, unsigned: true end - create_table "proteomes", id: :integer, limit: 3, unsigned: true, default: nil, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "proteomes", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| t.string "proteome_accession_number", limit: 12, null: false t.string "proteome_name", limit: 145, null: false t.integer "taxon_id", limit: 3, unsigned: true @@ -127,13 +127,13 @@ t.string "name", limit: 225 end - create_table "refseq_cross_references", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "refseq_cross_references", id: :integer, unsigned: true, force: :cascade do |t| t.integer "uniprot_entry_id", null: false, unsigned: true t.string "protein_id", limit: 25 t.string "sequence_id", limit: 25 end - create_table "sequences", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16", force: :cascade do |t| + create_table "sequences", id: :integer, unsigned: true, force: :cascade do |t| t.string "sequence", limit: 50, null: false t.integer "lca", limit: 3, unsigned: true t.integer "lca_il", limit: 3, unsigned: true @@ -141,14 +141,14 @@ t.binary "fa_il", limit: 16777215 end - create_table "taxons", id: :integer, limit: 3, unsigned: true, default: nil, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| + create_table "taxons", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| t.string "name", limit: 120, null: false t.string "rank", limit: 16 t.integer "parent_id", limit: 3, unsigned: true t.binary "valid_taxon", limit: 1, default: 0b1, null: false end - create_table "uniprot_entries", id: :integer, unsigned: true, options: "ENGINE=InnoDB DEFAULT CHARSET=ascii", force: :cascade do |t| + create_table "uniprot_entries", id: :integer, unsigned: true, force: :cascade do |t| t.string "uniprot_accession_number", limit: 10, null: false, collation: "latin1_swedish_ci" t.integer "version", limit: 2, null: false, unsigned: true t.integer "taxon_id", limit: 3, null: false, unsigned: true @@ -157,7 +157,7 @@ t.text "protein", null: false end - create_table "users", id: :integer, options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| + create_table "users", id: :integer, force: :cascade do |t| t.string "username", limit: 8, null: false t.integer "admin", limit: 1, default: 0, null: false end From 71e0339aeb11f17745b262075374021ca0ef3cef Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 16:00:15 +0200 Subject: [PATCH 07/11] Re-generate schema --- db/schema.rb | 205 +++++++++++++++++++++++---------------------------- 1 file changed, 91 insertions(+), 114 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 73831b0..29a7054 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,164 +2,141 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 0) do +ActiveRecord::Schema[7.0].define(version: 0) do + # These are extensions that must be enabled in order to support this database + enable_extension "plpgsql" - create_table "dataset_items", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "dataset_id", unsigned: true + # Custom types defined in this database. + # Note that some types may not work with other database engines. Be careful if changing database. + create_enum "db_type", ["swissprot", "trembl"] + create_enum "go_namespace", ["biological process", "molecular function", "cellular component"] + create_enum "rank_type", ["no rank", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "superorder", "order", "suborder", "infraorder", "superfamily", "family", "subfamily", "tribe", "subtribe", "genus", "subgenus", "species group", "species subgroup", "species", "subspecies", "strain", "varietas", "forma"] + + create_table "dataset_items", id: :integer, default: nil, force: :cascade do |t| + t.bigint "dataset_id" t.string "name", limit: 160 - t.text "data", limit: 16777215, null: false, collation: "ascii_general_ci" + t.text "data", null: false t.integer "order" - t.index ["dataset_id"], name: "fk_dataset_items_datasets" end - create_table "datasets", id: :integer, unsigned: true, force: :cascade do |t| + create_table "datasets", id: :integer, default: nil, force: :cascade do |t| t.string "environment", limit: 160 t.string "reference", limit: 500 t.string "url", limit: 200 t.string "project_website", limit: 200 end - create_table "ec_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true + create_table "ec_cross_references", id: :bigint, default: nil, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false t.string "ec_number_code", limit: 15, null: false + t.index ["uniprot_entry_id"], name: "idx_ec_cross_references_uniprot_entry_id" end - create_table "ec_numbers", id: :integer, limit: 2, unsigned: true, force: :cascade do |t| + create_table "ec_numbers", id: :integer, default: nil, force: :cascade do |t| t.string "code", limit: 15, null: false t.string "name", limit: 155, null: false + t.index ["code"], name: "idx_ec_numbers_code" end - create_table "embl_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true - t.string "protein_id", limit: 25 - t.string "sequence_id", limit: 25 - end - - create_table "go_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true + create_table "go_cross_references", id: :bigint, default: nil, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false t.string "go_term_code", limit: 15, null: false + t.index ["uniprot_entry_id"], name: "idx_go_cross_references_uniprot_entry_id" end - create_table "go_terms", id: :integer, unsigned: true, force: :cascade do |t| + create_table "go_terms", id: :integer, default: nil, force: :cascade do |t| t.string "code", limit: 15, null: false - t.string "namespace", limit: 18, null: false + t.enum "namespace", null: false, enum_type: "go_namespace" t.string "name", limit: 200, null: false + t.index ["code"], name: "idx_go_terms_code" end - create_table "interpro_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true + create_table "interpro_cross_references", id: :bigint, default: nil, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false t.string "interpro_entry_code", limit: 9, null: false + t.index ["uniprot_entry_id"], name: "idx_interpro_cross_references_uniprot_entry_id" end - create_table "interpro_entries", id: :integer, unsigned: true, force: :cascade do |t| + create_table "interpro_entries", id: :integer, default: nil, force: :cascade do |t| t.string "code", limit: 9, null: false t.string "category", limit: 32, null: false t.string "name", limit: 160, null: false end - create_table "lineages", primary_key: "taxon_id", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| - t.integer "superkingdom", limit: 3 - t.integer "kingdom", limit: 3 - t.integer "subkingdom", limit: 3 - t.integer "superphylum", limit: 3 - t.integer "phylum", limit: 3 - t.integer "subphylum", limit: 3 - t.integer "superclass", limit: 3 - t.integer "class", limit: 3 - t.integer "subclass", limit: 3 - t.integer "superorder", limit: 3 - t.integer "order", limit: 3 - t.integer "suborder", limit: 3 - t.integer "infraorder", limit: 3 - t.integer "superfamily", limit: 3 - t.integer "family", limit: 3 - t.integer "subfamily", limit: 3 - t.integer "tribe", limit: 3 - t.integer "subtribe", limit: 3 - t.integer "genus", limit: 3 - t.integer "subgenus", limit: 3 - t.integer "species_group", limit: 3 - t.integer "species_subgroup", limit: 3 - t.integer "species", limit: 3 - t.integer "subspecies", limit: 3 - t.integer "strain", limit: 3 - t.integer "varietas", limit: 3 - t.integer "forma", limit: 3 - end - - create_table "peptides", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "sequence_id", null: false, unsigned: true - t.integer "original_sequence_id", null: false, unsigned: true - t.integer "uniprot_entry_id", null: false, unsigned: true - end - - create_table "posts", id: :integer, unsigned: true, force: :cascade do |t| - t.string "title", limit: 100, null: false - t.text "content", null: false - t.date "date", null: false - end - - create_table "proteome_caches", primary_key: "proteome_id", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| - t.text "json_sequences", limit: 16777215, null: false - end - - create_table "proteome_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true - t.integer "proteome_id", limit: 3, null: false, unsigned: true - end - - create_table "proteomes", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| - t.string "proteome_accession_number", limit: 12, null: false - t.string "proteome_name", limit: 145, null: false - t.integer "taxon_id", limit: 3, unsigned: true - t.binary "type_strain", limit: 1, default: 0b0, null: false - t.binary "reference_proteome", limit: 1, default: 0b0, null: false - t.string "strain", limit: 120 - t.string "assembly", limit: 45 - t.string "name", limit: 225 - end - - create_table "refseq_cross_references", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "uniprot_entry_id", null: false, unsigned: true - t.string "protein_id", limit: 25 - t.string "sequence_id", limit: 25 - end - - create_table "sequences", id: :integer, unsigned: true, force: :cascade do |t| + create_table "lineages", primary_key: "taxon_id", id: :integer, default: nil, force: :cascade do |t| + t.integer "superkingdom" + t.integer "kingdom" + t.integer "subkingdom" + t.integer "superphylum" + t.integer "phylum" + t.integer "subphylum" + t.integer "superclass" + t.integer "class" + t.integer "subclass" + t.integer "superorder" + t.integer "order" + t.integer "suborder" + t.integer "infraorder" + t.integer "superfamily" + t.integer "family" + t.integer "subfamily" + t.integer "tribe" + t.integer "subtribe" + t.integer "genus" + t.integer "subgenus" + t.integer "species_group" + t.integer "species_subgroup" + t.integer "species" + t.integer "subspecies" + t.integer "strain" + t.integer "varietas" + t.integer "forma" + end + + create_table "peptides", id: :bigint, default: nil, force: :cascade do |t| + t.bigint "sequence_id", null: false + t.bigint "original_sequence_id", null: false + t.bigint "uniprot_entry_id", null: false + t.index ["original_sequence_id"], name: "idx_peptides_original_sequence_id" + t.index ["sequence_id"], name: "idx_peptides_sequence_id" + t.index ["uniprot_entry_id"], name: "idx_peptides_uniprot_entry_id" + end + + create_table "sequences", id: :bigint, default: nil, force: :cascade do |t| t.string "sequence", limit: 50, null: false - t.integer "lca", limit: 3, unsigned: true - t.integer "lca_il", limit: 3, unsigned: true - t.binary "fa", limit: 16777215 - t.binary "fa_il", limit: 16777215 + t.integer "lca" + t.integer "lca_il" + t.binary "fa" + t.binary "fa_il" + t.index ["lca"], name: "idx_sequences_lca" + t.index ["lca_il"], name: "idx_sequences_lca_il" + t.index ["sequence"], name: "idx_sequences_sequence" end - create_table "taxons", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| + create_table "taxons", id: :integer, default: nil, force: :cascade do |t| t.string "name", limit: 120, null: false - t.string "rank", limit: 16 - t.integer "parent_id", limit: 3, unsigned: true - t.binary "valid_taxon", limit: 1, default: 0b1, null: false + t.enum "rank", enum_type: "rank_type" + t.integer "parent_id" + t.integer "valid_taxon", limit: 2, default: 1, null: false end - create_table "uniprot_entries", id: :integer, unsigned: true, force: :cascade do |t| - t.string "uniprot_accession_number", limit: 10, null: false, collation: "latin1_swedish_ci" - t.integer "version", limit: 2, null: false, unsigned: true - t.integer "taxon_id", limit: 3, null: false, unsigned: true - t.string "type", limit: 9, null: false + create_table "uniprot_entries", id: :integer, default: nil, force: :cascade do |t| + t.string "uniprot_accession_number", limit: 10, null: false + t.integer "version", null: false + t.integer "taxon_id", null: false + t.enum "type", null: false, enum_type: "db_type" t.string "name", limit: 150, null: false t.text "protein", null: false - end - - create_table "users", id: :integer, force: :cascade do |t| - t.string "username", limit: 8, null: false - t.integer "admin", limit: 1, default: 0, null: false + t.index ["taxon_id"], name: "idx_uniprot_entries_taxon_id" + t.index ["uniprot_accession_number"], name: "idx_uniprot_entries_uniprot_accession_number" end add_foreign_key "dataset_items", "datasets", name: "fk_dataset_items_datasets" From f6f8608ba27fb5fdfe9e6dca6a30c85468bea400 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 16:11:45 +0200 Subject: [PATCH 08/11] Add missing table --- db/schema.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/db/schema.rb b/db/schema.rb index 29a7054..3acef0d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -139,5 +139,10 @@ t.index ["uniprot_accession_number"], name: "idx_uniprot_entries_uniprot_accession_number" end + create_table "users", id: :integer, force: :cascade do |t| + t.string "username", limit: 8, null: false + t.integer "admin", limit: 1, default: 0, null: false + end + add_foreign_key "dataset_items", "datasets", name: "fk_dataset_items_datasets" end From edc40faa471c6876dfb0f0ca1e5571e66918dcf1 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 16:18:20 +0200 Subject: [PATCH 09/11] Add auto increment --- db/schema.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 3acef0d..94adb57 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -20,58 +20,58 @@ create_enum "go_namespace", ["biological process", "molecular function", "cellular component"] create_enum "rank_type", ["no rank", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "superorder", "order", "suborder", "infraorder", "superfamily", "family", "subfamily", "tribe", "subtribe", "genus", "subgenus", "species group", "species subgroup", "species", "subspecies", "strain", "varietas", "forma"] - create_table "dataset_items", id: :integer, default: nil, force: :cascade do |t| + create_table "dataset_items", id: :integer, auto_increment: true, force: :cascade do |t| t.bigint "dataset_id" t.string "name", limit: 160 t.text "data", null: false t.integer "order" end - create_table "datasets", id: :integer, default: nil, force: :cascade do |t| + create_table "datasets", id: :integer, auto_increment: true, force: :cascade do |t| t.string "environment", limit: 160 t.string "reference", limit: 500 t.string "url", limit: 200 t.string "project_website", limit: 200 end - create_table "ec_cross_references", id: :bigint, default: nil, force: :cascade do |t| + create_table "ec_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "ec_number_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_ec_cross_references_uniprot_entry_id" end - create_table "ec_numbers", id: :integer, default: nil, force: :cascade do |t| + create_table "ec_numbers", id: :integer, auto_increment: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.string "name", limit: 155, null: false t.index ["code"], name: "idx_ec_numbers_code" end - create_table "go_cross_references", id: :bigint, default: nil, force: :cascade do |t| + create_table "go_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "go_term_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_go_cross_references_uniprot_entry_id" end - create_table "go_terms", id: :integer, default: nil, force: :cascade do |t| + create_table "go_terms", id: :integer, auto_increment: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.enum "namespace", null: false, enum_type: "go_namespace" t.string "name", limit: 200, null: false t.index ["code"], name: "idx_go_terms_code" end - create_table "interpro_cross_references", id: :bigint, default: nil, force: :cascade do |t| + create_table "interpro_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "interpro_entry_code", limit: 9, null: false t.index ["uniprot_entry_id"], name: "idx_interpro_cross_references_uniprot_entry_id" end - create_table "interpro_entries", id: :integer, default: nil, force: :cascade do |t| + create_table "interpro_entries", id: :integer, auto_increment: true, force: :cascade do |t| t.string "code", limit: 9, null: false t.string "category", limit: 32, null: false t.string "name", limit: 160, null: false end - create_table "lineages", primary_key: "taxon_id", id: :integer, default: nil, force: :cascade do |t| + create_table "lineages", primary_key: "taxon_id", id: :integer, auto_increment: true, force: :cascade do |t| t.integer "superkingdom" t.integer "kingdom" t.integer "subkingdom" @@ -101,7 +101,7 @@ t.integer "forma" end - create_table "peptides", id: :bigint, default: nil, force: :cascade do |t| + create_table "peptides", id: :bigint, auto_increment: true, force: :cascade do |t| t.bigint "sequence_id", null: false t.bigint "original_sequence_id", null: false t.bigint "uniprot_entry_id", null: false @@ -110,7 +110,7 @@ t.index ["uniprot_entry_id"], name: "idx_peptides_uniprot_entry_id" end - create_table "sequences", id: :bigint, default: nil, force: :cascade do |t| + create_table "sequences", id: :bigint, auto_increment: true, force: :cascade do |t| t.string "sequence", limit: 50, null: false t.integer "lca" t.integer "lca_il" @@ -121,14 +121,14 @@ t.index ["sequence"], name: "idx_sequences_sequence" end - create_table "taxons", id: :integer, default: nil, force: :cascade do |t| + create_table "taxons", id: :integer, auto_increment: true, force: :cascade do |t| t.string "name", limit: 120, null: false t.enum "rank", enum_type: "rank_type" t.integer "parent_id" t.integer "valid_taxon", limit: 2, default: 1, null: false end - create_table "uniprot_entries", id: :integer, default: nil, force: :cascade do |t| + create_table "uniprot_entries", id: :integer, auto_increment: true, force: :cascade do |t| t.string "uniprot_accession_number", limit: 10, null: false t.integer "version", null: false t.integer "taxon_id", null: false From 99f5fb752194bc8235e405305c5790d94e3e93db Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 16:20:18 +0200 Subject: [PATCH 10/11] Remove auto increment --- db/schema.rb | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 94adb57..44ecd03 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -20,58 +20,58 @@ create_enum "go_namespace", ["biological process", "molecular function", "cellular component"] create_enum "rank_type", ["no rank", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "superorder", "order", "suborder", "infraorder", "superfamily", "family", "subfamily", "tribe", "subtribe", "genus", "subgenus", "species group", "species subgroup", "species", "subspecies", "strain", "varietas", "forma"] - create_table "dataset_items", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "dataset_items", id: :integer, unsigned: true, force: :cascade do |t| t.bigint "dataset_id" t.string "name", limit: 160 t.text "data", null: false t.integer "order" end - create_table "datasets", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "datasets", id: :integer, unsigned: true, force: :cascade do |t| t.string "environment", limit: 160 t.string "reference", limit: 500 t.string "url", limit: 200 t.string "project_website", limit: 200 end - create_table "ec_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| + create_table "ec_cross_references", id: :bigint, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "ec_number_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_ec_cross_references_uniprot_entry_id" end - create_table "ec_numbers", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "ec_numbers", id: :integer, unsigned: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.string "name", limit: 155, null: false t.index ["code"], name: "idx_ec_numbers_code" end - create_table "go_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| + create_table "go_cross_references", id: :bigint, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "go_term_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_go_cross_references_uniprot_entry_id" end - create_table "go_terms", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "go_terms", id: :integer, unsigned: true, force: :cascade do |t| t.string "code", limit: 15, null: false t.enum "namespace", null: false, enum_type: "go_namespace" t.string "name", limit: 200, null: false t.index ["code"], name: "idx_go_terms_code" end - create_table "interpro_cross_references", id: :bigint, auto_increment: true, force: :cascade do |t| + create_table "interpro_cross_references", id: :bigint, force: :cascade do |t| t.bigint "uniprot_entry_id", null: false t.string "interpro_entry_code", limit: 9, null: false t.index ["uniprot_entry_id"], name: "idx_interpro_cross_references_uniprot_entry_id" end - create_table "interpro_entries", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "interpro_entries", id: :integer, unsigned: true, force: :cascade do |t| t.string "code", limit: 9, null: false t.string "category", limit: 32, null: false t.string "name", limit: 160, null: false end - create_table "lineages", primary_key: "taxon_id", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "lineages", primary_key: "taxon_id", id: :integer, unsigned: true, force: :cascade do |t| t.integer "superkingdom" t.integer "kingdom" t.integer "subkingdom" @@ -101,7 +101,7 @@ t.integer "forma" end - create_table "peptides", id: :bigint, auto_increment: true, force: :cascade do |t| + create_table "peptides", id: :bigint, force: :cascade do |t| t.bigint "sequence_id", null: false t.bigint "original_sequence_id", null: false t.bigint "uniprot_entry_id", null: false @@ -110,7 +110,7 @@ t.index ["uniprot_entry_id"], name: "idx_peptides_uniprot_entry_id" end - create_table "sequences", id: :bigint, auto_increment: true, force: :cascade do |t| + create_table "sequences", id: :bigint, force: :cascade do |t| t.string "sequence", limit: 50, null: false t.integer "lca" t.integer "lca_il" @@ -121,14 +121,14 @@ t.index ["sequence"], name: "idx_sequences_sequence" end - create_table "taxons", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "taxons", id: :integer, unsigned: true, force: :cascade do |t| t.string "name", limit: 120, null: false t.enum "rank", enum_type: "rank_type" t.integer "parent_id" t.integer "valid_taxon", limit: 2, default: 1, null: false end - create_table "uniprot_entries", id: :integer, auto_increment: true, force: :cascade do |t| + create_table "uniprot_entries", id: :integer, unsigned: true, force: :cascade do |t| t.string "uniprot_accession_number", limit: 10, null: false t.integer "version", null: false t.integer "taxon_id", null: false @@ -139,7 +139,7 @@ t.index ["uniprot_accession_number"], name: "idx_uniprot_entries_uniprot_accession_number" end - create_table "users", id: :integer, force: :cascade do |t| + create_table "users", id: :integer, unsigned: true, force: :cascade do |t| t.string "username", limit: 8, null: false t.integer "admin", limit: 1, default: 0, null: false end From 16ce1f652e34ef00e2f80bf8702d473dd74e7cb8 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Mon, 6 May 2024 16:29:54 +0200 Subject: [PATCH 11/11] Manually add missing things back --- db/schema.rb | 128 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 44ecd03..c3e4150 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -21,7 +21,7 @@ create_enum "rank_type", ["no rank", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "superorder", "order", "suborder", "infraorder", "superfamily", "family", "subfamily", "tribe", "subtribe", "genus", "subgenus", "species group", "species subgroup", "species", "subspecies", "strain", "varietas", "forma"] create_table "dataset_items", id: :integer, unsigned: true, force: :cascade do |t| - t.bigint "dataset_id" + t.bigint "dataset_id", unsigned: true t.string "name", limit: 160 t.text "data", null: false t.integer "order" @@ -34,8 +34,8 @@ t.string "project_website", limit: 200 end - create_table "ec_cross_references", id: :bigint, force: :cascade do |t| - t.bigint "uniprot_entry_id", null: false + create_table "ec_cross_references", id: :bigint, unsigned: true, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false, unsigned: true t.string "ec_number_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_ec_cross_references_uniprot_entry_id" end @@ -46,8 +46,8 @@ t.index ["code"], name: "idx_ec_numbers_code" end - create_table "go_cross_references", id: :bigint, force: :cascade do |t| - t.bigint "uniprot_entry_id", null: false + create_table "go_cross_references", id: :bigint, unsigned: true, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false, unsigned: true t.string "go_term_code", limit: 15, null: false t.index ["uniprot_entry_id"], name: "idx_go_cross_references_uniprot_entry_id" end @@ -59,8 +59,8 @@ t.index ["code"], name: "idx_go_terms_code" end - create_table "interpro_cross_references", id: :bigint, force: :cascade do |t| - t.bigint "uniprot_entry_id", null: false + create_table "interpro_cross_references", id: :bigint, unsigned: true, force: :cascade do |t| + t.bigint "uniprot_entry_id", null: false, unsigned: true t.string "interpro_entry_code", limit: 9, null: false t.index ["uniprot_entry_id"], name: "idx_interpro_cross_references_uniprot_entry_id" end @@ -72,50 +72,82 @@ end create_table "lineages", primary_key: "taxon_id", id: :integer, unsigned: true, force: :cascade do |t| - t.integer "superkingdom" - t.integer "kingdom" - t.integer "subkingdom" - t.integer "superphylum" - t.integer "phylum" - t.integer "subphylum" - t.integer "superclass" - t.integer "class" - t.integer "subclass" - t.integer "superorder" - t.integer "order" - t.integer "suborder" - t.integer "infraorder" - t.integer "superfamily" - t.integer "family" - t.integer "subfamily" - t.integer "tribe" - t.integer "subtribe" - t.integer "genus" - t.integer "subgenus" - t.integer "species_group" - t.integer "species_subgroup" - t.integer "species" - t.integer "subspecies" - t.integer "strain" - t.integer "varietas" - t.integer "forma" - end - - create_table "peptides", id: :bigint, force: :cascade do |t| - t.bigint "sequence_id", null: false - t.bigint "original_sequence_id", null: false - t.bigint "uniprot_entry_id", null: false + t.integer "superkingdom", limit: 3 + t.integer "kingdom", limit: 3 + t.integer "subkingdom", limit: 3 + t.integer "superphylum", limit: 3 + t.integer "phylum", limit: 3 + t.integer "subphylum", limit: 3 + t.integer "superclass", limit: 3 + t.integer "class", limit: 3 + t.integer "subclass", limit: 3 + t.integer "superorder", limit: 3 + t.integer "order", limit: 3 + t.integer "suborder", limit: 3 + t.integer "infraorder", limit: 3 + t.integer "superfamily", limit: 3 + t.integer "family", limit: 3 + t.integer "subfamily", limit: 3 + t.integer "tribe", limit: 3 + t.integer "subtribe", limit: 3 + t.integer "genus", limit: 3 + t.integer "subgenus", limit: 3 + t.integer "species_group", limit: 3 + t.integer "species_subgroup", limit: 3 + t.integer "species", limit: 3 + t.integer "subspecies", limit: 3 + t.integer "strain", limit: 3 + t.integer "varietas", limit: 3 + t.integer "forma", limit: 3 + end + + create_table "peptides", id: :bigint, unsigned: true, force: :cascade do |t| + t.bigint "sequence_id", null: false, unsigned: true + t.bigint "original_sequence_id", null: false, unsigned: true + t.bigint "uniprot_entry_id", null: false, unsigned: true t.index ["original_sequence_id"], name: "idx_peptides_original_sequence_id" t.index ["sequence_id"], name: "idx_peptides_sequence_id" t.index ["uniprot_entry_id"], name: "idx_peptides_uniprot_entry_id" end - create_table "sequences", id: :bigint, force: :cascade do |t| + create_table "posts", id: :integer, unsigned: true, force: :cascade do |t| + t.string "title", limit: 100, null: false + t.text "content", null: false + t.date "date", null: false + end + + create_table "proteome_caches", primary_key: "proteome_id", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| + t.text "json_sequences", limit: 16777215, null: false + end + + create_table "proteome_cross_references", id: :integer, unsigned: true, force: :cascade do |t| + t.integer "uniprot_entry_id", null: false, unsigned: true + t.integer "proteome_id", limit: 3, null: false, unsigned: true + end + + create_table "proteomes", id: :integer, limit: 3, unsigned: true, default: nil, force: :cascade do |t| + t.string "proteome_accession_number", limit: 12, null: false + t.string "proteome_name", limit: 145, null: false + t.integer "taxon_id", limit: 3, unsigned: true + t.binary "type_strain", limit: 1, default: 0b0, null: false + t.binary "reference_proteome", limit: 1, default: 0b0, null: false + t.string "strain", limit: 120 + t.string "assembly", limit: 45 + t.string "name", limit: 225 + end + + create_table "refseq_cross_references", id: :integer, unsigned: true, force: :cascade do |t| + t.integer "uniprot_entry_id", null: false, unsigned: true + t.string "protein_id", limit: 25 + t.string "sequence_id", limit: 25 + end + + create_table "sequences", id: :bigint, unsigned: true, force: :cascade do |t| t.string "sequence", limit: 50, null: false - t.integer "lca" - t.integer "lca_il" - t.binary "fa" - t.binary "fa_il" + t.integer "lca", unsigned: true + t.integer "lca_il", unsigned: true + t.binary "fa", limit: 16777215 + t.binary "fa_il", limit: 16777215 t.index ["lca"], name: "idx_sequences_lca" t.index ["lca_il"], name: "idx_sequences_lca_il" t.index ["sequence"], name: "idx_sequences_sequence" @@ -124,14 +156,14 @@ create_table "taxons", id: :integer, unsigned: true, force: :cascade do |t| t.string "name", limit: 120, null: false t.enum "rank", enum_type: "rank_type" - t.integer "parent_id" + t.integer "parent_id", limit: 3, unsigned: true t.integer "valid_taxon", limit: 2, default: 1, null: false end create_table "uniprot_entries", id: :integer, unsigned: true, force: :cascade do |t| t.string "uniprot_accession_number", limit: 10, null: false - t.integer "version", null: false - t.integer "taxon_id", null: false + t.integer "version", limit: 2, null: false, unsigned: true + t.integer "taxon_id", limit: 3, null: false, unsigned: true t.enum "type", null: false, enum_type: "db_type" t.string "name", limit: 150, null: false t.text "protein", null: false