diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
deleted file mode 100644
index 158cb1dc..00000000
--- a/.devcontainer/devcontainer.json
+++ /dev/null
@@ -1,18 +0,0 @@
-// For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/ruby
-{
- "name": "Ruby",
- // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
- "image": "mcr.microsoft.com/devcontainers/ruby:1-3.2-bookworm",
- // Features to add to the dev container. More info: https://containers.dev/features.
- // "features": {},
- // Use 'forwardPorts' to make a list of ports inside the container available locally.
- // "forwardPorts": [],
- // Use 'postCreateCommand' to run commands after the container is created.
- "postCreateCommand": "ruby --version",
- "postStartCommand": "bundle install"
- // Configure tool-specific properties.
- // "customizations": {},
- // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
- // "remoteUser": "root"
-}
diff --git a/.document b/.document
deleted file mode 100644
index 3d618dd8..00000000
--- a/.document
+++ /dev/null
@@ -1,5 +0,0 @@
-lib/**/*.rb
-bin/*
--
-features/**/*.feature
-LICENSE.txt
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 273acce5..2cb4ced5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,32 +3,38 @@ name: CI
on: [push]
jobs:
- test:
+ lint:
+ name: Lint
runs-on: ubuntu-latest
- strategy:
- matrix:
- ruby: ["2.7", "3.0", "3.1", "3.2"]
- fail-fast: false
- name: Test Ruby ${{ matrix.ruby }}
steps:
- - uses: actions/checkout@v1
- - name: Setup Ruby
- uses: ruby/setup-ruby@v1
+ - uses: actions/checkout@v4
+ - name: Set up Node
+ uses: actions/setup-node@v4
with:
- ruby-version: ${{ matrix.ruby }}
- bundler-cache: true
- - name: Test
+ node-version: 22.x
+ cache: yarn
+ - name: Install dependencies
run: |
- bundle exec rake test
- lint:
- name: Lint
+ yarn install
+ - name: Lint
+ run: |
+ yarn lint
+ - name: Type check
+ run: |
+ yarn typecheck
+ test:
+ name: Test
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v1
- - name: Setup Ruby
- uses: ruby/setup-ruby@v1
+ - uses: actions/checkout@v4
+ - name: Set up Node
+ uses: actions/setup-node@v4
with:
- bundler-cache: true
+ node-version: 22.x
+ cache: yarn
+ - name: Install dependencies
+ run: |
+ yarn install
- name: Test
run: |
- bundle exec rake test_style
+ yarn test
diff --git a/.gitignore b/.gitignore
index bd5f33a8..bc79950e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,6 @@ pkg
# For rubinius:
#*.rbc
+
+node_modules/**
+dist/**
diff --git a/.nvmrc b/.nvmrc
new file mode 100644
index 00000000..2bd5a0a9
--- /dev/null
+++ b/.nvmrc
@@ -0,0 +1 @@
+22
diff --git a/.rakeTasks b/.rakeTasks
deleted file mode 100644
index e4870ce1..00000000
--- a/.rakeTasks
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
diff --git a/.rubocop.yml b/.rubocop.yml
deleted file mode 100644
index 02bc2c2a..00000000
--- a/.rubocop.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-AllCops:
- TargetRubyVersion: 2.7
- NewCops: enable
- Exclude:
- - 'unipept.gemspec'
- - 'vendor/**/*'
- - 'Rakefile'
-
-Style/ClassAndModuleChildren:
- EnforcedStyle: compact
- Enabled: false
-Style/FrozenStringLiteralComment:
- Enabled: false
-
-# disable for now
-Lint/NestedMethodDefinition:
- Enabled: false
-Lint/Lint/RedundantStringCoercion:
- Enabled: false
-Style/ClassVars:
- Enabled: false
-Style/Documentation:
- Enabled: false
-Style/RescueModifier:
- Enabled: false
-Metrics/AbcSize:
- Enabled: false
-Metrics/ClassLength:
- Enabled: false
-Layout/LineLength:
- Enabled: false
-Metrics/MethodLength:
- Enabled: false
-Metrics/BlockLength:
- Enabled: false
-Metrics/CyclomaticComplexity:
- Enabled: false
-Metrics/PerceivedComplexity:
- Enabled: false
-Naming/HeredocDelimiterNaming:
- Enabled: false
-Naming/MethodParameterName:
- Enabled: false
diff --git a/.ruby-version b/.ruby-version
deleted file mode 100644
index 4a36342f..00000000
--- a/.ruby-version
+++ /dev/null
@@ -1 +0,0 @@
-3.0.0
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
deleted file mode 100644
index ee038b55..00000000
--- a/.vscode/tasks.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
- "version": "2.0.0",
- "tasks": [
- {
- "type": "rake",
- "task": "test_style:autocorrect_all",
- "group": "test",
- "problemMatcher": [],
- "label": "rake: test_style:autocorrect_all"
- },
- {
- "type": "rake",
- "task": "test",
- "group": {
- "kind": "test",
- "isDefault": true
- },
- "problemMatcher": [],
- "label": "rake: test"
- },
- {
- "type": "shell",
- "command": "bundle exec rake gemspec",
- "label": "generate gemspec",
- "problemMatcher": []
- },
- {
- "type": "shell",
- "command": "BRANCH=main bundle exec rake release",
- "label": "release gem",
- "problemMatcher": []
- }
- ]
-}
diff --git a/Gemfile b/Gemfile
deleted file mode 100644
index b9a8aa44..00000000
--- a/Gemfile
+++ /dev/null
@@ -1,18 +0,0 @@
-source 'http://rubygems.org'
-# Add dependencies required to use your gem here.
-
-gem 'cri', '~>2.15.10'
-gem 'typhoeus', '~> 1.4.0'
-
-# this is not explicitly used, but an older incompatible version comes bundled with ruby
-gem 'psych', '~> 5.1.0'
-
-group :development do
- gem 'minitest', '~> 5.18'
- gem 'rake', '~> 13.0.1'
- gem 'rubocop', '~>1.50'
-end
-
-group :release do
- gem 'juwelier'
-end
diff --git a/Gemfile.lock b/Gemfile.lock
deleted file mode 100644
index 85dda46b..00000000
--- a/Gemfile.lock
+++ /dev/null
@@ -1,109 +0,0 @@
-GEM
- remote: http://rubygems.org/
- specs:
- addressable (2.8.0)
- public_suffix (>= 2.0.2, < 5.0)
- ast (2.4.2)
- builder (3.2.4)
- cri (2.15.11)
- descendants_tracker (0.0.4)
- thread_safe (~> 0.3, >= 0.3.1)
- ethon (0.16.0)
- ffi (>= 1.15.0)
- faraday (1.3.0)
- faraday-net_http (~> 1.0)
- multipart-post (>= 1.2, < 3)
- ruby2_keywords
- faraday-net_http (1.0.1)
- ffi (1.15.5)
- git (1.18.0)
- addressable (~> 2.8)
- rchardet (~> 1.8)
- github_api (0.19.0)
- addressable (~> 2.4)
- descendants_tracker (~> 0.0.4)
- faraday (>= 0.8, < 2)
- hashie (~> 3.5, >= 3.5.2)
- oauth2 (~> 1.0)
- hashie (3.6.0)
- highline (2.0.3)
- json (2.6.3)
- juwelier (2.4.9)
- builder
- bundler
- git
- github_api
- highline
- kamelcase (~> 0)
- nokogiri
- psych
- rake
- rdoc
- semver2
- jwt (2.2.2)
- kamelcase (0.0.2)
- semver2 (~> 3)
- mini_portile2 (2.8.0)
- minitest (5.18.1)
- multi_json (1.15.0)
- multi_xml (0.6.0)
- multipart-post (2.1.1)
- nokogiri (1.13.10)
- mini_portile2 (~> 2.8.0)
- racc (~> 1.4)
- oauth2 (1.4.11)
- faraday (>= 0.17.3, < 3.0)
- jwt (>= 1.0, < 3.0)
- multi_json (~> 1.3)
- multi_xml (~> 0.5)
- rack (>= 1.2, < 4)
- parallel (1.23.0)
- parser (3.2.2.1)
- ast (~> 2.4.1)
- psych (5.1.0)
- stringio
- public_suffix (4.0.6)
- racc (1.6.1)
- rack (3.0.7)
- rainbow (3.1.1)
- rake (13.0.6)
- rchardet (1.8.0)
- rdoc (6.4.0)
- psych (>= 4.0.0)
- regexp_parser (2.8.0)
- rexml (3.2.5)
- rubocop (1.50.2)
- json (~> 2.3)
- parallel (~> 1.10)
- parser (>= 3.2.0.0)
- rainbow (>= 2.2.2, < 4.0)
- regexp_parser (>= 1.8, < 3.0)
- rexml (>= 3.2.5, < 4.0)
- rubocop-ast (>= 1.28.0, < 2.0)
- ruby-progressbar (~> 1.7)
- unicode-display_width (>= 2.4.0, < 3.0)
- rubocop-ast (1.28.0)
- parser (>= 3.2.1.0)
- ruby-progressbar (1.13.0)
- ruby2_keywords (0.0.4)
- semver2 (3.4.2)
- stringio (3.0.6)
- thread_safe (0.3.6)
- typhoeus (1.4.0)
- ethon (>= 0.9.0)
- unicode-display_width (2.4.2)
-
-PLATFORMS
- ruby
-
-DEPENDENCIES
- cri (~> 2.15.10)
- juwelier
- minitest (~> 5.18)
- psych (~> 5.1.0)
- rake (~> 13.0.1)
- rubocop (~> 1.50)
- typhoeus (~> 1.4.0)
-
-BUNDLED WITH
- 2.4.12
diff --git a/README.md b/README.md
index 702999cb..6a30e893 100644
--- a/README.md
+++ b/README.md
@@ -1,36 +1,33 @@
# unipept-cli
-[![Gem Version](https://badge.fury.io/rb/unipept.svg)](http://badge.fury.io/rb/unipept)
+![NPM Version](https://img.shields.io/npm/v/unipept-cli)
Unipept-cli offers a command line interface to the [Unipept](http://unipept.ugent.be) web service.
Documentation about the web service can be found at [http://unipept.ugent.be/apidocs](http://unipept.ugent.be/apidocs), documentation about the command line tools at [http://unipept.ugent.be/clidocs](http://unipept.ugent.be/clidocs).
## Installation
-To use the Unipept CLI, Ruby version 2.7 or higher needs to be installed. You can check this by running `ruby -v` on the commandline:
+To use the Unipept CLI, Node.js 22 or higher needs to be installed. You can check this by running `node -v` on the command line:
```
-$ ruby -v
-ruby 3.0.0p0 (2020-12-25 revision 95aff21468) [arm64-darwin21]
+$ node -v
+v22.3.0
```
-More information on installing Ruby can be found at https://www.ruby-lang.org/en/installation/
+More information on installing Node can be found at https://nodejs.org/en/download/package-manager
-The Unipept CLI is available as a _gem_. This means it can easily be installed with the following command:
+The Unipept CLI is available as an npm package. This means it can easily be installed with the following command:
```bash
-$ gem install unipept
-Successfully installed unipept-1.0.1
-Parsing documentation for unipept-1.0.1
-Done installing documentation for unipept after 0 seconds
-1 gem installed
+$ npm install -g unipept-cli
+added 3 packages in 986ms
```
After successful installation, the unipept command should be available:
```bash
-$ unipept -v
-1.0.1
+$ unipept --version
+4.0.0
```
The help can be accessed by running `unipept -h`.
diff --git a/Rakefile b/Rakefile
deleted file mode 100644
index 35780f89..00000000
--- a/Rakefile
+++ /dev/null
@@ -1,58 +0,0 @@
-require 'rubygems'
-require 'bundler'
-begin
- Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
- warn e.message
- warn 'Run `bundle install` to install missing gems'
- exit e.status_code
-end
-require 'rake'
-require 'rake/testtask'
-require 'rubocop/rake_task'
-begin
- require 'juwelier'
- Juwelier::Tasks.new do |gem|
- # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
- gem.name = 'unipept'
- gem.executables = %w[unipept prot2pept peptfilter uniprot]
- gem.homepage = 'http://unipept.ugent.be'
- gem.license = 'MIT'
- gem.summary = 'Command line interface to Unipept web services.'
- gem.description = <<-EOS
- Command line interface to the Unipept (http://unipept.ugent.be) web services
- (pept2lca, taxa2lca, pept2taxa, pept2prot and taxonomy) and some utility
- commands for handling proteins using the command line.
- EOS
- gem.email = 'unipept@ugent.be'
- gem.authors = ['Bart Mesuere', 'Pieter Verschaffelt', 'Tibo Vande Moortele', 'Toon Willems', 'Tom Naessens']
- gem.required_ruby_version = '>= 2.7.0'
- end
- Juwelier::RubygemsDotOrgTasks.new
-rescue LoadError
- # do nothing
-end
-
-task :test_unit do
- require './test/helper.rb'
-
- FileList['./test/**/test_*.rb', './test/**/*_spec.rb'].each do |fn|
- require fn
- end
-end
-
-RuboCop::RakeTask.new(:test_style)
-
-task test: %i[test_unit]
-
-task default: :test
-
-require 'rdoc/task'
-Rake::RDocTask.new do |rdoc|
- version = File.exist?('VERSION') ? File.read('VERSION') : ''
-
- rdoc.rdoc_dir = 'rdoc'
- rdoc.title = "unipept #{version}"
- rdoc.rdoc_files.include('README*')
- rdoc.rdoc_files.include('lib/**/*.rb')
-end
diff --git a/VERSION b/VERSION
deleted file mode 100644
index fd2a0186..00000000
--- a/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-3.1.0
diff --git a/bin/peptfilter b/bin/peptfilter
deleted file mode 100755
index 1d98950a..00000000
--- a/bin/peptfilter
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../lib/commands'
-
-Signal.trap('PIPE', 'EXIT') if Signal.list.include? 'PIPE'
-Signal.trap('INT', 'EXIT') if Signal.list.include? 'INT'
-
-Unipept::Commands::Peptfilter.run(ARGV)
diff --git a/bin/peptfilter.ts b/bin/peptfilter.ts
new file mode 100755
index 00000000..007dfdba
--- /dev/null
+++ b/bin/peptfilter.ts
@@ -0,0 +1,6 @@
+#!/usr/bin/env node
+
+import { Peptfilter } from '../lib/commands/peptfilter.js';
+
+const command = new Peptfilter();
+command.run();
diff --git a/bin/prot2pept b/bin/prot2pept
deleted file mode 100755
index 23d24bb4..00000000
--- a/bin/prot2pept
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../lib/commands'
-
-Signal.trap('PIPE', 'EXIT') if Signal.list.include? 'PIPE'
-Signal.trap('INT', 'EXIT') if Signal.list.include? 'INT'
-
-Unipept::Commands::Prot2pept.run(ARGV)
diff --git a/bin/prot2pept.ts b/bin/prot2pept.ts
new file mode 100755
index 00000000..d2d593be
--- /dev/null
+++ b/bin/prot2pept.ts
@@ -0,0 +1,6 @@
+#!/usr/bin/env node
+
+import { Prot2pept } from '../lib/commands/prot2pept.js';
+
+const command = new Prot2pept();
+command.run();
diff --git a/bin/unipept b/bin/unipept
deleted file mode 100755
index 42d7f3c7..00000000
--- a/bin/unipept
+++ /dev/null
@@ -1,8 +0,0 @@
-#!usr/bin/env ruby
-require_relative '../lib/commands'
-
-# Prevent broken pipe errors
-Signal.trap('PIPE', 'EXIT') if Signal.list.include? 'PIPE'
-Signal.trap('INT', 'EXIT') if Signal.list.include? 'INT'
-
-Unipept::Commands::Unipept.run(ARGV)
diff --git a/bin/unipept.ts b/bin/unipept.ts
new file mode 100755
index 00000000..bbb5e051
--- /dev/null
+++ b/bin/unipept.ts
@@ -0,0 +1,6 @@
+#!/usr/bin/env node
+
+import { Unipept } from '../lib/commands/unipept.js';
+
+const command = new Unipept();
+command.run();
diff --git a/bin/uniprot b/bin/uniprot
deleted file mode 100755
index e687cd21..00000000
--- a/bin/uniprot
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../lib/commands'
-
-Signal.trap('PIPE', 'EXIT') if Signal.list.include? 'PIPE'
-Signal.trap('INT', 'EXIT') if Signal.list.include? 'INT'
-
-Unipept::Commands::Uniprot.run(ARGV)
diff --git a/bin/uniprot.ts b/bin/uniprot.ts
new file mode 100755
index 00000000..7746fac9
--- /dev/null
+++ b/bin/uniprot.ts
@@ -0,0 +1,6 @@
+#!/usr/bin/env node
+
+import { Uniprot } from '../lib/commands/uniprot.js';
+
+const command = new Uniprot();
+command.run();
diff --git a/eslint.config.js b/eslint.config.js
new file mode 100644
index 00000000..195212d1
--- /dev/null
+++ b/eslint.config.js
@@ -0,0 +1,17 @@
+import globals from "globals";
+import pluginJs from "@eslint/js";
+import tseslint from "typescript-eslint";
+
+
+export default [
+  { ignores: ["dist/"] }, // must be the only key: that makes it a global ignore instead of scoping one object
+  { languageOptions: { globals: globals.node } },
+  pluginJs.configs.recommended,
+  ...tseslint.configs.recommended,
+  {
+    rules: {
+      "@typescript-eslint/no-unused-vars": ["error", { argsIgnorePattern: "^_" }],
+      "@typescript-eslint/ban-ts-comment": "off",
+    },
+  }
+];
diff --git a/jest.config.ts b/jest.config.ts
new file mode 100644
index 00000000..9b51f878
--- /dev/null
+++ b/jest.config.ts
@@ -0,0 +1,203 @@
+/**
+ * For a detailed explanation regarding each configuration property, visit:
+ * https://jestjs.io/docs/configuration
+ */
+
+import type { Config } from 'jest';
+
+const config: Config = {
+ // All imported modules in your tests should be mocked automatically
+ // automock: false,
+
+ // Stop running tests after `n` failures
+ // bail: 0,
+
+ // The directory where Jest should store its cached dependency information
+ // cacheDirectory: "/private/var/folders/j3/38fskpy159v07np8syk3p_2m0000gn/T/jest_dx",
+
+ // Automatically clear mock calls, instances, contexts and results before every test
+ clearMocks: true,
+
+ // Indicates whether the coverage information should be collected while executing the test
+ // collectCoverage: false,
+
+ // An array of glob patterns indicating a set of files for which coverage information should be collected
+ // collectCoverageFrom: undefined,
+
+ // The directory where Jest should output its coverage files
+ // coverageDirectory: undefined,
+
+ // An array of regexp pattern strings used to skip coverage collection
+ // coveragePathIgnorePatterns: [
+ // "/node_modules/"
+ // ],
+
+ // Indicates which provider should be used to instrument code for coverage
+ coverageProvider: "v8",
+
+ // A list of reporter names that Jest uses when writing coverage reports
+ // coverageReporters: [
+ // "json",
+ // "text",
+ // "lcov",
+ // "clover"
+ // ],
+
+ // An object that configures minimum threshold enforcement for coverage results
+ // coverageThreshold: undefined,
+
+ // A path to a custom dependency extractor
+ // dependencyExtractor: undefined,
+
+ // Make calling deprecated APIs throw helpful error messages
+ // errorOnDeprecated: false,
+
+ // The default configuration for fake timers
+ // fakeTimers: {
+ // "enableGlobally": false
+ // },
+
+ // Force coverage collection from ignored files using an array of glob patterns
+ // forceCoverageMatch: [],
+
+ // A path to a module which exports an async function that is triggered once before all test suites
+ // globalSetup: undefined,
+
+ // A path to a module which exports an async function that is triggered once after all test suites
+ // globalTeardown: undefined,
+
+ // A set of global variables that need to be available in all test environments
+ // globals: {},
+
+ // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
+ // maxWorkers: "50%",
+
+ // An array of directory names to be searched recursively up from the requiring module's location
+ // moduleDirectories: [
+ // "node_modules"
+ // ],
+
+ // An array of file extensions your modules use
+ // moduleFileExtensions: [
+ // "js",
+ // "mjs",
+ // "cjs",
+ // "jsx",
+ // "ts",
+ // "tsx",
+ // "json",
+ // "node"
+ // ],
+
+  // Strip the ".js" suffix from relative ESM imports only; anchored so ".json" files and packages named "*.js" are left alone
+  moduleNameMapper: {
+    "^(\\.{1,2}/.*)\\.js$": "$1",
+  },
+
+ // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
+ // modulePathIgnorePatterns: [],
+
+ // Activates notifications for test results
+ // notify: false,
+
+ // An enum that specifies notification mode. Requires { notify: true }
+ // notifyMode: "failure-change",
+
+ // A preset that is used as a base for Jest's configuration
+ preset: 'ts-jest/presets/default-esm',
+
+ // Run tests from one or more projects
+ // projects: undefined,
+
+ // Use this configuration option to add custom reporters to Jest
+ // reporters: undefined,
+
+ // Automatically reset mock state before every test
+ // resetMocks: false,
+
+ // Reset the module registry before running each individual test
+ // resetModules: false,
+
+ // A path to a custom resolver
+ // resolver: undefined,
+
+ // Automatically restore mock state and implementation before every test
+ // restoreMocks: false,
+
+ // The root directory that Jest should scan for tests and modules within
+ // rootDir: undefined,
+
+ // A list of paths to directories that Jest should use to search for files in
+ // roots: [
+ // ""
+ // ],
+
+ // Allows you to use a custom runner instead of Jest's default test runner
+ // runner: "jest-runner",
+
+ // The paths to modules that run some code to configure or set up the testing environment before each test
+ // setupFiles: [],
+
+ // A list of paths to modules that run some code to configure or set up the testing framework before each test
+ // setupFilesAfterEnv: [],
+
+ // The number of seconds after which a test is considered as slow and reported as such in the results.
+ // slowTestThreshold: 5,
+
+ // A list of paths to snapshot serializer modules Jest should use for snapshot testing
+ // snapshotSerializers: [],
+
+ // The test environment that will be used for testing
+ // testEnvironment: "jest-environment-node",
+
+ // Options that will be passed to the testEnvironment
+ // testEnvironmentOptions: {},
+
+ // Adds a location field to test results
+ // testLocationInResults: false,
+
+ // The glob patterns Jest uses to detect test files
+ // testMatch: [
+ // "**/__tests__/**/*.[jt]s?(x)",
+ // "**/?(*.)+(spec|test).[tj]s?(x)"
+ // ],
+
+ // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
+ // testPathIgnorePatterns: [
+ // "/node_modules/"
+ // ],
+
+ // The regexp pattern or array of patterns that Jest uses to detect test files
+ // testRegex: [],
+
+ // This option allows the use of a custom results processor
+ // testResultsProcessor: undefined,
+
+ // This option allows use of a custom test runner
+ // testRunner: "jest-circus/runner",
+
+ // A map from regular expressions to paths to transformers
+ transform: {
+ "^.+\\.tsx?$": ["ts-jest", { "useESM": true, "diagnostics": { "ignoreCodes": ["TS151001"] } }],
+ },
+
+ // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
+ // transformIgnorePatterns: [
+ // "/node_modules/",
+ // "\\.pnp\\.[^\\/]+$"
+ // ],
+
+ // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
+ // unmockedModulePathPatterns: undefined,
+
+ // Indicates whether each individual test should be reported during the run
+ // verbose: true,
+
+ // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
+ // watchPathIgnorePatterns: [],
+
+ // Whether to use watchman for file crawling
+ // watchman: true,
+};
+
+export default config;
diff --git a/lib/batch_iterator.rb b/lib/batch_iterator.rb
deleted file mode 100644
index ee6b3da3..00000000
--- a/lib/batch_iterator.rb
+++ /dev/null
@@ -1,89 +0,0 @@
-require 'set'
-
-module Unipept
- class BatchIterator
- attr_reader :batch_size
-
- def initialize(batch_size)
- @batch_size = batch_size
- end
-
- # Splits the input lines into slices, based on the batch_size of the current
- # command. Executes the given block for each of the batches.
- #
- # Supports both normal input and input in the fasta format.
- #
- # @input [Iterator] lines An iterator containing the input lines
- #
- # @input [lambda] block The code to execute on the slices
- def iterate(lines, &block)
- first_line = lines.next rescue return
- if fasta? first_line
- fasta_iterator(first_line, lines, &block)
- elsif csv_taxa2tree? first_line
- csv_taxa_iterator(first_line, lines, &block)
- else
- normal_iterator(first_line, lines, &block)
- end
- end
-
- # Checks if the geven line is a fasta header.
- #
- # @param [String] line The input line
- #
- # @return [Boolean] Whether te input is a fasta header
- def fasta?(line)
- line.start_with? '>'
- end
-
- def csv_taxa2tree?(line)
- line.include? 'taxon_id'
- end
-
- private
-
- # Splits the input lines in fasta format into slices, based on the
- # batch_size of the current command. Executes the given block for each of
- # the batches.
- def fasta_iterator(first_line, next_lines)
- current_fasta_header = first_line.chomp
- next_lines.each_slice(batch_size).with_index do |slice, i|
- fasta_mapper = []
- input_set = Set.new
-
- slice.each do |line|
- line = line.chomp
- if fasta? line
- current_fasta_header = line
- else
- fasta_mapper << [current_fasta_header, line]
- input_set << line
- end
- end
-
- yield(input_set.to_a, i, fasta_mapper)
- end
- end
-
- # Splits the input lines into slices, based on the batch_size of the current
- # command. Executes the given block for each of the batches.
- def normal_iterator(first_line, next_lines, &block)
- Enumerator.new do |y|
- y << first_line
- loop do
- y << next_lines.next
- end
- end.each_slice(batch_size).with_index(&block)
- end
-
- def csv_taxa_iterator(first_line, next_lines, &block)
- # Find index of taxon_id in the first_line and only parse this part from the next lines
- taxon_idx = first_line.rstrip.split(',').find_index('taxon_id')
- Enumerator.new do |y|
- loop do
- y << next_lines.next.rstrip.split(',')[taxon_idx]
- end
- end.each_slice(batch_size).with_index(&block)
- end
- end
-end
diff --git a/lib/batch_order.rb b/lib/batch_order.rb
deleted file mode 100644
index 1e84da2b..00000000
--- a/lib/batch_order.rb
+++ /dev/null
@@ -1,21 +0,0 @@
-module Unipept
- class BatchOrder
- attr_reader :order
-
- def initialize
- @order = {}
- @current = 0
- end
-
- # Executes block if it's its turn, queues the block in the other case.
- def wait(i, &block)
- @order[i] = block
- return unless i == @current
-
- while order[@current]
- order.delete(@current).call
- @current += 1
- end
- end
- end
-end
diff --git a/lib/commands.rb b/lib/commands.rb
deleted file mode 100644
index bf87a4b6..00000000
--- a/lib/commands.rb
+++ /dev/null
@@ -1,10 +0,0 @@
-require 'cri'
-
-module Unipept
- module Commands
- require_relative 'commands/peptfilter'
- require_relative 'commands/prot2pept'
- require_relative 'commands/uniprot'
- require_relative 'commands/unipept'
- end
-end
diff --git a/lib/commands/base_command.ts b/lib/commands/base_command.ts
new file mode 100644
index 00000000..c0480c17
--- /dev/null
+++ b/lib/commands/base_command.ts
@@ -0,0 +1,60 @@
+import { Command } from "commander";
+import { readFileSync } from "fs";
+
+/**
+ * Base class giving every command the same shape: a commander program plus a run method.
+ * The constructor options exist so that commands can be driven from tests.
+ *
+ * Commands extending this class should implement the run method and call parseArguments
+ * at the beginning of the run method.
+ */
+export abstract class BaseCommand {
+ public program: Command; // commander instance built by create(); subclasses add their options to it
+ version: string; // "version" field read from package.json in the constructor
+
+ constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean }) {
+ let p = ""; // one extra "../" when this module's URL contains "/dist/" (presumably the build output - confirm)
+ if (import.meta.url.includes("/dist/")) {
+ p = "../";
+ }
+ this.version = JSON.parse(readFileSync(new URL(p + "../../package.json", import.meta.url), "utf8")).version;
+ this.program = this.create(options);
+ }
+
+ abstract run(args?: string[]): void;
+
+ /**
+ * Builds the commander program: applies the optional exitOverride/suppressOutput hooks and
+ * registers the version. Implementing classes can add additional options to this.program.
+ */
+ create(options?: { exitOverride?: boolean, suppressOutput?: boolean }): Command {
+ const program = new Command();
+
+ // optional hooks, used for debugging and testing
+ if (options?.exitOverride) {
+ program.exitOverride(); // don't exit on error
+ }
+ if (options?.suppressOutput) {
+ // don't write anything to the console
+ program.configureOutput({
+ writeOut: () => { },
+ writeErr: () => { }
+ });
+ }
+ program.version(this.version);
+
+ return program;
+ }
+
+ /**
+ * Parses args as user-supplied arguments when given (lets tests inject them); otherwise calls parse() with no arguments.
+ */
+ parseArguments(args?: string[]) {
+ if (args) {
+ // custom arg parsing to be able to inject args for testing
+ this.program.parse(args, { from: "user" });
+ } else {
+ this.program.parse();
+ }
+ }
+}
diff --git a/lib/commands/peptfilter.rb b/lib/commands/peptfilter.rb
deleted file mode 100644
index 99326f3d..00000000
--- a/lib/commands/peptfilter.rb
+++ /dev/null
@@ -1,117 +0,0 @@
-module Unipept::Commands
- class Peptfilter
- attr_reader :root_command
-
- @root_command = Cri::Command.define do
- name 'peptfilter'
- summary 'Filter peptides based on specific criteria.'
- usage 'peptfilter [options]'
- description <<-EOS
- The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.
-
- The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.
- EOS
- # flag :u, :unique, "filter duplicate peptides."
- required nil, :minlen, 'only retain tryptic peptides that have at least min (default: 5) amino acids.'
- required nil, :maxlen, 'only retain tryptic peptides that have at most max (default: 50) amino acids.'
- required :l, :lacks, 'only retain tryptic peptides that lack all amino acids from the string of residues.'
- required :c, :contains, 'only retain tryptic peptides that contain all amino acids from the string of residues.'
- flag :h, :help, 'show help for this command' do |_value, cmd|
- puts cmd.help
- exit 0
- end
- run do |opts, args, _cmd|
- abort "error: peptfilter doesn't support input as arguments. Use standard input instead." unless args.empty?
- minlen = opts.fetch(:minlen, '5').to_i
- maxlen = opts.fetch(:maxlen, '50').to_i
- lacks = opts.fetch(:lacks, '').chars.to_a
- contains = opts.fetch(:contains, '').chars.to_a
- $stdin.each_line do |pept|
- # FASTA headers
- if pept.start_with? '>'
- puts pept
- next
- end
-
- pept = pept.chomp
- puts pept if Peptfilter.filter(pept, minlen, maxlen, lacks, contains)
- end
- end
- end
-
- # Invokes the peptfilter command-line tool with the given arguments.
- #
- # @param [Array] args An array of command-line arguments
- #
- # @return [void]
- def self.run(args)
- @root_command.run(args)
- end
-
- # Checks if a peptide satisfies the min length, max length, lacks and contains requirements.
- # Returns true if
- # - the peptide length is equal or higher than min
- # - the peptide length is equal or lower than max
- # - the peptide doesn't contain any of the amino acids in lacks
- # - the peptide contains all of the amino acids in contains
- #
- # @param [String] peptide The peptide to check
- #
- # @param [Integer] min The minimal length requirement
- #
- # @param [Integer] max The maximal length requirement
- #
- # @param [Array] lacks The forbidden amino acids
- #
- # @param [Array] contains The required amino acids
- #
- # @return [Boolean] true if the peptide satisfies all requirements
- def self.filter(peptide, min, max, lacks, contains)
- filter_length(peptide, min, max) &&
- filter_lacks(peptide, lacks) &&
- filter_contains(peptide, contains)
- end
-
- # Checks if a peptide satisfies the min length and max length requirements.
- # Returns true if
- # - the peptide length is equal or higher than min
- # - the peptide length is equal or lower than max
- #
- # @param [String] peptide The peptide to check
- #
- # @param [Integer] min The minimal length requirement
- #
- # @param [Integer] max The maximal length requirement
- #
- # @return [Boolean] true if the peptide satisfies all requirements
- def self.filter_length(peptide, min, max)
- peptide.length >= min && peptide.length <= max
- end
-
- # Checks if a peptide satisfies lacks requirement.
- # Returns true if
- # - the peptide doesn't contain any of the amino acids in lacks
- #
- # @param [String] peptide The peptide to check
- #
- # @param [Array] lacks The forbidden amino acids
- #
- # @return [Boolean] true if the peptide satisfies all requirements
- def self.filter_lacks(peptide, lacks)
- (peptide.chars.to_a & lacks).empty?
- end
-
- # Checks if a peptide satisfies the contains requirement.
- # Returns true if
- # - the peptide contains all of the amino acids in contains
- #
- # @param [String] peptide The peptide to check
- #
- # @param [Array] contains The required amino acids
- #
- # @return [Boolean] true if the peptide satisfies all requirements
- def self.filter_contains(peptide, contains)
- (peptide.chars.to_a & contains).size == contains.size
- end
- end
-end
diff --git a/lib/commands/peptfilter.ts b/lib/commands/peptfilter.ts
new file mode 100644
index 00000000..0bdb17ec
--- /dev/null
+++ b/lib/commands/peptfilter.ts
@@ -0,0 +1,82 @@
+import { createInterface } from 'node:readline';
+import { BaseCommand } from './base_command.js';
+
+/**
+ * Implements the `peptfilter` command: reads lines from standard input and writes the ones that pass the
+ * length, `--lacks` and `--contains` checks to standard output. Lines starting with `>` are always written.
+ */
+export class Peptfilter extends BaseCommand {
+
+  readonly description = `The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.
+
+The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.`;
+
+  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean }) {
+    super(options);
+
+    // Every option below takes a value, so each one declares an <argument> placeholder. Without a placeholder
+    // commander registers a boolean flag and never hands the value to the custom parser.
+    this.program
+      .summary("Filter peptides based on specific criteria.")
+      .description(this.description)
+      .option("--minlen <length>", "only retain peptides having at least this many amino acids", (d) => parseInt(d, 10), 5)
+      .option("--maxlen <length>", "only retain peptides having at most this many amino acids", (d) => parseInt(d, 10), 50)
+      .option("-l, --lacks <amino_acids>", "only retain peptides that lack all of the specified amino acids", (d) => d.split(""))
+      .option("-c, --contains <amino_acids>", "only retain peptides that contain all of the specified amino acids", (d) => d.split(""));
+  }
+
+  /**
+   * Parses the arguments, then reads standard input line by line and writes the retained lines to standard output.
+   *
+   * Performance note: this implementation takes 4 seconds to run on swissprot. It can be made faster by using line events instead of
+   * async iterators. This alternative implementation runs in 2.5 seconds. However, I decided that the async iterator implementation is
+   * both more readable and more in line with the implementation of the other commands.
+   *
+   * @param args arguments handed to parseArguments
+   */
+  async run(args?: string[]) {
+    this.parseArguments(args);
+    const opts = this.program.opts();
+    const minLen: number = opts.minlen;
+    const maxLen: number = opts.maxlen;
+    const lacks: string[] = opts.lacks || [];
+    const contains: string[] = opts.contains || [];
+
+    // buffering output makes a big difference in performance
+    let output: string[] = [];
+    let i = 0;
+
+    for await (const line of createInterface({ input: process.stdin })) {
+      i++;
+      if (line.startsWith(">")) { // pass through FASTA headers
+        output.push(line);
+      } else if (Peptfilter.checkLength(line, minLen, maxLen) && Peptfilter.checkLacks(line, lacks) && Peptfilter.checkContains(line, contains)) {
+        output.push(line);
+      }
+      if (i % 1000 === 0) { // flush the buffer every 1000 input lines
+        output.push(""); //add a newline at the end of the buffer without additional string copy
+        process.stdout.write(output.join("\n"));
+        output = [];
+      }
+    }
+
+    // flush what is left; the trailing empty string terminates the last buffered line with a newline
+    output.push("");
+    process.stdout.write(output.join("\n"));
+  }
+
+  /** Returns true when the length of `line` lies between `minLen` and `maxlen`, both inclusive. */
+  static checkLength(line: string, minLen: number, maxlen: number): boolean {
+    return line.length >= minLen && line.length <= maxlen;
+  }
+
+  /** Returns true when `line` contains none of the strings in `lacks` (always true for an empty list). */
+  static checkLacks(line: string, lacks: string[]): boolean {
+    return lacks.every((aa: string) => !line.includes(aa));
+  }
+
+  /** Returns true when `line` contains every string in `contains` (always true for an empty list). */
+  static checkContains(line: string, contains: string[]): boolean {
+    return contains.every((aa: string) => line.includes(aa));
+  }
+}
diff --git a/lib/commands/prot2pept.rb b/lib/commands/prot2pept.rb
deleted file mode 100644
index 974aad99..00000000
--- a/lib/commands/prot2pept.rb
+++ /dev/null
@@ -1,61 +0,0 @@
-module Unipept::Commands
- class Prot2pept
- attr_reader :root_command, :valid_formats
-
- @root_command = Cri::Command.define do
- name 'prot2pept'
- summary 'Split protein sequences into peptides.'
- usage 'prot2pept [options]'
- description <<-EOS
- The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed to standard input.
-
- The input should have either one protein sequence per line or contain a FASTA formatted list of protein sequences. FASTA headers are preserved in the output, so that peptides can be bundled per protein sequence.
-
- EOS
- required :p, :pattern, 'specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved (default: ([KR])([^P]) for tryptic peptides).'
- flag :h, :help, 'show help for this command' do |_value, cmd|
- puts cmd.help
- exit 0
- end
- run do |opts, _args, _cmd|
- pattern = opts.fetch(:pattern, '([KR])([^P])')
- pattern = Regexp.compile(pattern)
-
- # decide if we have FASTA input
- first_char = $stdin.getc
- $stdin.ungetc(first_char)
- if first_char == '>'
- # fasta mode!
- protein = ''
- while (line = $stdin.gets)
- if line.start_with? '>'
- puts Prot2pept.split(protein, pattern)
- protein = ''
- puts line
- else
- protein << line.chomp
- end
- end
- puts Prot2pept.split(protein, pattern)
- else
- $stdin.each_line do |prot|
- puts Prot2pept.split(prot, pattern)
- end
- end
- end
- end
-
- def self.split(protein, pattern)
- protein.tr('*', "\n").gsub(pattern, "\\1\n\\2").gsub(pattern, "\\1\n\\2").split("\n").reject(&:empty?)
- end
-
- # Invokes the uniprot command-line tool with the given arguments.
- #
- # @param [Array] args An array of command-line arguments
- #
- # @return [void]
- def self.run(args)
- @root_command.run(args)
- end
- end
-end
diff --git a/lib/commands/prot2pept.ts b/lib/commands/prot2pept.ts
new file mode 100644
index 00000000..4c33ed7c
--- /dev/null
+++ b/lib/commands/prot2pept.ts
@@ -0,0 +1,100 @@
+import { createInterface } from 'node:readline';
+import { BaseCommand } from './base_command.js';
+
+/**
+ * Implements the `prot2pept` command: reads protein sequences from standard input (one per line, or FASTA)
+ * and writes the pieces obtained by splitting each sequence on the cleavage pattern to standard output.
+ */
+export class Prot2pept extends BaseCommand {
+
+  readonly description = `The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed to standard input.
+
+The input should have either one protein sequence per line or contain a FASTA formatted list of protein sequences. FASTA headers are preserved in the output, so that peptides can be bundled per protein sequence.
+`;
+
+  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean }) {
+    super(options);
+
+    // --pattern takes a value, so it declares an <argument> placeholder; without one commander registers a boolean flag.
+    this.program
+      .summary("Splits each protein sequence into a list of peptides.")
+      .description(this.description)
+      .option("-p, --pattern <regex>", "specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved. By default, it will create tryptic peptides.", "([KR])([^P])");
+  }
+
+  /**
+   * Parses the arguments, then reads standard input line by line and writes the split sequences to
+   * standard output. FASTA mode is enabled when the very first input line starts with `>`.
+   *
+   * Performance note: Just as with peptfilter, this implementation can be made faster by using line events instead of
+   * async iterators.
+   *
+   * @param args arguments handed to parseArguments
+   */
+  async run(args?: string[]) {
+    this.parseArguments(args);
+
+    let pattern;
+    try {
+      pattern = new RegExp(this.program.opts().pattern, "g");
+    } catch (e) {
+      this.program.error(`Your pattern was invalid: ${(e as Error).message}`);
+    }
+
+    let fasta = false;
+    let protein: string[] = [];
+
+    // buffering output makes a big difference in performance
+    let output: string[] = [];
+    let i = 0;
+
+    for await (const line of createInterface({ input: process.stdin })) {
+      if (i === 0 && line.startsWith(">")) {
+        fasta = true;
+      }
+
+      i++;
+
+      if (fasta) { // if we're in fasta mode, a protein could be split over multiple lines
+        if (line.startsWith(">")) { // if we encounter a new header, process the previous protein and output the current header
+          if (protein.length > 0) {
+            output.push(Prot2pept.splitProtein(protein.join(""), pattern));
+          }
+          output.push(line.trimEnd());
+          protein = [];
+        } else {
+          protein.push(line.trimEnd());
+        }
+      } else { // if we're not in fasta mode, each line is a protein sequence
+        output.push(Prot2pept.splitProtein(line.trimEnd(), pattern));
+      }
+
+      if (i % 1000 === 0) {
+        output.push(""); //add a newline at the end of the buffer without additional string copy
+        process.stdout.write(output.join("\n"));
+        output = [];
+      }
+    }
+
+    // if in fasta mode, process the last protein; skipped when no sequence line followed the last header,
+    // because splitting an empty protein would write a stray blank line (same guard as inside the loop)
+    if (fasta && protein.length > 0) {
+      output.push(Prot2pept.splitProtein(protein.join(""), pattern));
+    }
+    output.push("");
+    process.stdout.write(output.join("\n"));
+  }
+
+  /**
+   * Inserts a newline between the two captured groups of every match of `pattern` in `line`. The replacement
+   * runs twice because matches cannot overlap: a site that starts on the last character of an earlier match
+   * is only found by the second pass. Doubled newlines produced by the second pass are collapsed afterwards.
+   *
+   * @param line the protein sequence to split
+   * @param pattern regular expression with the g flag (required by replaceAll) whose groups are used as $1 and $2
+   * @returns the pieces of the sequence, separated by newlines
+   */
+  static splitProtein(line: string, pattern: RegExp): string {
+    return line.replaceAll(pattern, "$1\n$2").replaceAll(pattern, "$1\n$2").replaceAll("\n\n", "\n");
+  }
+}
diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb
deleted file mode 100644
index ebc199bc..00000000
--- a/lib/commands/unipept.rb
+++ /dev/null
@@ -1,400 +0,0 @@
-require 'typhoeus'
-
-require_relative '../batch_order'
-require_relative '../batch_iterator'
-require_relative '../configuration'
-require_relative '../formatters'
-require_relative '../output_writer'
-require_relative '../server_message'
-require_relative '../version'
-
-require_relative 'unipept/config'
-require_relative 'unipept/pept2ec'
-require_relative 'unipept/pept2funct'
-require_relative 'unipept/pept2go'
-require_relative 'unipept/pept2interpro'
-require_relative 'unipept/pept2lca'
-require_relative 'unipept/pept2prot'
-require_relative 'unipept/pept2taxa'
-require_relative 'unipept/peptinfo'
-require_relative 'unipept/protinfo'
-require_relative 'unipept/taxa2lca'
-require_relative 'unipept/taxonomy'
-require_relative 'unipept/taxa2tree'
-
-module Unipept
- class Commands::Unipept
- def initialize
- @root_command = create_root_command
- add_config_command
- add_pept2taxa_command
- add_pept2ec_command
- add_pept2funct_command
- add_pept2go_command
- add_pept2interpro_command
- add_pept2lca_command
- add_peptinfo_command
- add_protinfo_command
- add_taxa2lca_command
- add_pept2prot_command
- add_taxonomy_command
- add_taxa2tree_command
- end
-
- def run(args)
- @root_command.run(args)
- end
-
- def create_root_command
- Cri::Command.new_basic_root.modify do
- name 'unipept'
- summary 'Command line interface to Unipept web services.'
- usage 'unipept subcommand [options]'
- description <<-EOS
- The unipept subcommands are command line wrappers around the Unipept web services.
-
- Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line.
- EOS
- flag :v, :version, 'displays the version'
- flag :q, :quiet, 'disable service messages'
- flag nil, :'no-header', 'disable header in csv output', hidden: true
- option :i, :input, 'read input from file', argument: :required
- option nil, :batch, 'specify the batch size', argument: :required, hidden: true
- option nil, :parallel, 'specify the number of parallel requests', argument: :required, hidden: true
- option :o, :output, 'write output to file', argument: :required
- option :f, :format, "define the output format (available: #{Unipept::Formatter.available.select { |f| f != 'html' && f != 'url' }.join(', ')}) (default: #{Unipept::Formatter.default}).", argument: :required
-
- # Configuration options
- option nil, 'host', 'specify the server running the Unipept web service', argument: :required
-
- run do |opts, _args, cmd|
- if opts[:version]
- puts Unipept::VERSION
- else
- abort cmd.help
- end
- end
- end
- end
-
- def add_config_command
- @root_command.define_command('config') do
- summary 'Set configuration options.'
- usage 'config option [value]'
- description <<-EOS
- Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user.
-
- Running the command with a value will set that value for the given option, running it without will show the current value.
-
- These options are currently supported:
-
- - host: Set the default host for api calls.
-
- Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server.
- EOS
-
- runner Commands::Config
- end
- end
-
- def add_pept2taxa_command
- @root_command.define_command('pept2taxa') do
- usage 'pept2taxa [options]'
- summary 'Fetch taxa of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2taxa
- end
- end
-
- def add_pept2ec_command
- @root_command.define_command('pept2ec') do
- usage 'pept2ec[options]'
- summary 'Fetch EC numbers of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2ec command retrieves from Unipept the set of EC numbers from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'Also return the names of the EC numbers. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2ec
- end
- end
-
- def add_pept2funct_command
- @root_command.define_command('pept2funct') do
- usage 'pept2funct[options]'
- summary 'Fetch EC numbers, GO terms and InterPro codes of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2funct command retrieves from Unipept the set of EC numbers and GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'Also return the names of the EC numbers, GO terms and InterPro codes. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2funct
- end
- end
-
- def add_pept2go_command
- @root_command.define_command('pept2go') do
- usage 'pept2go [options]'
- summary 'Fetch GO terms of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2go command retrieves from Unipept the set of GO terms from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'Also return the names of the GO terms. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2go
- end
- end
-
- def add_pept2interpro_command
- @root_command.define_command('pept2interpro') do
- usage 'pept2interpro [options]'
- summary 'Fetch InterPro entries of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2interpro command retrieves from Unipept the set of InterPro entries from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'Also return the names and types of the InterPro entries. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2interpro
- end
- end
-
- def add_pept2lca_command
- @root_command.define_command('pept2lca') do
- usage 'pept2lca [options]'
- summary 'Fetch taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Pept2lca
- end
- end
-
- def add_peptinfo_command
- @root_command.define_command('peptinfo') do
- usage 'peptinfo [options]'
- summary 'Fetch functional information and the taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept peptinfo command retrieves from Unipept the functional information and the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'report the names of the functional annotations and all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Peptinfo
- end
- end
-
- def add_protinfo_command
- @root_command.define_command('protinfo') do
- usage 'protinfo [options]'
- summary 'Fetch functional and taxonomic information of UniProt ids'
- description <<-EOS
- For each UniProt id the unipept protinfo command retrieves from Unipept the functional information and the NCBI id. The command expects a list of UniProt ids that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Protinfo
- end
- end
-
- def add_taxa2lca_command
- @root_command.define_command('taxa2lca') do
- usage 'taxa2lca [options]'
- summary 'Compute taxonomic lowest common ancestor for given list of taxa.'
- description <<-EOS
- The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.
- EOS
-
- flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Taxa2lca
- end
- end
-
- def add_taxa2tree_command
- @root_command.define_command('taxa2tree') do
- usage 'taxa2tree [options]'
- summary 'Compute lineage tree for given list of taxa'
- description <<-EOS
- The unipept taxa2tree command computes a lineage tree of a given list of NCBI Taxonomy Identifiers. A frequency table is computed for the given list of taxa. Secondly, the lineages for all taxa are looked up. These are then used to build a lineage tree with all counts set. The command expects a list of NCBI Taxonomy Identifiers that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.
- EOS
-
- option :f, :format, "define the output format (available: json, url, html) (default: 'json'). Note that xml and csv are not available for taxa2tree. html and url are used as an output format for visualizations.", argument: :required
-
- runner Commands::Taxa2Tree
- end
- end
-
- def add_pept2prot_command
- @root_command.define_command('pept2prot') do
- usage 'pept2prot [options]'
- summary 'Fetch UniProt entries that match tryptic peptides.'
- description <<-EOS
- For each tryptic peptide the unipept pept2prot command retrieves from Unipept all UniProt entries whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.
- EOS
-
- flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides'
- flag :a, :all, 'report all information fields of UniProt entries available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
- option nil, :meganize, 'output the results in a BlastTab-like format that MEGAN can understand'
-
- runner Commands::Pept2prot
- end
- end
-
- def add_taxonomy_command
- @root_command.define_command('taxonomy') do
- usage 'taxonomy [options]'
- summary 'Fetch taxonomic information from Unipept Taxonomy.'
- description <<-EOS
- The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed
-
- - as separate command line arguments
-
- - in a text file that is passed as an argument to the -i option
-
- - to standard input
-
- The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.
- EOS
-
- flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'
- option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true
-
- runner Commands::Taxonomy
- end
- end
-
- # Invokes the unipept command-line tool with the given arguments.
- #
- # @param [Array] args An array of command-line arguments
- #
- # @return [void]
- def self.run(args)
- new.run(args)
- end
- end
-end
diff --git a/lib/commands/unipept.ts b/lib/commands/unipept.ts
new file mode 100644
index 00000000..0585138f
--- /dev/null
+++ b/lib/commands/unipept.ts
@@ -0,0 +1,58 @@
+import { BaseCommand } from './base_command.js';
+import { Pept2ec } from './unipept/pept2ec.js';
+import { Pept2funct } from './unipept/pept2funct.js';
+import { Pept2go } from './unipept/pept2go.js';
+import { Pept2interpro } from './unipept/pept2interpro.js';
+import { Pept2lca } from './unipept/pept2lca.js';
+import { Pept2prot } from './unipept/pept2prot.js';
+import { Pept2taxa } from './unipept/pept2taxa.js';
+import { Peptinfo } from './unipept/peptinfo.js';
+import { Protinfo } from './unipept/protinfo.js';
+import { Taxa2lca } from './unipept/taxa2lca.js';
+import { Taxonomy } from './unipept/taxonomy.js';
+
+/**
+ * The `unipept` command: sets its summary and description and registers the subcommands imported above.
+ */
+export class Unipept extends BaseCommand {
+
+  readonly description = `The unipept subcommands are command line wrappers around the Unipept web services.
+
+Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line.`;
+
+  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean }) {
+    super(options);
+
+    const program = this.program;
+    program.summary("Command line interface to Unipept web services.");
+    program.description(this.description);
+
+    // one subcommand per imported class, registered in the same order as the imports
+    program.addCommand(new Pept2ec().command);
+    program.addCommand(new Pept2funct().command);
+    program.addCommand(new Pept2go().command);
+    program.addCommand(new Pept2interpro().command);
+    program.addCommand(new Pept2lca().command);
+    program.addCommand(new Pept2prot().command);
+    program.addCommand(new Pept2taxa().command);
+    program.addCommand(new Peptinfo().command);
+    program.addCommand(new Protinfo().command);
+    program.addCommand(new Taxa2lca().command);
+    program.addCommand(new Taxonomy().command);
+  }
+
+  /**
+   * Parses the given arguments by delegating to parseArguments.
+   *
+   * @param args arguments handed to parseArguments
+   */
+  async run(args?: string[]) {
+    this.parseArguments(args);
+  }
+}
diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb
deleted file mode 100644
index e2b15a04..00000000
--- a/lib/commands/unipept/api_runner.rb
+++ /dev/null
@@ -1,265 +0,0 @@
-require_relative '../../retryable_typhoeus'
-
-module Unipept
- class Commands::ApiRunner < Cri::CommandRunner
- attr_reader :configuration, :url, :user_agent
-
- def initialize(args, opts, cmd)
- super
- @configuration = Unipept::Configuration.new
-
- @host = host
- @user_agent = "Unipept CLI - unipept #{Unipept::VERSION}"
- @url = "#{@host}/api/v2/#{cmd.name}.json"
- @fasta = false
- end
-
- # Returns the host. If a value is defined by both an option and the config
- # file, the value of the option is used.
- def host
- # find host in opts first
- host = options[:host] || @configuration['host']
- host = 'http://api.unipept.ugent.be' if host.nil? || host.empty?
-
- # add http:// if needed
- if host.start_with?('http://', 'https://')
- host
- else
- "http://#{host}"
- end
- end
-
- # Returns an input iterator to use for the request.
- # - if arguments are given, uses arguments
- # - if the input file option is given, uses file input
- # - if none of the previous are given, uses stdin
- def input_iterator
- return arguments.each unless arguments.empty?
- return File.foreach(options[:input]) if options[:input]
-
- $stdin.each_line
- end
-
- def output_writer
- @output_writer ||= OutputWriter.new(options[:output])
- end
-
- # Returns the default default_batch_size of a command.
- def default_batch_size
- raise NotImplementedError, 'This must be implemented in a subclass.'
- end
-
- # returns the effective batch_size of a command
- def batch_size
- if options[:batch]
- options[:batch].to_i
- else
- default_batch_size
- end
- end
-
- # returns the required fields to do any mapping
- def required_fields
- []
- end
-
- # Returns a new batch_iterator based on the batch_size
- def batch_iterator
- Unipept::BatchIterator.new(batch_size)
- end
-
- def concurrent_requests
- if options[:parallel]
- options[:parallel].to_i
- else
- 10
- end
- end
-
- def queue_size
- concurrent_requests * 20
- end
-
- # Returns an array of regular expressions containing all the selected fields
- def selected_fields
- return @selected_fields unless @selected_fields.nil?
-
- fields = [*options[:select]].map { |f| f.split(',') }.flatten
- fields.concat(required_fields) if @fasta && !fields.empty?
- @selected_fields = fields.map { |f| glob_to_regex(f) }
- end
-
- # Returns a formatter, based on the format specified in the options
- def formatter
- @formatter ||= Unipept::Formatter.new_for_format(options[:format])
- end
-
- # Constructs a request body (a Hash) for set of input strings, using the
- # options supplied by the user.
- def construct_request_body(input)
- names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include?('name') || f.to_s.include?('.*$') }
- { input: input,
- equate_il: options[:equate] == true,
- extra: options[:all] == true,
- names: options[:all] == true && names }
- end
-
- # Runs the command
- def run
- ServerMessage.new(@host).print unless options[:quiet]
- hydra = Typhoeus::Hydra.new(max_concurrency: concurrent_requests)
- batch_order = Unipept::BatchOrder.new
- last_id = 0
-
- # puts input_iterator.inspect
-
- batch_iterator.iterate(input_iterator) do |input_slice, batch_id, fasta_mapper|
- last_id = batch_id
- @fasta = !fasta_mapper.nil?
- request = ::RetryableTyphoeus::Request.new(
- @url,
- method: :post,
- body: construct_request_body(input_slice),
- accept_encoding: 'gzip',
- followlocation: true,
- postredir: :post_all,
- headers: { 'User-Agent' => @user_agent }
- )
-
- request.on_complete do |resp|
- block = handle_response(resp, batch_id, fasta_mapper)
- batch_order.wait(batch_id, &block)
- end
-
- hydra.queue request
- hydra.run if (batch_id % queue_size).zero?
- end
-
- hydra.run
- batch_order.wait(last_id + 1) { output_writer.write_line formatter.footer }
- end
-
- # Saves an error to a new file in the .unipept directory in the users home
- # directory.
- def save_error(message)
- path = error_file_path
- FileUtils.mkdir_p File.dirname(path)
- File.write(path, message)
- warn "API request failed! log can be found in #{path}"
- end
-
- protected
-
- def error_file_path
- File.expand_path(File.join(Dir.home, '.unipept', "unipept-#{Time.now.strftime('%F-%T')}.log"))
- end
-
- # Handles the response of an API request.
- # Returns a block to execute.
- def handle_response(response, batch_id, fasta_mapper)
- if response.success?
- handle_success_response(response, batch_id, fasta_mapper)
- else
- handle_failed_response(response)
- end
- end
-
- def handle_success_response(response, batch_id, fasta_mapper)
- result = filter_result(response.response_body)
-
- lambda do
- unless result.empty?
- output_writer.write_line formatter.header(result, fasta_mapper) if batch_id.zero? && !options[:'no-header']
- output_writer.write_line formatter.format(result, fasta_mapper, batch_id.zero?)
- end
- end
- end
-
- def handle_failed_response(response)
- if response.timed_out?
- -> { save_error('request timed out, continuing anyway, but results might be incomplete') }
- elsif response.code.zero?
- -> { save_error("could not get an http response, continuing anyway, but results might be incomplete#{response.return_message}") }
- else
- -> { save_error("Got #{response.code}: #{response.response_body}\nRequest headers: #{response.request.options}\nRequest body:\n#{response.request.encoded_body}\n\n") }
- end
- end
-
- # Parses the json_response, wraps it in an array if needed and filters the
- # fields based on the selected_fields
- def filter_result(json_response)
- result = JSON[json_response] rescue []
- result = [result] unless result.is_a? Array
- key_order = result.first.keys if result.first
- result = flatten_functional_fields(result) if formatter.instance_of?(Unipept::CSVFormatter)
- result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty?
- result = inflate_functional_fields(result, key_order) if formatter.instance_of?(Unipept::CSVFormatter) && result.first
- result
- end
-
- # Transforms the hierarchical input to something without hierarchy. All fields
- # associated with functional annotations are transformed to a flat alternative.
- # Example: {"go" => {"go_term": xxx, "protein_count": yyy}} --> {"go_term" => [xxx], "protein_count" => [yyy]}
- def flatten_functional_fields(data)
- output = []
- data.each do |row|
- output_row = {}
- row.each do |k, v|
- if %w[ec go ipr].include? k
- v.each do |item|
- item.each do |field_name, field_value|
- new_field_name = %w[ec_number go_term ipr_code].include?(field_name) ? field_name : "#{k}_#{field_name}"
- output_row[new_field_name] = [] unless output_row.key? new_field_name
- output_row[new_field_name] << field_value
- end
- end
- else
- output_row[k] = v
- end
- end
- output << output_row
- end
- output
- end
-
- # Transforms a flattened input created by flatten_functional_fields to the original
- # hierarchy.
- def inflate_functional_fields(data, original_key_order)
- output = []
- data.each do |row|
- output_row = {}
-
- processed_keys = []
- original_key_order.each do |original_key|
- if %w[ec go ipr].include? original_key
- # First, we take all distinct keys that start with "ec", "go" or "ipr"
- annotation_keys = row.keys.select { |key| key.start_with? original_key }
- processed_keys += annotation_keys
- unless annotation_keys.empty?
- # Each of the values of the annotation_keys is an array. All respective values of each of
- # these arrays need to be put together into one hash. (E.g. {a => [1, 2], b=> [x, y]} --> [{a: 1, b: x}, {a: 2, b: y}])
- reconstructed_objects = []
- (0..row[annotation_keys[0]].length).each do |i|
- reconstructed_object = {}
- annotation_keys.each do |annotation_key|
- reconstructed_object[%w[ec_number go_term ipr_code].include?(annotation_key) ? annotation_key : annotation_key[annotation_key.index('_') + 1, annotation_key.length]] = row[annotation_key][i]
- end
- reconstructed_objects << reconstructed_object
- end
- output_row[original_key] = reconstructed_objects
- end
- elsif row.key? original_key
- output_row[original_key] = row[original_key]
- end
- end
-
- output << output_row
- end
- output
- end
-
- def glob_to_regex(string)
- /^#{string.gsub('*', '.*')}$/
- end
- end
-end
diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb
deleted file mode 100644
index 83494b96..00000000
--- a/lib/commands/unipept/config.rb
+++ /dev/null
@@ -1,29 +0,0 @@
-module Unipept
- class Commands::Config < Cri::CommandRunner
- def run
- abort command.help if arguments.empty? || arguments.size > 2
-
- key, value = *arguments
-
- if arguments.size == 2
- set_config(key, value)
- puts "#{key} was set to #{value}"
- else
- puts get_config(key)
- end
- end
-
- def config
- @config ||= Unipept::Configuration.new
- end
-
- def set_config(key, value)
- config[key] = value
- config.save
- end
-
- def get_config(key)
- config[key]
- end
- end
-end
diff --git a/lib/commands/unipept/pept2ec.rb b/lib/commands/unipept/pept2ec.rb
deleted file mode 100644
index bab32c13..00000000
--- a/lib/commands/unipept/pept2ec.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2ec < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2ec.ts b/lib/commands/unipept/pept2ec.ts
new file mode 100644
index 00000000..437a311b
--- /dev/null
+++ b/lib/commands/unipept/pept2ec.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2ec` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2ec extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2ec command retrieves from Unipept the set of EC numbers from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2ec");
+
+        this.command
+            .summary("Fetch EC numbers of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "Also return the names of the EC numbers. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 100 when `--all` is set, 1000 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 100;
+        } else {
+            return 1000;
+        }
+    }
+}
diff --git a/lib/commands/unipept/pept2funct.rb b/lib/commands/unipept/pept2funct.rb
deleted file mode 100644
index 74bffa59..00000000
--- a/lib/commands/unipept/pept2funct.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2funct < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2funct.ts b/lib/commands/unipept/pept2funct.ts
new file mode 100644
index 00000000..ae11ff57
--- /dev/null
+++ b/lib/commands/unipept/pept2funct.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2funct` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2funct extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2funct command retrieves from Unipept the set of EC numbers, GO terms and InterPro codes from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2funct");
+
+        this.command
+            .summary("Fetch EC numbers, GO terms and InterPro codes of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "Also return the names of the EC numbers, GO terms and InterPro codes. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 100 when `--all` is set, 1000 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 100;
+        } else {
+            return 1000;
+        }
+    }
+}
diff --git a/lib/commands/unipept/pept2go.rb b/lib/commands/unipept/pept2go.rb
deleted file mode 100644
index 26d81d17..00000000
--- a/lib/commands/unipept/pept2go.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2go < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2go.ts b/lib/commands/unipept/pept2go.ts
new file mode 100644
index 00000000..c14fcf15
--- /dev/null
+++ b/lib/commands/unipept/pept2go.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2go` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2go extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2go command retrieves from Unipept the set of GO terms from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2go");
+
+        this.command
+            .summary("Fetch GO terms of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "Also return the names of the GO terms. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 100 when `--all` is set, 1000 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 100;
+        } else {
+            return 1000;
+        }
+    }
+}
diff --git a/lib/commands/unipept/pept2interpro.rb b/lib/commands/unipept/pept2interpro.rb
deleted file mode 100644
index fc273904..00000000
--- a/lib/commands/unipept/pept2interpro.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2interpro < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2interpro.ts b/lib/commands/unipept/pept2interpro.ts
new file mode 100644
index 00000000..55b9e766
--- /dev/null
+++ b/lib/commands/unipept/pept2interpro.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2interpro` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2interpro extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2interpro command retrieves from Unipept the set of InterPro entries from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2interpro");
+
+        this.command
+            .summary("Fetch InterPro entries of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "Also return the names of the InterPro entries. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 100 when `--all` is set, 1000 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 100;
+        } else {
+            return 1000;
+        }
+    }
+}
diff --git a/lib/commands/unipept/pept2lca.rb b/lib/commands/unipept/pept2lca.rb
deleted file mode 100644
index 76d49beb..00000000
--- a/lib/commands/unipept/pept2lca.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2lca < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2lca.ts b/lib/commands/unipept/pept2lca.ts
new file mode 100644
index 00000000..97b8a871
--- /dev/null
+++ b/lib/commands/unipept/pept2lca.ts
@@ -0,0 +1,41 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2lca` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2lca extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2lca");
+
+        this.command
+            .summary("Fetch taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: always 100, regardless of the options given. */
+    defaultBatchSize(): number {
+        return 100;
+    }
+}
diff --git a/lib/commands/unipept/pept2prot.rb b/lib/commands/unipept/pept2prot.rb
deleted file mode 100644
index d6d7b513..00000000
--- a/lib/commands/unipept/pept2prot.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-require_relative 'api_runner'
-
-module Unipept::Commands
- class Pept2prot < ApiRunner
- def initialize(args, opts, cmd)
- if args[:meganize]
- args[:all] = true
- args[:select] = ['peptide,refseq_protein_ids']
- args[:format] = 'blast'
- end
- super
- end
-
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 5
- else
- 10
- end
- end
- end
-end
diff --git a/lib/commands/unipept/pept2prot.ts b/lib/commands/unipept/pept2prot.ts
new file mode 100644
index 00000000..02f880d4
--- /dev/null
+++ b/lib/commands/unipept/pept2prot.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2prot` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2prot extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2prot command retrieves from Unipept all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2prot");
+
+        this.command
+            .summary("Fetch UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "report all information fields of UniProt entries available in Unipept. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 5 when `--all` is set, 10 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 5;
+        } else {
+            return 10;
+        }
+    }
+}
diff --git a/lib/commands/unipept/pept2taxa.rb b/lib/commands/unipept/pept2taxa.rb
deleted file mode 100644
index efbb6f78..00000000
--- a/lib/commands/unipept/pept2taxa.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Pept2taxa < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- 5
- end
- end
-end
diff --git a/lib/commands/unipept/pept2taxa.ts b/lib/commands/unipept/pept2taxa.ts
new file mode 100644
index 00000000..19e3adfd
--- /dev/null
+++ b/lib/commands/unipept/pept2taxa.ts
@@ -0,0 +1,41 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept pept2taxa` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Pept2taxa extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("pept2taxa");
+
+        this.command
+            .summary("Fetch taxa of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: always 5, regardless of the options given. */
+    defaultBatchSize(): number {
+        return 5;
+    }
+}
diff --git a/lib/commands/unipept/peptinfo.rb b/lib/commands/unipept/peptinfo.rb
deleted file mode 100644
index be90c152..00000000
--- a/lib/commands/unipept/peptinfo.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Peptinfo < ApiRunner
- def required_fields
- ['peptide']
- end
-
- def default_batch_size
- if options[:all]
- 100
- else
- 1000
- end
- end
- end
-end
diff --git a/lib/commands/unipept/peptinfo.ts b/lib/commands/unipept/peptinfo.ts
new file mode 100644
index 00000000..62cd9cd0
--- /dev/null
+++ b/lib/commands/unipept/peptinfo.ts
@@ -0,0 +1,45 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept peptinfo` subcommand. The constructor registers the summary, description,
+ * options and peptide arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Peptinfo extends UnipeptSubcommand {
+
+    readonly description = `For each tryptic peptide the unipept peptinfo command retrieves from Unipept the functional information and the lowest common ancestor of the set of taxa from all UniProt entries whose protein sequence contains an exact match to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.`;
+
+    constructor() {
+        super("peptinfo");
+
+        this.command
+            .summary("Fetch functional information and the taxonomic lowest common ancestor of UniProt entries that match tryptic peptides.")
+            .description(this.description)
+            .option("-e, --equate", "equate isoleucine (I) and leucine (L) when matching peptides")
+            .option("-a, --all", "report the names of the functional annotations and all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[peptides...]", "optionally, 1 or more peptides")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["peptide"];
+    }
+
+    /** Default batch size: 100 when `--all` is set, 1000 otherwise. */
+    defaultBatchSize(): number {
+        if (this.options.all) {
+            return 100;
+        } else {
+            return 1000;
+        }
+    }
+}
diff --git a/lib/commands/unipept/protinfo.rb b/lib/commands/unipept/protinfo.rb
deleted file mode 100644
index fc85c4d2..00000000
--- a/lib/commands/unipept/protinfo.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Protinfo < ApiRunner
- def required_fields
- ['protein']
- end
-
- def default_batch_size
- 1000
- end
- end
-end
diff --git a/lib/commands/unipept/protinfo.ts b/lib/commands/unipept/protinfo.ts
new file mode 100644
index 00000000..df8f6f8c
--- /dev/null
+++ b/lib/commands/unipept/protinfo.ts
@@ -0,0 +1,39 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept protinfo` subcommand. The constructor registers the summary, description,
+ * options and UniProt id arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Protinfo extends UnipeptSubcommand {
+
+    readonly description = `For each UniProt id the unipept protinfo command retrieves from Unipept the functional information and the NCBI id. The command expects a list of UniProt ids that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way protein ids are passed, in the order as listed above. Text files and standard input should have one protein id per line.`;
+
+    constructor() {
+        super("protinfo");
+
+        this.command
+            .summary("Fetch functional and taxonomic information of UniProt ids.")
+            .description(this.description)
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[proteins...]", "optionally, 1 or more UniProt ids")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Returns the fields this command marks as required. NOTE(review): how UnipeptSubcommand uses them is not visible here. */
+    requiredFields(): string[] {
+        return ["protein"];
+    }
+
+    /** Default batch size: always 1000, regardless of the options given. */
+    defaultBatchSize(): number {
+        return 1000;
+    }
+}
diff --git a/lib/commands/unipept/taxa2lca.rb b/lib/commands/unipept/taxa2lca.rb
deleted file mode 100644
index 32bb4af6..00000000
--- a/lib/commands/unipept/taxa2lca.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Taxa2lca < ApiRunner
- def batch_iterator
- SimpleBatchIterator.new
- end
-
- def default_batch_size
- raise 'NOT NEEDED FOR TAXA2LCA'
- end
- end
-
- class SimpleBatchIterator
- def iterate(input)
- yield(input.to_a, 0)
- end
- end
-end
diff --git a/lib/commands/unipept/taxa2lca.ts b/lib/commands/unipept/taxa2lca.ts
new file mode 100644
index 00000000..a52c4625
--- /dev/null
+++ b/lib/commands/unipept/taxa2lca.ts
@@ -0,0 +1,38 @@
+import { Option } from "commander";
+import { UnipeptSubcommand } from "./unipept_subcommand.js";
+
+/**
+ * The `unipept taxa2lca` subcommand. The constructor registers the summary, description,
+ * options and taxon id arguments on `this.command` and forwards each invocation to `run`.
+ */
+export class Taxa2lca extends UnipeptSubcommand {
+
+    readonly description = `The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed
+
+- as separate command line arguments
+- in a text file that is passed as an argument to the -i option
+- to standard input
+
+The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one taxon id per line.`;
+
+    // NOTE(review): presumably tells UnipeptSubcommand to cast the input (taxon ids) before sending it -- confirm against the base class.
+    castInput = true;
+
+    constructor() {
+        super("taxa2lca");
+
+        this.command
+            .summary("Compute taxonomic lowest common ancestor for given list of taxa.")
+            .description(this.description)
+            .option("-a, --all", "report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.")
+            // "<fields...>" lets --select take one or more values; with no placeholder commander treats the option as a boolean flag.
+            .addOption(new Option("-s, --select <fields...>", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+            .argument("[taxonids...]", "optionally, 1 or more taxon ids")
+            .action((args, options) => this.run(args, options));
+    }
+
+    /** Always throws: this command declares that it has no batch size. */
+    defaultBatchSize(): number {
+        throw new Error("Batch size not needed for this command.");
+    }
+}
diff --git a/lib/commands/unipept/taxa2tree.rb b/lib/commands/unipept/taxa2tree.rb
deleted file mode 100644
index 30d0993a..00000000
--- a/lib/commands/unipept/taxa2tree.rb
+++ /dev/null
@@ -1,74 +0,0 @@
-require_relative 'api_runner'
-module Unipept::Commands
- class Taxa2Tree < ApiRunner
- def initialize(args, opts, cmd)
- super
-
- # JSON is the default format for this command
- args[:format] = 'json' unless args[:format]
-
- unless %w[url html json].include? args[:format]
- warn "Format #{args[:format]} is not supported by taxa2tree. Use html, url or json (default)."
- exit 1
- end
-
- if options[:format] == 'html'
- # Overwrite the URL for this command, since it's possible that it uses HTML generated by the server.
- @url = "#{@host}/api/v2/#{cmd.name}.html"
- elsif args[:format] == 'url'
- @link = true
- end
- end
-
- def required_fields
- ['taxon_id']
- end
-
- def batch_size
- return arguments.length unless arguments.empty?
- return File.foreach(options[:input]).inject(0) { |c, _| c + 1 } if options[:input]
-
- @stdin_contents = $stdin.readlines
- @stdin_contents.length
- end
-
- def input_iterator
- return arguments.each unless arguments.empty?
- return File.foreach(options[:input]) if options[:input]
-
- @stdin_contents.each
- end
-
- protected
-
- def filter_result(response)
- return response if response.start_with?('", "select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used."))
+ .argument("[peptides...]", "optionally, 1 or more peptides")
+ .action((args, options) => this.run(args, options));
+ }
+
+ requiredFields(): string[] {
+ return ["taxon_id"];
+ }
+
+ defaultBatchSize(): number {
+ return 100;
+ }
+}
diff --git a/lib/commands/unipept/unipept_subcommand.ts b/lib/commands/unipept/unipept_subcommand.ts
new file mode 100644
index 00000000..cb30f2dd
--- /dev/null
+++ b/lib/commands/unipept/unipept_subcommand.ts
@@ -0,0 +1,314 @@
+import { Command, Option } from "commander";
+import { createReadStream, createWriteStream, readFileSync } from "fs";
+import { createInterface } from "node:readline";
+import { Interface } from "readline";
+import { Formatter } from "../../formatters/formatter.js";
+import { FormatterFactory } from "../../formatters/formatter_factory.js";
+import { CSVFormatter } from "../../formatters/csv_formatter.js";
+import path from "path";
+import os from "os";
+import { appendFile, mkdir } from "fs/promises";
+
+export abstract class UnipeptSubcommand {
+ public command: Command;
+ static readonly VALID_FORMATS = ["blast", "csv", "json", "xml"];
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ options: any = {};
+ name: string;
+ user_agent: string;
+ host = "https://api.unipept.ugent.be";
+ url?: string;
+ formatter?: Formatter;
+ outputStream: NodeJS.WritableStream = process.stdout;
+ firstBatch = true;
+ selectedFields?: RegExp[];
+ fasta: boolean;
+ castInput = false;
+
+ // we must save this to be able to close it properly in tests
+ private streamInterface?: Interface;
+
+ constructor(name: string) {
+ this.name = name;
+ let p = "";
+ if (import.meta.url.includes("/dist/")) {
+ p = "../";
+ }
+ const version = JSON.parse(readFileSync(new URL(p + "../../../package.json", import.meta.url), "utf8")).version;
+ this.user_agent = `unipept-cli/${version}`;
+ this.command = this.create(name);
+ this.fasta = false;
+ }
+
+ abstract defaultBatchSize(): number;
+
+ requiredFields(): string[] {
+ return [];
+ }
+
+ create(name: string): Command {
+ const command = new Command(name);
+
+ command.option("-q, --quiet", "disable service messages");
+ command.option("-i, --input ", "read input from file");
+ command.option("-o, --output ", "write output to file");
+ command.addOption(new Option("-f, --format ", "define the output format").choices(UnipeptSubcommand.VALID_FORMATS).default("csv"));
+ command.option("--host ", "specify the server running the Unipept web service");
+
+ // internal options
+ command.addOption(new Option("--no-header", "disable the header in csv output").hideHelp());
+ command.addOption(new Option("--batch ", "specify the batch size").hideHelp());
+
+ return command;
+ }
+
+  /** Stores the parsed options, sets up formatter and output, then feeds the input to the matching processor. */
+  async run(args: string[], options: { [key: string]: unknown }): Promise<void> {
+    this.options = options;
+    this.host = this.getHost();
+    this.url = `${this.host}/api/v2/${this.name}.json`;
+    this.formatter = FormatterFactory.getFormatter(this.options.format);
+
+    if (this.options.output) {
+      this.outputStream = createWriteStream(this.options.output);
+    } else {
+      // stdout raises EPIPE when it is piped to a command that stops reading; exit quietly in that case
+      process.stdout.on("error", (err) => {
+        if (err.code === "EPIPE") process.exit(0);
+      });
+    }
+
+    const iterator = this.getInputIterator(args, options.input as string);
+    const firstLine = (await iterator.next()).value;
+    // empty input: the iterator is already exhausted, so there is nothing to inspect or send
+    if (firstLine === undefined) return;
+    if (this.command.name() === "taxa2lca") {
+      // this subcommand is an exception where the entire input is read before processing
+      await this.simpleInputProcessor(firstLine, iterator);
+    } else if (firstLine.startsWith(">")) {
+      this.fasta = true;
+      await this.fastaInputProcessor(firstLine, iterator);
+    } else {
+      await this.normalInputProcessor(firstLine, iterator);
+    }
+  }
+
+ async processBatch(slice: string[], fastaMapper?: { [key: string]: string }): Promise {
+ if (!this.formatter) throw new Error("Formatter not set");
+
+ let r;
+ try {
+ r = await this.fetchWithRetry(this.url as string, {
+ method: "POST",
+ body: this.constructRequestBody(slice),
+ headers: {
+ "Content-Type": "application/json",
+ "Accept-Encoding": "gzip",
+ "User-Agent": this.user_agent,
+ }
+ });
+ } catch (e) {
+ await this.saveError(e as string);
+ return;
+ }
+
+ let result;
+ try {
+ result = await r.json();
+ } catch (e) {
+ result = [];
+ }
+ if (Array.isArray(result) && result.length === 0) return;
+ result = this.filterResult(result);
+
+ if (this.firstBatch && this.options.header) {
+ this.outputStream.write(this.formatter.header(result, this.fasta));
+ }
+
+ this.outputStream.write(this.formatter.format(result, fastaMapper, this.firstBatch));
+
+ if (this.firstBatch) this.firstBatch = false;
+ }
+
+ /**
+ * Filter the result based on the selected fields
+ */
+ filterResult(result: unknown): object[] {
+ if (!Array.isArray(result)) {
+ result = [result];
+ }
+ if (this.formatter && this.formatter instanceof CSVFormatter) {
+ result = this.formatter.flatten(result as { [key: string]: unknown }[]);
+ }
+ if (this.getSelectedFields().length > 0) {
+ (result as { [key: string]: string }[]).forEach(entry => {
+ for (const key of Object.keys(entry)) {
+ if (!this.getSelectedFields().some(regex => regex.test(key))) {
+ delete entry[key];
+ }
+ }
+ });
+ }
+ return result as object[];
+ }
+
+  /**
+   * Sends the input lines (starting with firstLine) to processBatch in batches of at most batchSize lines.
+   */
+  async normalInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
+    let slice = [firstLine];
+    for await (const line of iterator) {
+      // flush before adding: a batch never exceeds batchSize and the final slice is never empty
+      if (slice.length >= this.batchSize) {
+        await this.processBatch(slice);
+        slice = [];
+      }
+      slice.push(line);
+    }
+    await this.processBatch(slice);
+  }
+
+  /**
+   * Sends the non-header lines in batches of batchSize, mapping each line to the fasta header above it.
+   * A trailing empty batch (only headers left, or an exact multiple of batchSize) is not sent.
+   */
+  async fastaInputProcessor(firstLine: string, iterator: IterableIterator<string> | AsyncIterableIterator<string>) {
+    let currentFastaHeader = firstLine;
+    let slice: string[] = [];
+    let fastaMapper: { [key: string]: string } = {};
+    for await (const line of iterator) {
+      if (line.startsWith(">")) {
+        currentFastaHeader = line;
+      } else {
+        fastaMapper[line] = currentFastaHeader;
+        slice.push(line);
+        if (slice.length >= this.batchSize) {
+          await this.processBatch(slice, fastaMapper);
+          slice = [];
+          fastaMapper = {};
+        }
+      }
+    }
+    if (slice.length > 0) await this.processBatch(slice, fastaMapper);
+  }
+
+ /**
+ * Reads the entire input and processes it in one go
+ */
+ async simpleInputProcessor(firstLine: string, iterator: IterableIterator | AsyncIterableIterator) {
+ const slice = [firstLine];
+ for await (const line of iterator) {
+ slice.push(line);
+ }
+ await this.processBatch(slice);
+ }
+
+  /**
+   * Appends the error message to the log file of today and prints the log location to the console.
+   */
+  async saveError(message: string) {
+    const errorPath = this.errorFilePath();
+    await mkdir(path.dirname(errorPath), { recursive: true }); // must finish before appendFile runs
+    await appendFile(errorPath, `${message}\n`);
+    console.error(`API request failed! log can be found in ${errorPath}`);
+  }
+
+ /**
+ * Uses fetch to get data from the Unipept API.
+ * Has a retry mechanism that retries the request up to 5 times with a delay of 0-5 seconds.
+ * In addition, handles failed requests by returning a rejected promise.
+ */
+ fetchWithRetry(url: string, options: RequestInit, retries = 5): Promise {
+ return fetch(url, options)
+ .then(response => {
+ if (response.ok) {
+ return response;
+ } else {
+ return Promise.reject(`${response.status} ${response.statusText}`);
+ }
+ })
+ .catch(async error => {
+ if (retries > 0) {
+ // retry with delay
+ // console.error("retrying");
+ const delay = 5000 * Math.random();
+ await new Promise(resolve => setTimeout(resolve, delay));
+ return this.fetchWithRetry(url, options, retries - 1);
+ } else {
+ return Promise.reject(`Failed to fetch data from the Unipept API: ${error}`);
+ }
+ });
+ }
+
+ private constructRequestBody(slice: string[]): string {
+ const names = this.getSelectedFields().length === 0 || this.getSelectedFields().some(regex => regex.toString().includes("name") || regex.toString().includes(".*$"));
+ let input: string[] | number[] = slice;
+ if (this.castInput) {
+ input = slice.map(s => parseInt(s, 10));
+ }
+ return JSON.stringify({
+ input: input,
+ equate_il: this.options.equate,
+ extra: this.options.all,
+ names: this.options.all && names
+ });
+ }
+
+ private getSelectedFields(): RegExp[] {
+ if (this.selectedFields) return this.selectedFields;
+
+ const fields = (this.options.select as string[])?.flatMap(f => f.split(",")) ?? [];
+ if (this.fasta && fields.length > 0) {
+ fields.push(...this.requiredFields());
+ }
+ this.selectedFields = fields.map(f => this.globToRegex(f));
+
+ return this.selectedFields;
+ }
+
+ private get batchSize(): number {
+ if (this.options.batch) {
+ return +this.options.batch;
+ } else {
+ return this.defaultBatchSize();
+ }
+ }
+
+ private errorFilePath(): string {
+ const timestamp = new Date().toISOString().split('T')[0];
+ return path.join(os.homedir(), '.unipept', `unipept-${timestamp}.log`);
+ }
+
+ /**
+ * Returns an input iterator to use for the request.
+ * - if arguments are given, use arguments
+ * - if an input file is given, use the file
+ * - otherwise, use standard input
+ */
+ private getInputIterator(args: string[], input?: string): IterableIterator | AsyncIterableIterator {
+ if (args.length > 0) {
+ return args.values();
+ } else if (input) {
+ this.streamInterface = createInterface({ input: createReadStream(input) });
+ return this.streamInterface[Symbol.asyncIterator]();
+ } else {
+ this.streamInterface = createInterface({ input: process.stdin });
+ return this.streamInterface[Symbol.asyncIterator]();
+ }
+ }
+
+ private getHost(): string {
+ const host = this.options.host || this.host;
+
+ // add http:// if needed
+ if (host.startsWith("http://") || host.startsWith("https://")) {
+ return host;
+ } else {
+ return `http://${host}`;
+ }
+ }
+
+ private globToRegex(glob: string): RegExp {
+ return new RegExp(`^${glob.replace(/\*/g, ".*")}$`);
+ }
+}
diff --git a/lib/commands/uniprot.rb b/lib/commands/uniprot.rb
deleted file mode 100644
index 2a78c22e..00000000
--- a/lib/commands/uniprot.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-require 'typhoeus'
-
-module Unipept::Commands
- class Uniprot
- attr_reader :root_command, :valid_formats
-
- valid_formats = Set.new %w[fasta txt xml rdf gff sequence]
- @root_command = Cri::Command.define do
- name 'uniprot'
- summary 'Command line interface to UniProt web services.'
- usage 'uniprot [options]'
- description <<-EOS
- The uniprot command fetches UniProt entries from the UniProt web services. The command expects a list of UniProt Accession Numbers that are passed
-
- - as separate command line arguments
-
- - to standard input
-
- The command will give priority to the first way UniProt Accession Numbers are passed, in the order as listed above. The standard input should have one UniProt Accession Number per line.
-
- The uniprot command yields just the protein sequences as a default, but can return several formats.
- EOS
- required :f, :format, "specify output format (available: #{valid_formats.to_a.join(', ')}) (default: sequence)"
- flag :h, :help, 'show help for this command' do |_value, cmd|
- puts cmd.help
- exit 0
- end
- run do |opts, args, _cmd|
- format = opts.fetch(:format, 'sequence')
- unless valid_formats.include? format
- warn "#{format} is not a valid output format. Available formats are: #{valid_formats.to_a.join(', ')}"
- exit 1
- end
- iterator = args.empty? ? $stdin.each_line : args
- iterator.each do |accession|
- puts Uniprot.get_uniprot_entry(accession.chomp, format)
- end
- end
- end
-
- # Invokes the uniprot command-line tool with the given arguments.
- #
- # @param [Array] args An array of command-line arguments
- #
- # @return [void]
- def self.run(args)
- @root_command.run(args)
- end
-
- # Fetches a UniProt entry from the UniProt website with the given accession
- # number in the requested format.
- #
- # @param [String] accession The accession number of the record to fetch
- #
- # @param [String] format The format of of the record. If the format is 'sequence', the sequence will be returned in as a single line
- #
- # @return [String] The requested UniProt entry in the requested format
- def self.get_uniprot_entry(accession, format)
- if format == 'sequence'
- get_uniprot_entry(accession, 'fasta').lines.map(&:chomp)[1..].join
- else
- # other format has been specified, just download and output
- resp = Typhoeus.get("https://rest.uniprot.org/uniprotkb/#{accession}.#{format}")
- resp.response_body if resp.success?
- end
- end
- end
-end
diff --git a/lib/commands/uniprot.ts b/lib/commands/uniprot.ts
new file mode 100644
index 00000000..e0946251
--- /dev/null
+++ b/lib/commands/uniprot.ts
@@ -0,0 +1,74 @@
+import { Option } from 'commander';
+import { createInterface } from 'node:readline';
+import { BaseCommand } from './base_command.js';
+
+export class Uniprot extends BaseCommand {
+ static readonly VALID_FORMATS = ["fasta", "gff", "json", "rdf", "sequence", "xml"];
+
+ readonly description = `Command line interface to UniProt web services.
+
+The uniprot command fetches UniProt entries from the UniProt web services. The command expects a list of UniProt Accession Numbers that are passed
+
+- as separate command line arguments
+- to standard input
+
+The command will give priority to the first way UniProt Accession Numbers are passed, in the order as listed above. The standard input should have one UniProt Accession Number per line.
+
+The uniprot command yields just the protein sequences as a default, but can return several formats.`;
+
+ constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean }) {
+ super(options);
+
+ this.program
+ .summary("Command line interface to UniProt web services.")
+ .description(this.description)
+ .argument("[accessions...]", "UniProt Accession Numbers")
+ .addOption(new Option("-f, --format ", `output format`).choices(Uniprot.VALID_FORMATS).default("sequence"));
+ }
+
+ async run(args?: string[]) {
+ this.parseArguments(args);
+ const format = this.program.opts().format;
+ const accessions = this.program.args;
+
+ // alternatively, we can also wrap the array in a Readable stream with ReadableStream.from()
+ const input = accessions.length !== 0 ? accessions : createInterface({ input: process.stdin });
+ for await (const line of input) {
+ await Uniprot.processUniprotEntry(line.trim(), format);
+ }
+ }
+
+ /**
+ * Fetches a UniProt entry and writes it to standard output.
+ *
+ * @param accession UniProt Accession Number
+ */
+ static async processUniprotEntry(accession: string, format: string) {
+ process.stdout.write(await Uniprot.getUniprotEntry(accession, format) + "\n");
+ }
+
+ /**
+ * Fetches a UniProt entry in the requested format.
+ *
+ * @param accession UniProt Accession Number
+ * @param format output format
+ * @returns UniProt entry in the requested format
+ */
+ static async getUniprotEntry(accession: string, format: string): Promise {
+ // The UniProt REST API does not support the "sequence" format, so fetch fasta and remove the header
+ if (format === "sequence") {
+ return (await Uniprot.getUniprotEntry(accession, "fasta"))
+ .split("\n")
+ .slice(1)
+ .join("");
+ } else {
+ const r = await fetch(`https://rest.uniprot.org/uniprotkb/${accession}.${format}`);
+ if (r.ok) {
+ return r.text();
+ } else {
+ process.stderr.write(`Error fetching ${accession}: ${r.status} ${r.statusText}\n`);
+ return "";
+ }
+ }
+ }
+}
diff --git a/lib/configuration.rb b/lib/configuration.rb
deleted file mode 100644
index 078df616..00000000
--- a/lib/configuration.rb
+++ /dev/null
@@ -1,44 +0,0 @@
-require 'yaml'
-
-module Unipept
- class Configuration
- attr_reader :config, :file_name
-
- # Creates a new config object, based on a given YAML file. If no filename
- # given, '.unipeptrc' in the home dir of the user will be used.
- #
- # If the file doesn't exist, an empty config will be loaded.
- #
- # @param [String] file An optional file name of the YAML file to create the
- # config from
- def initialize(file = nil)
- @file_name = file || File.join(Dir.home, '.unipeptrc')
- @config = if File.exist? file_name
- YAML.load_file file_name, permitted_classes: [Time]
- else
- {}
- end
- end
-
- # Saves the config to disk. If the file doesn't exist yet, a new one will be
- # created
- def save
- File.write(file_name, config.to_yaml)
- end
-
- # Deletes a key
- def delete(key)
- config.delete(key)
- end
-
- # forwards [] to the internal config hash
- def [](*args)
- config.[](*args)
- end
-
- # forwards =[] to the internal config hash
- def []=(*args)
- config.[]=(*args) # rubocop:disable Layout/SpaceBeforeBrackets
- end
- end
-end
diff --git a/lib/formatters.rb b/lib/formatters.rb
deleted file mode 100644
index 951b4887..00000000
--- a/lib/formatters.rb
+++ /dev/null
@@ -1,411 +0,0 @@
-require 'json'
-
-module Unipept
- class Formatter
- # The Hash of available formatters
- #
- # @return [Hash] A hash of the available formatters
- def self.formatters
- @@formatters ||= {}
- end
-
- # Returns a new formatter of the given format. If the given format is not available, the
- # default formatter is returned
- #
- # @param [String] format The type of the formatter we want
- #
- # @return [Formatter] The requested formatter
- def self.new_for_format(format)
- formatters[format].new
- rescue StandardError
- formatters[default].new
- end
-
- # Adds a new formatter to the list of available formats
- #
- # @param [Symbol] format The type of the format we want to register
- def self.register(format)
- formatters[format.to_s] = self
- end
-
- # Returns a list of the available formatters
- #
- # @return [Array] The list of available formatters
- def self.available
- formatters.reject { |_key, value| value.hidden? }.keys
- end
-
- # @return [String] The type of the default formatter: csv
- def self.default
- 'csv'
- end
-
- # @return [String] The type of the current formatter
- def type
- raise NotImplementedError, 'This must be implemented in a subclass.'
- end
-
- def self.hidden?
- false
- end
-
- # Returns the header row for the given sample_data and fasta_mapper. This
- # row is output only once at the beginning of the output
- #
- # @param [Object] _sample_data The data that we will output after this
- # header. Can be used to extract the keys.
- #
- # @param [Array>] _fasta_mapper Optional mapping between input
- # data and corresponding fasta header. The data is represented as a list
- # containing tuples where the first element is the fasta header and second
- # element is the input data
- #
- # @return [String] The header row
- def header(_sample_data, _fasta_mapper = nil)
- raise NotImplementedError, 'This must be implemented in a subclass.'
- end
-
- # Returns the footer row. This row is output only once at the end of the
- # output
- #
- # @return [String] The footer row
- def footer
- raise NotImplementedError, 'This must be implemented in a subclass.'
- end
-
- # Converts the given input data and corresponding fasta headers to another
- # format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Array>] fasta_mapper Optional mapping between input
- # data and corresponding fasta header. The data is represented as a list
- # containing tuples where the first element is the fasta header and second
- # element is the input data
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data
- def format(data, fasta_mapper, first)
- data = integrate_fasta_headers(data, fasta_mapper) if fasta_mapper
- convert(data, first)
- end
-
- # Converts the given input data to another format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data
- def convert(_data, _first)
- raise NotImplementedError, 'This must be implemented in a subclass.'
- end
-
- # Integrates the fasta headers into the data object
- def integrate_fasta_headers(data, fasta_mapper)
- data_dict = group_by_first_key(data)
- data = fasta_mapper.map do |header, key|
- result = data_dict[key]
- unless result.nil?
- result = result.map do |row|
- copy = { fasta_header: header }
- copy.merge(row)
- end
- end
- result
- end
- data.compact.flatten(1)
- end
-
- # Groups the data by the first key of each element, for example
- # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
- # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]}
- #
- # @param [Array] data The data we wish to group
- #
- # @return [Hash] The input data grouped by the first key
- def group_by_first_key(data)
- data.group_by { |el| el.values.first.to_s }
- end
- end
-
- class JSONFormatter < Formatter
- require 'json'
- register :json
-
- # @return [String] The type of the current formatter: json
- def type
- 'json'
- end
-
- def header(_data, _fasta_mapper = nil)
- '['
- end
-
- def footer
- "]\n"
- end
-
- # Converts the given input data to the JSON format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the JSON format
- def convert(data, first)
- output = data.map(&:to_json).join(',')
- first ? output : ",#{output}"
- end
- end
-
- class CSVFormatter < Formatter
- require 'csv'
- register :csv
-
- # @return [String] The type of the current formatter: csv
- def type
- 'csv'
- end
-
- def get_keys(data, fasta_mapper = nil)
- # This global variable is necessary because we need to know how many items should be
- # nil in the convert function.
- $keys_length = 0 # rubocop:disable Style/GlobalVars
- # This array keeps track of items that are certainly filled in for each type of annotation
- non_empty_items = { 'ec' => nil, 'go' => nil, 'ipr' => nil }
-
- # First we look for items for both ec numbers, go terms and ipr codes that are fully filled in.
- data.each do |row|
- non_empty_items.each_key do |annotation_type|
- non_empty_items[annotation_type] = row if row[annotation_type] && !row[annotation_type].empty?
- end
- end
-
- keys = fasta_mapper ? ['fasta_header'] : []
- keys += (data.first.keys - %w[ec go ipr])
- processed_keys = keys
-
- non_empty_items.each do |annotation_type, non_empty_item|
- next unless non_empty_item
-
- keys += (non_empty_item.keys - processed_keys)
- processed_keys += non_empty_item.keys
-
- idx = keys.index(annotation_type)
- keys.delete_at(idx)
- keys.insert(idx, *non_empty_item[annotation_type].first.keys.map { |el| %w[ec_number go_term ipr_code].include?(el) ? el : "#{annotation_type}_#{el}" })
- $keys_length = *non_empty_item[annotation_type].first.keys.length # rubocop:disable Style/GlobalVars
- end
-
- keys
- end
-
- # Returns the header row for the given data and fasta_mapper. This row
- # contains all the keys of the first element of the data, preceded by
- # 'fasta_header' if a fasta_mapper is given.
- #
- # @param [Array] data The data that we will use to extract the keys from.
- #
- # @param [Array>] fasta_mapper Optional mapping between input
- # data and corresponding fasta header. The data is represented as a list
- # containing tuples where the first element is the fasta header and second
- # element is the input data If a fasta_mapper is given, the output will be
- # preceded with 'fasta_header'.
- #
- # @return [String] The header row
- def header(data, fasta_mapper = nil)
- keys = get_keys(data, fasta_mapper)
-
- CSV.generate do |csv|
- csv << keys.map(&:to_s) if keys.length.positive?
- end
- end
-
- def footer
- ''
- end
-
- # Converts the given input data to the CSV format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the CSV format
- def convert(data, _first)
- keys = get_keys(data)
-
- CSV.generate do |csv|
- data.each do |o|
- row = {}
- o.each do |k, v|
- if %w[ec go ipr].include? k
- if v && !v.empty?
- v.first.each_key do |key|
- row[key == 'protein_count' ? "#{k}_protein_count" : key] = (v.map { |el| el[key] }).join(' ').strip
- end
- else
- row[k] = row.concat(Array.new($keys_length[0], nil)) # rubocop:disable Style/GlobalVars
- end
- else
- row[k] = (v == '' ? nil : v)
- end
- end
- csv << keys.map { |k| row[k] }
- end
- end
- end
- end
-
- class XMLFormatter < Formatter
- # Monkey patch (do as to_xml, but saner)
-
- class ::Object
- def to_xml(name = nil)
- name ? %(<#{name}>#{self}#{name}>) : to_s
- end
- end
-
- class ::Array
- def to_xml(array_name = :array, _item_name = :item)
- %(<#{array_name}>) + map { |n| n.to_xml(:item) }.join + "#{array_name}>"
- end
- end
-
- class ::Hash
- def to_xml(name = nil)
- data = to_a.map { |k, v| v.to_xml(k) }.join
- name ? "<#{name}>#{data}#{name}>" : data
- end
- end
-
- register :xml
-
- # @return [String] The type of the current formatter: xml
- def type
- 'xml'
- end
-
- def header(_data, _fasta_mapper = nil)
- ''
- end
-
- def footer
- "\n"
- end
-
- # Converts the given input data to the XML format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the XML format
- def convert(data, _first)
- data.map { |row| "#{row.to_xml}" }.join
- end
- end
-
- class BlastFormatter < Formatter
- register :blast
-
- # @return [String] The type of the current formatter: blast
- def type
- 'blast'
- end
-
- def self.hidden?
- true
- end
-
- def header(_data, _fasta_mapper = nil)
- ''
- end
-
- def footer
- ''
- end
-
- # Converts the given input data to the Blast format.
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the Blast format
- def convert(data, _first)
- data
- .reject { |o| o['refseq_protein_ids'].empty? }
- .map do |o|
- "#{o['peptide']}\tref|#{o['refseq_protein_ids']}|\t100\t10\t0\t0\t0\t10\t0\t10\t1e-100\t100\n"
- end
- .join
- end
- end
-
- class HtmlFormatter < Formatter
- register :html
-
- # @return [String] The type of the current formatter: html
- def type
- 'html'
- end
-
- def self.hidden?
- false
- end
-
- def header(_data, _fasta_mapper = nil)
- ''
- end
-
- def footer
- ''
- end
-
- # Converts the given input data to an HTML page that contains the Unipept visualizations
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the Blast format
- def convert(data, _first)
- data
- end
- end
-
- class UrlFormatter < Formatter
- register :url
-
- # @return [String] The type of the current formatter: html
- def type
- 'url'
- end
-
- def self.hidden?
- false
- end
-
- def header(_data, _fasta_mapper = nil)
- ''
- end
-
- def footer
- ''
- end
-
- # Converts the given input data to an HTML page that contains the Unipept visualizations
- #
- # @param [Array] data The data we wish to convert
- #
- # @param [Boolean] Is this the first output batch?
- #
- # @return [String] The converted input data in the Blast format
- def convert(data, _first)
- "#{data[0]['gist'].sub!('https://gist.github.com/', 'https://bl.ocks.org/')}\n"
- end
- end
-end
diff --git a/lib/formatters/csv_formatter.ts b/lib/formatters/csv_formatter.ts
new file mode 100644
index 00000000..c2713353
--- /dev/null
+++ b/lib/formatters/csv_formatter.ts
@@ -0,0 +1,39 @@
+import { Formatter } from "./formatter.js";
+import { stringify } from "csv-stringify/sync";
+
+export class CSVFormatter extends Formatter {
+
+ header(sampleData: { [key: string]: string }[], fastaMapper?: boolean | undefined): string {
+ return stringify([this.getKeys(sampleData, fastaMapper)]);
+ }
+
+ footer(): string {
+ return "";
+ }
+
+ convert(data: object[]): string {
+ return stringify(data);
+ }
+
+ getKeys(data: { [key: string]: unknown }[], fastaMapper?: boolean | undefined): string[] {
+ return fastaMapper ? ["fasta_header", ...Object.keys(data[0])] : Object.keys(data[0]);
+ }
+
+  /** Replaces the nested ec/go/ipr lists of every row by flat, space-joined `<prefix>_<field>` columns. */
+  flatten(data: { [key: string]: unknown }[]): { [key: string]: unknown }[] {
+    for (const prefix of ["ec", "go", "ipr"]) {
+      const lists = data.map(row => (Array.isArray(row[prefix]) ? row[prefix] : []) as { [key: string]: unknown }[]);
+      // the first row may have an empty list, so take the field names from the first non-empty one
+      const fields = Object.keys(lists.find(list => list.length > 0)?.[0] ?? {});
+      data.forEach((row, i) => {
+        for (const field of fields) {
+          const newKey = field.startsWith(prefix) ? field : `${prefix}_${field}`;
+          // rows without entries get an empty string, so every row ends up with the same columns
+          row[newKey] = lists[i].map(e => e[field]).join(" ");
+        }
+        delete row[prefix];
+      });
+    }
+    return data;
+  }
+}
diff --git a/lib/formatters/formatter.ts b/lib/formatters/formatter.ts
new file mode 100644
index 00000000..d0ca6dca
--- /dev/null
+++ b/lib/formatters/formatter.ts
@@ -0,0 +1,21 @@
+export abstract class Formatter {
+
+ abstract header(sampleData: object, fastaMapper?: boolean): string;
+ abstract footer(): string;
+ abstract convert(data: object[], first?: boolean): string;
+
+ format(data: object[], fastaMapper?: { [key: string]: string }, first?: boolean): string {
+ if (fastaMapper) {
+ data = this.integrateFastaHeaders(data as { [key: string]: string }[], fastaMapper);
+ }
+ return this.convert(data, first);
+ }
+
+ integrateFastaHeaders(data: { [key: string]: string }[], fastaMapper: { [key: string]: string }): object[] {
+ const key = Object.keys(data[0])[0];
+ data.forEach((entry, i) => {
+ data[i] = Object.assign({ fasta_header: fastaMapper[entry[key]] }, entry);
+ });
+ return data;
+ }
+}
diff --git a/lib/formatters/formatter_factory.ts b/lib/formatters/formatter_factory.ts
new file mode 100644
index 00000000..2a1f3cf5
--- /dev/null
+++ b/lib/formatters/formatter_factory.ts
@@ -0,0 +1,17 @@
+import { Formatter } from "./formatter.js";
+import { CSVFormatter } from "./csv_formatter.js";
+import { JSONFormatter } from "./json_formatter.js";
+import { XMLFormatter } from "./xml_formatter.js";
+
+export class FormatterFactory {
+ static getFormatter(name: string): Formatter {
+ if (name === "csv") {
+ return new CSVFormatter();
+ } else if (name === "json") {
+ return new JSONFormatter();
+ } else if (name === "xml") {
+ return new XMLFormatter();
+ }
+ return new CSVFormatter();
+ }
+}
diff --git a/lib/formatters/json_formatter.ts b/lib/formatters/json_formatter.ts
new file mode 100644
index 00000000..a6af5efe
--- /dev/null
+++ b/lib/formatters/json_formatter.ts
@@ -0,0 +1,31 @@
+import { Formatter } from "./formatter.js";
+
+/** Formatter that writes the rows as one JSON array, batch by batch. */
+export class JSONFormatter extends Formatter {
+
+  /** Opens the JSON array; both arguments are ignored. */
+  header(_sampleData: { [key: string]: string }[], _fastaMapper?: boolean | undefined): string {
+    return "[";
+  }
+
+  /** Closes the JSON array and ends the line. */
+  footer(): string {
+    return "]\n";
+  }
+
+  /**
+   * Serialises one batch as comma-separated JSON objects.
+   *
+   * @param data rows of this batch
+   * @param first when true the batch is emitted without a leading comma
+   * @returns the joined objects, prefixed with "," unless `first`; "" for an empty batch so
+   *          that a batch without rows cannot leave a dangling comma in the array
+   */
+  convert(data: object[], first: boolean): string {
+    if (data.length === 0) {
+      return "";
+    }
+    const output = data.map(d => JSON.stringify(d)).join(",");
+    return first ? output : `,${output}`;
+  }
+}
diff --git a/lib/formatters/to_xml.ts b/lib/formatters/to_xml.ts
new file mode 100644
index 00000000..c806e4f2
--- /dev/null
+++ b/lib/formatters/to_xml.ts
@@ -0,0 +1,254 @@
+// @ts-nocheck
+
+// This file was taken from https://github.com/kawanet/to-xml and modified to have a specific output for arrays.
+
+/**
+ * The toXML() method converts a JavaScript value to an XML string.
+ *
+ * @function toXML
+ * @param value {Object} The value to convert to an XML string.
+ * @param [replacer] {Function} A function that alters the behavior
+ * of the stringification process.
+ * @param [space] {Number|String} A String or Number object that's
+ * used to insert white space into the output XML string for
+ * readability purposes. If this is a Number, it indicates the number
+ * of space characters to use as white space.
+ * If this is a String, the string is used as white space.
+ * @returns {String}
+ */
+
+const TYPES = { // typeof value -> writer function; fromAny emits nothing for types not listed here
+ "boolean": fromString,
+ "number": fromString,
+ "object": fromObject,
+ "string": fromString
+};
+
+const ESCAPE = { // raw character -> XML character/entity reference, looked up by escapeRef
+  "\t": "&#x09;",
+  "\n": "&#x0a;",
+  "\r": "&#x0d;",
+  " ": "&#x20;",
+  "&": "&amp;",
+  "<": "&lt;",
+  ">": "&gt;",
+  '"': "&quot;"
+};
+
+const ATTRIBUTE_KEY = "@"; // object keys starting with this character are written as attributes
+const CHILD_NODE_KEY = "#"; // fromAny treats this key as the empty key, i.e. no wrapping tag
+const LF = "\n";
+
+const isArray = Array.isArray || _isArray; // instanceof-based fallback when Array.isArray is unavailable
+
+const REPLACE = String.prototype.replace; // invoked through .call by the escape helpers
+
+function _toXML(value, replacer, space) { // serialises `value` and returns the accumulated XML string
+  const state = createJob(replacer, space);
+  fromAny(state, "", value); // empty key: the root value gets no wrapping tag
+  return state.r;
+}
+
+function createJob(replacer, space) { // builds the mutable state object shared by all writer functions
+  const job = {
+    f: replacer, // replacer function
+    // s: "", // indent string (only set when `space` is given)
+    // i: 0, // indent string length (only set when `space` is given)
+    l: "", // current indent string
+    r: "" // result string
+  };
+
+  if (space) {
+    let str = "";
+
+    if (space > 0) {
+      // repeat() truncates fractions; the former countdown loop (`i--` until i is
+      // exactly 0) never terminated for a non-integer count such as 1.5
+      str = " ".repeat(space);
+    } else {
+      str += space; // stringify
+    }
+    job.s = str;
+
+    // indent string length
+    job.i = str.length;
+  }
+
+  return job;
+}
+
+function fromAny(job, key, value) { // dispatches one key/value pair to the writer for its type
+  // "#" is a synonym for "no tag"
+  const tag = (key === CHILD_NODE_KEY) ? "" : key;
+
+  // arrays are handed to fromArray before the replacer is consulted
+  if (_isArray(value)) return fromArray(job, tag, value);
+  const replacer = job.f;
+  const replaced = replacer ? replacer(tag, value) : value;
+  const writer = TYPES[typeof replaced];
+  // types without a writer in TYPES produce no output
+  if (writer) writer(job, tag, replaced);
+}
+
+function fromString(job, key, value) { // writes a scalar as declaration, comment/CDATA, text or text element
+  if (key === "?") {
+    // XML declaration: <?value?>
+    value = "<?" + value + "?>";
+  } else if (key === "!") {
+    // comment, CDATA section: <!value>
+    value = "<!" + value + ">";
+  } else {
+    value = escapeTextNode(value);
+    if (key) {
+      // text element without attributes: <key>value</key>
+      value = "<" + key + ">" + value + "</" + key + ">";
+    }
+  }
+
+  if (key && job.i && job.r) {
+    job.r += LF + job.l; // indent
+  }
+
+  job.r += value;
+}
+
+function fromArray(job, key, value) { // writes an array as <key><item>…</item>…</key>
+  if (key === "item") {
+    // already inside the wrapper: every element becomes its own <item>
+    value.forEach((child) => fromAny(job, key, child));
+  } else {
+    // wrap the array so that it comes back here under the "item" key
+    fromObject(job, key, { item: value });
+  }
+}
+
+function fromObject(job, key, value) { // writes an object (or null) as an element with attributes and children
+  // empty tag
+  const hasTag = !!key;
+  const closeTag = (value === null);
+  if (closeTag) {
+    if (!hasTag) return;
+    value = {};
+  }
+
+  const keys = Object.keys(value);
+  const keyLength = keys.length;
+  const attrs = keys.filter(isAttribute);
+  const attrLength = attrs.length;
+  const hasIndent = job.i;
+  const curIndent = job.l;
+  let willIndent = hasTag && hasIndent;
+  let didIndent;
+
+  // open tag
+  if (hasTag) {
+    if (hasIndent && job.r) {
+      job.r += LF + curIndent;
+    }
+
+    job.r += '<' + key;
+
+    // attributes (the leading "@" is dropped from the name)
+    attrs.forEach(function (name) {
+      writeAttributes(job, name.slice(1), value[name]);
+    });
+
+    // empty element: null value, or nothing but attributes
+    const isEmpty = closeTag || (attrLength && keyLength === attrLength);
+    if (isEmpty) {
+      const firstChar = key[0];
+      if (firstChar !== "!" && firstChar !== "?") {
+        job.r += "/";
+      }
+    }
+
+    job.r += '>';
+
+    if (isEmpty) return;
+  }
+
+  keys.forEach(function (name) {
+    // skip attribute
+    if (isAttribute(name)) return;
+
+    // indent when it has child node but not fragment
+    if (willIndent && ((name && name !== CHILD_NODE_KEY) || isArray(value[name]))) {
+      job.l += job.s; // increase indent level
+      willIndent = 0;
+      didIndent = 1;
+    }
+
+    // child node or text node
+    fromAny(job, name, value[name]);
+  });
+
+  if (didIndent) {
+    // decrease indent level
+    job.l = job.l.slice(job.i);
+
+    job.r += LF + job.l;
+  }
+
+  // close tag: </key>
+  if (hasTag) {
+    job.r += '</' + key + '>';
+  }
+}
+
+function writeAttributes(job, key, val) { // expands arrays and (for an empty key) objects into single attributes
+  if (isArray(val)) {
+    // one attribute per element, all under the same name
+    val.forEach((child) => writeAttributes(job, key, child));
+    return;
+  }
+  if (!key && "object" === typeof val) {
+    // empty name: each property of the object becomes its own attribute
+    Object.keys(val).forEach((name) => writeAttributes(job, name, val[name]));
+    return;
+  }
+  writeAttribute(job, key, val);
+}
+
+function writeAttribute(job, key, val) { // appends one attribute to the tag currently being opened
+  const replacer = job.f;
+  const attrValue = replacer ? replacer(ATTRIBUTE_KEY + key, val) : val;
+  if (typeof attrValue === "undefined") return; // undefined suppresses the attribute
+
+  if (key) {
+    // attribute name
+    job.r += ' ' + key;
+  } else {
+    // empty attribute name: the value is appended as-is, without escaping
+    job.r += ' ' + attrValue;
+    return;
+  }
+
+  // null leaves a name-only attribute; anything else becomes ="escaped value"
+  if (attrValue !== null) {
+    job.r += '="' + escapeAttribute(attrValue) + '"';
+  }
+}
+
+function isAttribute(name) { // truthy when the key is non-empty and starts with ATTRIBUTE_KEY
+ return name && name[0] === ATTRIBUTE_KEY;
+}
+
+function escapeTextNode(str) { // escapes &, <, > anywhere, plus one leading and one trailing whitespace character
+ return REPLACE.call(str, /(^\s|[&<>]|\s$)/g, escapeRef);
+}
+
+function escapeAttribute(str) { // escapes &, < and ": XML forbids these raw inside a double-quoted attribute value
+  return REPLACE.call(str, /([&<"])/g, escapeRef);
+}
+
+function escapeRef(str) { // replace callback: the ESCAPE entry for the match, or the match itself if there is none
+ return ESCAPE[str] || str;
+}
+
+function _isArray(array) { // instanceof-based array test; called by fromAny and used as isArray's fallback
+ return array instanceof Array;
+}
+
+export function toXML(value: object): string { // typed public wrapper: calls _toXML without replacer or indentation
+ return _toXML(value);
+}
diff --git a/lib/formatters/xml_formatter.ts b/lib/formatters/xml_formatter.ts
new file mode 100644
index 00000000..e30c54e5
--- /dev/null
+++ b/lib/formatters/xml_formatter.ts
@@ -0,0 +1,27 @@
+import { Formatter } from "./formatter.js";
+import { toXML } from "./to_xml.js";
+
+/** Formatter that writes every row as a `<result>` element inside one `<results>` root. */
+export class XMLFormatter extends Formatter {
+
+  /** Opens the root element; both arguments are ignored. */
+  header(_sampleData: { [key: string]: string }[], _fastaMapper?: boolean | undefined): string {
+    return "<results>";
+  }
+
+  /** Closes the root element and ends the line. */
+  footer(): string {
+    return "</results>\n";
+  }
+
+  /**
+   * Serialises one batch of rows.
+   *
+   * @param data rows of this batch
+   * @param _first unused by this formatter
+   * @returns the rows as concatenated `<result>` elements, without separators
+   */
+  convert(data: object[], _first: boolean): string {
+    return data.map(d => `<result>${toXML(d)}</result>`).join("");
+  }
+}
diff --git a/lib/output_writer.rb b/lib/output_writer.rb
deleted file mode 100644
index 067b1972..00000000
--- a/lib/output_writer.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-module Unipept
- class OutputWriter
- attr_reader :output
-
- def initialize(file)
- @output = file ? File.open(file, 'a') : $stdout
- end
-
- def write_line(line)
- @output.write line
- end
- end
-end
diff --git a/lib/retryable_typhoeus.rb b/lib/retryable_typhoeus.rb
deleted file mode 100644
index b8aa8db6..00000000
--- a/lib/retryable_typhoeus.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-# Retryable Typheous
-# Inspiration: https://gist.github.com/kunalmodi/2939288
-# Patches the request and hydra to allow requests to get resend when they fail
-
-module RetryableTyphoeus
- require 'typhoeus'
-
- include Typhoeus
-
- DEFAULT_RETRIES = 10
-
- class Request < Typhoeus::Request
- attr_accessor :retries
-
- def initialize(base_url, options = {})
- @retries = (options.delete(:retries) || DEFAULT_RETRIES)
-
- super
- end
-
- def finish(response, bypass_memoization = nil)
- if response.success? || @retries <= 0
- super
- else
- @retries -= 1
- if @hydra
- @hydra.queue_front self
- else
- run
- end
- end
- end
- end
-end
diff --git a/lib/server_message.rb b/lib/server_message.rb
deleted file mode 100644
index a0ad04fa..00000000
--- a/lib/server_message.rb
+++ /dev/null
@@ -1,44 +0,0 @@
-require 'typhoeus'
-
-require_relative 'configuration'
-
-module Unipept
- class ServerMessage
- attr_reader :message_url, :configuration
-
- def initialize(host)
- @message_url = "#{host}/api/v2/messages.json"
- @configuration = Unipept::Configuration.new
- end
-
- # Checks if the server has a message and prints it if not empty.
- # We will only check this once a day and won't print anything if the quiet
- # option is set or if we output to a file.
- def print
- return unless $stdout.tty?
- return if recently_fetched?
-
- resp = fetch_server_message
- update_fetched
- puts resp unless resp.empty?
- end
-
- # Fetches a message from the server and returns it
- def fetch_server_message
- Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp
- end
-
- # Returns true if the last check for a server message was less than a day
- # ago.
- def recently_fetched?
- last_fetched = @configuration['last_fetch_date']
- !last_fetched.nil? && (last_fetched + (60 * 60 * 24)) > Time.now
- end
-
- # Updates the last checked timestamp
- def update_fetched
- @configuration['last_fetch_date'] = Time.now
- @configuration.save
- end
- end
-end
diff --git a/lib/version.rb b/lib/version.rb
deleted file mode 100644
index 2986a649..00000000
--- a/lib/version.rb
+++ /dev/null
@@ -1,3 +0,0 @@
-module Unipept
- VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp
-end
diff --git a/package.json b/package.json
new file mode 100644
index 00000000..9c7c5329
--- /dev/null
+++ b/package.json
@@ -0,0 +1,50 @@
+{
+ "name": "unipept-cli",
+ "version": "4.0.1",
+ "description": "Command line interface to the Unipept web services",
+ "repository": "git@github.com:unipept/unipept-cli.git",
+ "author": "Bart Mesuere ",
+ "license": "MIT",
+ "private": false,
+ "type": "module",
+ "bin": {
+ "peptfilter": "./dist/bin/peptfilter.js",
+ "prot2pept": "./dist/bin/prot2pept.js",
+ "unipept": "./dist/bin/unipept.js",
+ "uniprot": "./dist/bin/uniprot.js"
+ },
+ "scripts": {
+ "build": "yarn run tsc",
+ "lint": "yarn run eslint",
+ "test": "NODE_OPTIONS='--experimental-vm-modules --no-warnings' yarn run jest",
+ "typecheck": "yarn tsc --skipLibCheck --noEmit",
+ "peptfilter": "yarn run tsx bin/peptfilter.ts",
+ "prot2pept": "yarn run tsx bin/prot2pept.ts",
+ "unipept": "yarn run tsx bin/unipept.ts",
+ "uniprot": "yarn run tsx bin/uniprot.ts"
+ },
+ "files": [
+ "dist"
+ ],
+ "dependencies": {
+ "commander": "^12.1.0",
+ "csv-stringify": "^6.5.0"
+ },
+ "devDependencies": {
+ "@eslint/js": "^9.5.0",
+ "@types/jest": "^29.5.12",
+ "@types/node": "^20.14.2",
+ "@typescript-eslint/eslint-plugin": "^7.13.1",
+ "@typescript-eslint/parser": "^7.13.1",
+ "eslint": "9.x",
+ "globals": "^15.6.0",
+ "jest": "^29.7.0",
+ "mock-stdin": "^1.0.0",
+ "np": "^10.0.7",
+ "ts-jest": "^29.1.5",
+ "ts-node": "^10.9.2",
+ "tsx": "^4.15.6",
+ "typescript": "^5.4.5",
+ "typescript-eslint": "^7.13.1"
+ }
+}
diff --git a/test/commands/test_peptfilter.rb b/test/commands/test_peptfilter.rb
deleted file mode 100644
index b2e3282c..00000000
--- a/test/commands/test_peptfilter.rb
+++ /dev/null
@@ -1,180 +0,0 @@
-require_relative '../../lib/commands'
-
-module Unipept
- class PeptfilterTestCase < Unipept::TestCase
- def test_length_filter
- # min length
- assert(Commands::Peptfilter.filter_length('AALER', 4, 10))
- assert(Commands::Peptfilter.filter_length('AALER', 5, 10))
- assert(!Commands::Peptfilter.filter_length('AALER', 6, 10))
-
- # max length
- assert(!Commands::Peptfilter.filter_length('AALER', 1, 4))
- assert(Commands::Peptfilter.filter_length('AALER', 1, 5))
- assert(Commands::Peptfilter.filter_length('AALER', 1, 6))
- end
-
- def test_lacks_filter
- assert(Commands::Peptfilter.filter_lacks('AALER', ''.chars.to_a))
- assert(Commands::Peptfilter.filter_lacks('AALER', 'BCD'.chars.to_a))
- assert(!Commands::Peptfilter.filter_lacks('AALER', 'A'.chars.to_a))
- assert(!Commands::Peptfilter.filter_lacks('AALER', 'AE'.chars.to_a))
- end
-
- def test_contains_filter
- assert(Commands::Peptfilter.filter_contains('AALER', ''.chars.to_a))
- assert(Commands::Peptfilter.filter_contains('AALER', 'A'.chars.to_a))
- assert(Commands::Peptfilter.filter_contains('AALER', 'AE'.chars.to_a))
- assert(!Commands::Peptfilter.filter_contains('AALER', 'BCD'.chars.to_a))
- assert(!Commands::Peptfilter.filter_contains('AALER', 'AB'.chars.to_a))
- end
-
- def test_filter
- assert(Commands::Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'AL'.chars.to_a))
- assert(!Commands::Peptfilter.filter('AALTER', 7, 10, 'BCD.chars.to_a', 'AL'.chars.to_a))
- assert(!Commands::Peptfilter.filter('AALTER', 4, 5, 'BCD'.chars.to_a, 'AL'.chars.to_a))
- assert(!Commands::Peptfilter.filter('AALTER', 4, 10, 'ABC'.chars.to_a, 'AL'.chars.to_a))
- assert(!Commands::Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'ALC'.chars.to_a))
- end
-
- def test_default_min_length_argument
- out, _err = capture_io_with_input('A' * 6) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('A' * 6, out.chomp)
-
- out, _err = capture_io_with_input('A' * 5) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('A' * 5, out.chomp)
-
- out, _err = capture_io_with_input('A' * 4) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('', out.chomp)
- end
-
- def test_default_max_length_argument
- out, _err = capture_io_with_input('A' * 49) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('A' * 49, out.chomp)
-
- out, _err = capture_io_with_input('A' * 50) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('A' * 50, out.chomp)
-
- out, _err = capture_io_with_input('A' * 51) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('', out.chomp)
- end
-
- def test_with_min_argument
- out, _err = capture_io_with_input('A' * 6) do
- Commands::Peptfilter.run(%w[--minlen 7])
- end
- assert_equal('', out.chomp)
-
- out, _err = capture_io_with_input('A' * 4) do
- Commands::Peptfilter.run(%w[--minlen 3])
- end
- assert_equal('A' * 4, out.chomp)
- end
-
- def test_with_max_argument
- out, _err = capture_io_with_input('A' * 45) do
- Commands::Peptfilter.run(%w[--maxlen 40])
- end
- assert_equal('', out.chomp)
-
- out, _err = capture_io_with_input('A' * 55) do
- Commands::Peptfilter.run(%w[--maxlen 60])
- end
- assert_equal('A' * 55, out.chomp)
- end
-
- def test_with_lacks_argument
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[--lacks B])
- end
- assert_equal('A' * 10, out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[-l B])
- end
- assert_equal('A' * 10, out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[--lacks A])
- end
- assert_equal('', out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[-l A])
- end
- assert_equal('', out.chomp)
- end
-
- def test_with_contains_argument
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[--contains A])
- end
- assert_equal('A' * 10, out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[-c A])
- end
- assert_equal('A' * 10, out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[--contains B])
- end
- assert_equal('', out.chomp)
-
- out, _err = capture_io_with_input('A' * 10) do
- Commands::Peptfilter.run(%w[-c B])
- end
- assert_equal('', out.chomp)
- end
-
- def test_fasta_input
- out, _err = capture_io_with_input('>') do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal('>', out.chomp)
-
- out, _err = capture_io_with_input(['>', 'A', 'AALTER', '>']) do
- Commands::Peptfilter.run(%w[])
- end
- assert_equal(">\nAALTER\n>", out.chomp)
- end
-
- def test_no_input
- out, err = capture_io_while do
- assert_raises SystemExit do
- Commands::Peptfilter.run(%w[some argument])
- end
- end
- assert_equal('', out.chomp)
- assert_equal("error: peptfilter doesn't support input as arguments. Use standard input instead.", err.chomp)
- end
-
- def test_normal_input
- out, _err = capture_io_with_input(['A', 'A' * 11, 'AAAAB', 'BBBBB', 'CCCCC', 'CCCCCA']) do
- Commands::Peptfilter.run(%w[--minlen 4 --maxlen 10 --lacks B --contains A])
- end
- assert_equal('CCCCCA', out.chomp)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Peptfilter.run(%w[-h])
- end
- end
- assert(out.include?('show help for this command'))
- end
- end
-end
diff --git a/test/commands/test_prot2pept.rb b/test/commands/test_prot2pept.rb
deleted file mode 100644
index 2baccf64..00000000
--- a/test/commands/test_prot2pept.rb
+++ /dev/null
@@ -1,87 +0,0 @@
-require_relative '../../lib/commands'
-
-module Unipept
- class Prot2peptTestCase < Unipept::TestCase
- def test_normal_input
- out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("AALTER\nAALTERPAALTER", out.chomp)
-
- out, _err = capture_io_with_input('AALTERAAL*TERPAALTER') do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("AALTER\nAAL\nTERPAALTER", out.chomp)
-
- out, _err = capture_io_with_input('KRKPR') do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("K\nR\nKPR", out.chomp)
-
- out, _err = capture_io_with_input(%w[AALTERAALTERPAALTER AALTERAA]) do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("AALTER\nAALTERPAALTER\nAALTER\nAA", out.chomp)
- end
-
- def test_fasta_input
- out, _err = capture_io_with_input(">AKA\nAALTERAALTERPAALTER") do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp)
-
- out, _err = capture_io_with_input(">AKA\nAAL\nT\nERAALTER\nP\nAALTER") do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp)
-
- out, _err = capture_io_with_input(">AKA\nAAL\nT\n>\nERAALTER\nP\nAALTER") do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal(">AKA\nAALT\n>\nER\nAALTERPAALTER", out.chomp)
- end
-
- def test_default_pattern
- default_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("AALTER\nAALTERPAALTER", default_out.chomp)
-
- pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(['-p', '([KR])([^P])'])
- end
- assert_equal(default_out, pattern_out)
-
- pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(['--pattern', '([KR])([^P])'])
- end
- assert_equal(default_out, pattern_out)
- end
-
- def test_pattern
- out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(%w[])
- end
- assert_equal("AALTER\nAALTERPAALTER", out.chomp)
-
- out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(%w(-p ([KR])([^A])))
- end
- assert_equal("AALTERAALTER\nPAALTER", out.chomp)
-
- out, _err = capture_io_with_input('AALTERAALTERPAALTER') do
- Commands::Prot2pept.run(%w(--pattern ([KR])([^A])))
- end
- assert_equal("AALTERAALTER\nPAALTER", out.chomp)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Prot2pept.run(%w[-h])
- end
- end
- assert(out.include?('show help for this command'))
- end
- end
-end
diff --git a/test/commands/test_unipept.rb b/test/commands/test_unipept.rb
deleted file mode 100644
index 0055a5a2..00000000
--- a/test/commands/test_unipept.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-require_relative '../../lib/commands'
-
-module Unipept
- class UnipeptTestCase < Unipept::TestCase
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[-h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[--help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_no_valid_subcommand
- _out, err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[])
- end
- end
- assert(err.include?('show help for this command'))
- end
-
- def test_version
- out, _err = capture_io_while do
- Commands::Unipept.run(%w[-v])
- end
- assert_equal(VERSION, out.chomp)
- end
- end
-end
diff --git a/test/commands/test_uniprot.rb b/test/commands/test_uniprot.rb
deleted file mode 100644
index bf5b6ec6..00000000
--- a/test/commands/test_uniprot.rb
+++ /dev/null
@@ -1,136 +0,0 @@
-require_relative '../../lib/commands'
-
-module Unipept
- class UniprotTestCase < Unipept::TestCase
- def test_argument_input
- out, _err = capture_io_while do
- Commands::Uniprot.run(%w[Q6GZX3])
- end
- assert_equal(1, out.split(/\n/).length)
-
- out, _err = capture_io_while do
- Commands::Uniprot.run(%w[Q6GZX3 Q6GZX4])
- end
- assert_equal(2, out.split(/\n/).length)
-
- out, _err = capture_io_while do
- Commands::Uniprot.run(%w[-f fasta Q6GZX3 Q6GZX4])
- end
- assert_equal(2, out.count('>'))
-
- out, _err = capture_io_while do
- Commands::Uniprot.run(%w[--format fasta Q6GZX3 Q6GZX4])
- end
- assert_equal(2, out.count('>'))
- end
-
- def test_stdin_input
- out, _err = capture_io_with_input('Q6GZX3') do
- Commands::Uniprot.run(%w[])
- end
- assert_equal(1, out.split(/\n/).length)
-
- out, _err = capture_io_with_input(%w[Q6GZX3 Q6GZX4]) do
- Commands::Uniprot.run(%w[])
- end
- assert_equal(2, out.split(/\n/).length)
-
- out, _err = capture_io_with_input(%w[Q6GZX3 Q6GZX4]) do
- Commands::Uniprot.run(%w[-f fasta])
- end
- assert_equal(2, out.count('>'))
-
- out, _err = capture_io_with_input(%w[Q6GZX3 Q6GZX4]) do
- Commands::Uniprot.run(%w[--format fasta])
- end
- assert_equal(2, out.count('>'))
- end
-
- def test_argument_input_priority
- out, _err = capture_io_with_input('Q6GZX3') do
- Commands::Uniprot.run(%w[Q6GZX3 Q6GZX4])
- end
- assert_equal(2, out.split(/\n/).length)
-
- out, _err = capture_io_with_input(%w[Q6GZX3 Q6GZX4]) do
- Commands::Uniprot.run(%w[Q6GZX3])
- end
- assert_equal(1, out.split(/\n/).length)
- end
-
- def test_invalid_format
- out, err = capture_io_while do
- assert_raises SystemExit do
- Commands::Uniprot.run(%w[--format xxx])
- end
- end
- assert_equal('', out)
- assert(err.include?('xxx is not a valid output format'))
- end
-
- def test_default_format
- out_default, _err = capture_io_while do
- Commands::Uniprot.run(%w[Q6GZX3])
- end
- assert_equal(1, out_default.split(/\n/).length)
-
- out_sequence, _err = capture_io_while do
- Commands::Uniprot.run(%w[-f sequence Q6GZX3])
- end
- assert_equal(out_default, out_sequence)
-
- out_sequence, _err = capture_io_while do
- Commands::Uniprot.run(%w[--format sequence Q6GZX3])
- end
- assert_equal(out_default, out_sequence)
- end
-
- def test_format_options
- # fasta txt xml rdf gff sequence
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f fasta Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
-
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f txt Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
-
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f xml Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
-
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f rdf Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
-
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f gff Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
-
- out, err = capture_io_while do
- Commands::Uniprot.run(%w[-f sequence Q6GZX3])
- end
- assert(!out.empty?)
- assert(err.empty?)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Uniprot.run(%w[-h])
- end
- end
- assert(out.include?('show help for this command'))
- end
- end
-end
diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb
deleted file mode 100644
index 89908b28..00000000
--- a/test/commands/unipept/test_api_runner.rb
+++ /dev/null
@@ -1,451 +0,0 @@
-require_relative '../../../lib/commands/unipept/api_runner'
-
-module Unipept
- # make methods public to test them
- class Commands::ApiRunner
- public :glob_to_regex, :handle_response, :error_file_path, :filter_result
- end
-
- class UnipeptAPIRunnerTestCase < Unipept::TestCase
- def test_init
- runner = new_runner('test', { host: 'test_host' }, %w[a b c])
- assert_equal('test', runner.command.name)
- assert_equal('test_host', runner.options[:host])
- assert_equal(%w[a b c], runner.arguments)
- assert(!runner.configuration.nil?)
- assert_equal('http://test_host/api/v2/test.json', runner.url)
- assert(/Unipept CLI - unipept [0-9]*\.[0-9]*\.[0-9]*/.match runner.user_agent)
- end
-
- def test_config_host
- runner = new_runner('test', { host: 'http://param_host' }, %w[a b c])
- runner.options.delete(:host)
- runner.configuration['host'] = 'http://config_host'
- host = runner.host
- assert_equal('http://config_host', host)
- end
-
- def test_param_host
- runner = new_runner('test', { host: 'http://param_host' }, %w[a b c])
- runner.configuration.delete('host')
- host = runner.host
- assert_equal('http://param_host', host)
- end
-
- def test_no_host
- runner = new_runner('test', { host: 'param_host' }, %w[a b c])
- runner.configuration.delete('host')
- runner.options.delete(:host)
- host = runner.host
- assert_equal('http://api.unipept.ugent.be', host)
- end
-
- def test_host_priority
- runner = new_runner('test', { host: 'http://param_host' }, %w[a b c])
- runner.configuration['host'] = 'http://config_host'
- host = runner.host
- assert_equal('http://param_host', host)
- end
-
- def test_http_host
- runner = new_runner('test', { host: 'param_host' }, %w[a b c])
- host = runner.host
- assert_equal('http://param_host', host)
- end
-
- def test_https_host
- runner = new_runner('test', { host: 'https://param_host' }, %w[a b c])
- host = runner.host
- assert_equal('https://param_host', host)
- end
-
- def test_input_iterator_args
- runner = new_runner('test', { host: 'https://param_host' }, %w[a b c])
- output = []
- runner.input_iterator.each { |el| output << el.chomp }
- assert_equal(%w[a b c], output)
- end
-
- def test_input_iterator_file
- File.write('input_file', %w[a b c].join("\n"))
- runner = new_runner('test', host: 'https://param_host', input: 'input_file')
- output = []
- runner.input_iterator.each { |el| output << el.chomp }
- assert_equal(%w[a b c], output)
- end
-
- def test_input_iterator_stdin
- runner = new_runner('test', host: 'https://param_host')
- output = []
- _out, _err = capture_io_with_input(%w[a b c]) do
- runner.input_iterator.each { |el| output << el.chomp }
- end
- assert_equal(%w[a b c], output)
- end
-
- def test_input_iterator_arguments_priority
- File.write('input_file', %w[1 2 3].join("\n"))
- runner = new_runner('test', { host: 'https://param_host', input: 'input_file' }, %w[a b c])
- output = []
- _out, _err = capture_io_with_input(%w[1 2 3]) do
- runner.input_iterator.each { |el| output << el.chomp }
- end
- assert_equal(%w[a b c], output)
- end
-
- def test_input_iterator_file_priority
- File.write('input_file', %w[a b c].join("\n"))
- runner = new_runner('test', host: 'https://param_host', input: 'input_file')
- output = []
- _out, _err = capture_io_with_input(%w[1 2 3]) do
- runner.input_iterator.each { |el| output << el.chomp }
- end
- assert_equal(%w[a b c], output)
- end
-
- def test_required_fields
- assert_equal([], new_runner.required_fields)
- end
-
- def test_required_fields_configurable
- r = new_runner
- def r.required_fields
- ['test']
- end
- assert_equal(['test'], r.required_fields)
- end
-
- def test_default_batch_size
- assert_raises NotImplementedError do
- new_runner.default_batch_size
- end
- end
-
- def test_batch_size
- r = new_runner
- def r.default_batch_size
- 100
- end
- assert_equal(100, r.batch_size)
- end
-
- def test_argument_batch_size
- runner = new_runner('test', host: 'http://param_host', batch: '123')
- assert_equal(123, runner.batch_size)
- end
-
- def test_number_of_parallel_requests
- assert_equal(10, new_runner.concurrent_requests)
- runner = new_runner('test', host: 'http://param_host', parallel: '123')
- assert_equal(123, runner.concurrent_requests)
- end
-
- def test_queue_size
- assert_equal(200, new_runner.queue_size)
- runner = new_runner('test', host: 'http://param_host', parallel: '100')
- assert_equal(2000, runner.queue_size)
- end
-
- def test_default_formatter
- runner = new_runner
- assert_equal('csv', runner.formatter.type)
- end
-
- def test_param_formatter
- runner = new_runner('test', host: 'http://param_host', format: 'json')
- assert_equal('json', runner.formatter.type)
- end
-
- def test_no_selected_fields
- runner = new_runner
- assert_equal([], runner.selected_fields)
- end
-
- def test_required_fields_are_not_selected_with_empty_selection
- runner = new_runner
- def runner.required_fields
- ['test']
- end
- runner.required_fields
- assert_equal([], runner.selected_fields)
- end
-
- def test_required_fields_are_selected_for_fasta
- runner = new_runner('test', host: 'http://param_host', select: 'field')
- def runner.required_fields
- ['test']
- end
- runner.instance_variable_set(:@fasta, true)
- assert_equal([/^field$/, /^test$/], runner.selected_fields)
- end
-
- def test_required_fields_are_not_selected_if_not_fasta
- runner = new_runner('test', host: 'http://param_host', select: 'field')
- def runner.required_fields
- ['test']
- end
- runner.instance_variable_set(:@fasta, false)
- assert_equal([/^field$/], runner.selected_fields)
- end
-
- def test_single_selected_fields
- runner = new_runner('test', host: 'http://param_host', select: 'field')
- assert_equal([/^field$/], runner.selected_fields)
- end
-
- def test_comma_selected_fields
- runner = new_runner('test', host: 'http://param_host', select: 'field1,field2')
- assert_equal([/^field1$/, /^field2$/], runner.selected_fields)
- end
-
- def test_multiple_selected_fields
- runner = new_runner('test', host: 'http://param_host', select: %w[field1 field2])
- assert_equal([/^field1$/, /^field2$/], runner.selected_fields)
- end
-
- def test_combined_selected_fields
- runner = new_runner('test', host: 'http://param_host', select: ['field1', 'field2,field3'])
- assert_equal([/^field1$/, /^field2$/, /^field3$/], runner.selected_fields)
- end
-
- def test_wildcard_selected_fields
- runner = new_runner('test', host: 'http://param_host', select: 'field*')
- assert_equal([/^field.*$/], runner.selected_fields)
- end
-
- def test_basic_construct_request_body
- runner = new_runner('test', host: 'http://param_host')
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(false, body[:equate_il])
- assert_equal(false, body[:extra])
- assert_equal(false, body[:names])
- end
-
- def test_equate_construct_request_body
- runner = new_runner('test', host: 'http://param_host', equate: true)
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(true, body[:equate_il])
- assert_equal(false, body[:extra])
- assert_equal(false, body[:names])
- end
-
- def test_all_no_select_construct_request_body
- runner = new_runner('test', host: 'http://param_host', all: true)
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(false, body[:equate_il])
- assert_equal(true, body[:extra])
- assert_equal(true, body[:names])
- end
-
- def test_all_names_select_construct_request_body
- runner = new_runner('test', host: 'http://param_host', all: true, select: 'test,names')
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(false, body[:equate_il])
- assert_equal(true, body[:extra])
- assert_equal(true, body[:names])
- end
-
- def test_all_names_wildcard_select_construct_request_body
- runner = new_runner('test', host: 'http://param_host', all: true, select: 'test,order*')
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(false, body[:equate_il])
- assert_equal(true, body[:extra])
- assert_equal(true, body[:names])
- end
-
- def test_all_no_names_select_construct_request_body
- runner = new_runner('test', host: 'http://param_host', all: true, select: 'test')
- body = runner.construct_request_body('test')
- assert_equal('test', body[:input])
- assert_equal(false, body[:equate_il])
- assert_equal(true, body[:extra])
- assert_equal(false, body[:names])
- end
-
- def test_glob_to_regex
- runner = new_runner
- assert(/^simple$/, runner.glob_to_regex('simple'))
- assert(/^.*simple.*$/, runner.glob_to_regex('*simple*'))
- end
-
- def test_save_error
- runner = new_runner
- runner.stub(:error_file_path, 'errordir/error.log') do
- _out, err = capture_io_while do
- runner.save_error('error message')
- end
- assert(err.start_with?('API request failed! log can be found in'))
- assert_equal('error message', File.foreach('errordir/error.log').next.chomp)
- end
- end
-
- def test_error_file_path
- runner = new_runner
- assert(runner.error_file_path.include?('/.unipept/'))
- end
-
- def test_invalid_filter_result
- runner = new_runner
- assert_equal([], runner.filter_result('{"key":"value'))
- end
-
- def test_array_wrap_filter_result
- runner = new_runner
- assert_equal([{ 'key' => 'value' }], runner.filter_result('{"key":"value"}'))
- end
-
- def test_filter_filter_result
- runner = new_runner('test', host: 'test', select: 'key1')
- result = runner.filter_result('[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
- assert_equal([{ 'key1' => 'value1' }, { 'key1' => 'value2' }], result)
- end
-
- def test_success_header_handle_response
- runner = new_runner
- response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
- lambda = runner.handle_response(response, 0, nil)
- assert(lambda.lambda?)
- out, err = capture_io_while(&lambda)
- lines = out.each_line
- assert_equal('', err)
- assert_equal('key1,key2', lines.next.chomp)
- assert_equal('value1,value1', lines.next.chomp)
- assert_equal('value2,value2', lines.next.chomp)
- end
-
- def test_success_no_header_option_handle_response
- runner = new_runner('test', { host: 'test', 'no-header': true })
- response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
- lambda = runner.handle_response(response, 0, nil)
- assert(lambda.lambda?)
- out, err = capture_io_while(&lambda)
- lines = out.each_line
- assert_equal('', err)
- assert_equal('value1,value1', lines.next.chomp)
- assert_equal('value2,value2', lines.next.chomp)
- end
-
- def test_success_no_header_handle_response
- runner = new_runner
- response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]')
- lambda = runner.handle_response(response, 1, nil)
- assert(lambda.lambda?)
- out, err = capture_io_while(&lambda)
- lines = out.each_line
- assert_equal('', err)
- assert_equal('value1,value1', lines.next.chomp)
- assert_equal('value2,value2', lines.next.chomp)
- end
-
- def test_time_out_handle_response
- runner = new_runner
- response = new_response(success: false, timed_out: true)
- lambda = runner.handle_response(response, 0, nil)
- assert(lambda.lambda?)
- def runner.save_error(input)
- warn(input)
- end
- out, err = capture_io_while(&lambda)
- assert_equal('', out)
- assert(err.chomp.start_with?('request timed out'))
- end
-
- def test_code_0_handle_response
- runner = new_runner
- response = new_response(success: false, timed_out: false, code: 0)
- lambda = runner.handle_response(response, 0, nil)
- assert(lambda.lambda?)
- def runner.save_error(input)
- warn(input)
- end
- out, err = capture_io_while(&lambda)
- assert_equal('', out)
- assert(err.chomp.start_with?('could not get an http'))
- end
-
- def test_failed_handle_response
- runner = new_runner
- response = new_response(success: false, timed_out: false, code: 10)
- lambda = runner.handle_response(response, 0, nil)
- assert(lambda.lambda?)
- def runner.save_error(input)
- warn(input)
- end
- out, err = capture_io_while(&lambda)
- assert_equal('', out)
- assert(err.chomp.start_with?('Got 10'))
- end
-
- def test_run
- runner = new_runner('taxonomy', host: 'http://api.unipept.ugent.be')
- out, err = capture_io_while do
- def runner.input_iterator
- %w[0 1 2].each
- end
-
- def runner.batch_size
- 2
- end
- runner.run
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('taxon_id'))
- assert(lines.next.start_with?('1,root'))
- assert(lines.next.start_with?('2,Bacteria'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def new_runner(command_name = 'test', options = { host: 'http://param_host' }, arguments = [])
- command = Cri::Command.define { name command_name }
- Commands::ApiRunner.new(options, arguments, command)
- end
-
- def new_response(values)
- response = Class.new do
- def initialize(values)
- super()
- @values = values
- end
-
- def success?
- @values[:success]
- end
-
- def timed_out?
- @values[:timed_out]
- end
-
- def code
- @values[:code]
- end
-
- def response_body
- @values[:response_body]
- end
-
- def return_message
- ''
- end
-
- def request
- o = Object.new
- def o.options
- ''
- end
-
- def o.encoded_body
- ''
- end
- o
- end
- end
- response.new(values)
- end
- end
-end
diff --git a/test/commands/unipept/test_config.rb b/test/commands/unipept/test_config.rb
deleted file mode 100644
index 348ea3bb..00000000
--- a/test/commands/unipept/test_config.rb
+++ /dev/null
@@ -1,64 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptConfigTestCase < Unipept::TestCase
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[config -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[config --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_no_args
- _out, err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[config])
- end
- end
- assert(err.include?('show help for this command'))
- end
-
- def test_too_many_args
- _out, err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[config a b c])
- end
- end
- assert(err.include?('show help for this command'))
- end
-
- def test_setting_config
- value = Random.rand.to_s
- config = Unipept::Configuration.new
- config.delete('test')
- config.save
- out, _err = capture_io_while do
- Commands::Unipept.run(['config', 'test', value])
- end
- assert_equal("test was set to #{value}", out.chomp)
- assert_equal(value, Unipept::Configuration.new['test'])
- end
-
- def test_getting_config
- value = Random.rand.to_s
- config = Unipept::Configuration.new
- config['test'] = value
- config.save
- out, _err = capture_io_while do
- Commands::Unipept.run(%w[config test])
- end
- config.delete('test')
- config.save
- assert_equal(value, out.chomp)
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2ec.rb b/test/commands/unipept/test_pept2ec.rb
deleted file mode 100644
index 0e0160fc..00000000
--- a/test/commands/unipept/test_pept2ec.rb
+++ /dev/null
@@ -1,140 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2ecTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2ec' }
- pept2ec = Commands::Pept2ec.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2ec.default_batch_size)
- pept2ec.options[:all] = true
- assert_equal(100, pept2ec.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2ec' }
- pept2ec = Commands::Pept2ec.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2ec.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'pept2ec' }
- pept2ec = Commands::Pept2ec.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, pept2ec.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'pept2ec' }
- pept2ec = Commands::Pept2ec.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2ec.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2ec -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2ec --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
- assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,total_protein_count,ec_number,ec_protein_count'))
- assert(lines.next.start_with?('>test,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
- assert(lines.next.start_with?('>test,AALER,208,6.1.1.16 2.7.7.38,44 13'))
- assert(lines.next.start_with?('>tost,AALTER,7,3.1.3.3 6.3.2.13,2 2'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be --batch 2 --select ec_number >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,ec_number'))
- assert(lines.next.start_with?('>test,AALTER,3.1.3.3 6.3.2.13'))
- assert(lines.next.start_with?('>test,AALER,6.1.1.16 2.7.7.38'))
- assert(lines.next.start_with?('>tost,AALTER,3.1.3.3 6.3.2.13'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
-
- def test_run_with_empty_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be AKVYSKY])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_empty_and_existing_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be AKVYSKY AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count'))
- assert(lines.next.start_with?('AALTER,7,3.1.3.3 6.3.2.13,2 2'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_existing_peptide_no_ec_numbers
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2ec --host http://api.unipept.ugent.be MDGTEYIIVK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count'))
- assert(lines.next.start_with?('MDGTEYIIVK,35'))
- assert_raises(StopIteration) { lines.next }
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2funct.rb b/test/commands/unipept/test_pept2funct.rb
deleted file mode 100644
index e8141bfb..00000000
--- a/test/commands/unipept/test_pept2funct.rb
+++ /dev/null
@@ -1,140 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2functTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2funct' }
- pept2funct = Commands::Pept2funct.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2funct.default_batch_size)
- pept2funct.options[:all] = true
- assert_equal(100, pept2funct.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2funct' }
- pept2funct = Commands::Pept2funct.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2funct.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'pept2funct' }
- pept2funct = Commands::Pept2funct.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, pept2funct.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'pept2funct' }
- pept2funct = Commands::Pept2funct.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2funct.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2funct -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2funct --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('AALTER,7,3.1.3.3 6.3.2.13,2 2,GO:0000287 GO:0005737,5 5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,total_protein_count,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('>test,AALTER,7,3.1.3.3 6.3.2.13,2 2,GO:0000287 GO:0005737,5 5,IPR013221,2', lines.next.rstrip)
- assert_equal('>test,AALER,208,6.1.1.16 2.7.7.38,44 13,GO:0005737 GO:0005524,106 75,IPR014729 IPR009080,48 45', lines.next.rstrip)
- assert_equal('>tost,AALTER,7,3.1.3.3 6.3.2.13,2 2,GO:0000287 GO:0005737,5 5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be --batch 2 --select go_term >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,go_term', lines.next.rstrip)
- assert_equal('>test,AALTER,GO:0000287 GO:0005737', lines.next.rstrip)
- assert_equal('>test,AALER,GO:0005737 GO:0005524', lines.next.rstrip)
- assert_equal('>tost,AALTER,GO:0000287 GO:0005737', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
-
- def test_run_with_empty_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be AKVYSKY])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_empty_and_existing_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be AKVYSKY AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count,ec_number,ec_protein_count,go_term,go_protein_count'))
- assert_equal('AALTER,7,3.1.3.3 6.3.2.13,2 2,GO:0000287 GO:0005737,5 5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_existing_peptide_no_go_terms
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2funct --host http://api.unipept.ugent.be AAEVALVGTEK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert('peptide,total_protein_count', lines.next.rstrip)
- assert_equal('AAEVALVGTEK,0', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2go.rb b/test/commands/unipept/test_pept2go.rb
deleted file mode 100644
index 5f1f6ae3..00000000
--- a/test/commands/unipept/test_pept2go.rb
+++ /dev/null
@@ -1,140 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2goTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2go' }
- pept2go = Commands::Pept2go.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2go.default_batch_size)
- pept2go.options[:all] = true
- assert_equal(100, pept2go.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2go' }
- pept2go = Commands::Pept2go.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2go.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'pept2go' }
- pept2go = Commands::Pept2go.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, pept2go.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'pept2go' }
- pept2go = Commands::Pept2go.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2go.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2go -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2go --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count,go_term,go_protein_count'))
- assert(lines.next.start_with?('AALTER,7,GO:0000287 GO:0005737 GO:0042803,5 5 1'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,total_protein_count,go_term,go_protein_count'))
- assert(lines.next.start_with?('>test,AALTER,7,GO:0000287 GO:0005737 GO:0042803,5 5 1'))
- assert(lines.next.start_with?('>test,AALER,208,GO:0005737 GO:0005524 GO:0008270,106 75 48'))
- assert(lines.next.start_with?('>tost,AALTER,7,GO:0000287 GO:0005737 GO:0042803,5 5 1'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be --batch 2 --select go_term >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,go_term'))
- assert(lines.next.start_with?('>test,AALTER,GO:0000287 GO:0005737 GO:0042803'))
- assert(lines.next.start_with?('>test,AALER,GO:0005737 GO:0005524 GO:0008270'))
- assert(lines.next.start_with?('>tost,AALTER,GO:0000287 GO:0005737 GO:0042803'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
-
- def test_run_with_empty_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be AKVYSKY])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_empty_and_existing_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be AKVYSKY AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count,go_term,go_protein_count'))
- assert(lines.next.start_with?('AALTER,7,GO:0000287 GO:0005737 GO:0042803,5 5 1'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_existing_peptide_no_go_terms
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2go --host http://api.unipept.ugent.be AAEVALVGTEK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count'))
- assert(lines.next.start_with?('AAEVALVGTEK,0'))
- assert_raises(StopIteration) { lines.next }
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2interpro.rb b/test/commands/unipept/test_pept2interpro.rb
deleted file mode 100644
index d0bfac97..00000000
--- a/test/commands/unipept/test_pept2interpro.rb
+++ /dev/null
@@ -1,140 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class Unipeptpept2interproTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2interpro' }
- pept2interpro = Commands::Pept2interpro.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2interpro.default_batch_size)
- pept2interpro.options[:all] = true
- assert_equal(100, pept2interpro.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2interpro' }
- pept2interpro = Commands::Pept2interpro.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2interpro.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'pept2interpro' }
- pept2interpro = Commands::Pept2interpro.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, pept2interpro.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'pept2interpro' }
- pept2interpro = Commands::Pept2interpro.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2interpro.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2interpro -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2interpro --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('AALTER,7,IPR013221 IPR036565 IPR023214,2 2 2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,total_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('>test,AALTER,7,IPR013221 IPR036565 IPR023214,2 2 2', lines.next.rstrip)
- assert_equal('>test,AALER,208,IPR014729 IPR009080 IPR015803,48 45 44', lines.next.rstrip)
- assert_equal('>tost,AALTER,7,IPR013221 IPR036565 IPR023214,2 2 2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be --batch 2 --select ipr_code >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,ipr_code', lines.next.rstrip)
- assert_equal('>test,AALTER,IPR013221 IPR036565 IPR023214', lines.next.rstrip)
- assert_equal('>test,AALER,IPR014729 IPR009080 IPR015803', lines.next.rstrip)
- assert_equal('>tost,AALTER,IPR013221 IPR036565 IPR023214', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
-
- def test_run_with_empty_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be AKVYSKY])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_empty_and_existing_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be AKVYSKY AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('AALTER,7,IPR013221 IPR036565 IPR023214,2 2 2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_existing_peptide_no_ipr_codes
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2interpro --host http://api.unipept.ugent.be VAQFLL])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,total_protein_count'))
- assert(lines.next.start_with?('VAQFLL,0'))
- assert_raises(StopIteration) { lines.next }
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2lca.rb b/test/commands/unipept/test_pept2lca.rb
deleted file mode 100644
index 5d4abed1..00000000
--- a/test/commands/unipept/test_pept2lca.rb
+++ /dev/null
@@ -1,109 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2lcaTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2lca' }
- pept2lca = Commands::Pept2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2lca.default_batch_size)
- pept2lca.options[:all] = true
- assert_equal(100, pept2lca.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2lca' }
- pept2lca = Commands::Pept2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2lca.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'pept2lca' }
- pept2lca = Commands::Pept2lca.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, pept2lca.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'pept2lca' }
- pept2lca = Commands::Pept2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, pept2lca.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2lca -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2lca --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2lca --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,taxon_id'))
- assert(lines.next.start_with?('AALTER,1,root,no rank'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2lca --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,taxon_id'))
- assert(lines.next.start_with?('>test,AALTER,1,root,no rank'))
- assert(lines.next.start_with?('>test,AALER,1,root,no rank'))
- assert(lines.next.start_with?('>tost,AALTER,1,root,no rank'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2lca --host http://api.unipept.ugent.be --batch 2 --select taxon_id >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,taxon_id'))
- assert(lines.next.start_with?('>test,AALTER,1'))
- assert(lines.next.start_with?('>test,AALER,1'))
- assert(lines.next.start_with?('>tost,AALTER,1'))
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2lca --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2lca --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2prot.rb b/test/commands/unipept/test_pept2prot.rb
deleted file mode 100644
index 26f287dd..00000000
--- a/test/commands/unipept/test_pept2prot.rb
+++ /dev/null
@@ -1,110 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2protTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2prot' }
- pept2prot = Commands::Pept2prot.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(10, pept2prot.default_batch_size)
- pept2prot.options[:all] = true
- assert_equal(5, pept2prot.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2prot' }
- pept2prot = Commands::Pept2prot.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2prot.required_fields)
- end
-
- def test_meganize_options
- command = Cri::Command.define { name 'pept2prot' }
- pept2prot = Commands::Pept2prot.new({ meganize: true, host: 'http://api.unipept.ugent.be' }, [], command)
- assert(pept2prot.options[:all])
- assert_equal(['peptide,refseq_protein_ids'], pept2prot.options[:select])
- assert_equal('blast', pept2prot.options[:format])
- end
-
- def test_meganize_options_overridecommand
- command = Cri::Command.define { name 'pept2prot' }
- pept2prot = Commands::Pept2prot.new({ meganize: true, format: 'xml', all: false, select: ['something'], host: 'http://api.unipept.ugent.be' }, [], command)
- assert(pept2prot.options[:all])
- assert_equal(['peptide,refseq_protein_ids'], pept2prot.options[:select])
- assert_equal('blast', pept2prot.options[:format])
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2prot -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2prot --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2prot --host http://api.unipept.ugent.be ENFVYIAK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,uniprot_id,protein_name,taxon_id'))
- assert(lines.next.start_with?('ENFVYIAK,'))
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2prot --host http://api.unipept.ugent.be --batch 2 >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,uniprot_id,protein_name,taxon_id'))
- assert(lines.count { |line| line.start_with? '>test,EGGAGSSTGQR,' } >= 1)
- assert(lines.count { |line| line.start_with? '>test,ENFVYIAK,' } >= 1)
- assert(lines.count { |line| line.start_with? '>tost,EGGAGSSTGQR,' } >= 1)
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2prot --host http://api.unipept.ugent.be --batch 2 --select uniprot_id >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,uniprot_id'))
- assert(lines.count { |line| line.start_with? '>test,EGGAGSSTGQR,' } >= 1)
- assert(lines.count { |line| line.start_with? '>test,ENFVYIAK,' } >= 1)
- assert(lines.count { |line| line.start_with? '>tost,EGGAGSSTGQR,' } >= 1)
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2prot --host http://api.unipept.ugent.be --batch 2 --format json >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2prot --host http://api.unipept.ugent.be --batch 2 --format xml >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
- end
-end
diff --git a/test/commands/unipept/test_pept2taxa.rb b/test/commands/unipept/test_pept2taxa.rb
deleted file mode 100644
index 7de9c0a2..00000000
--- a/test/commands/unipept/test_pept2taxa.rb
+++ /dev/null
@@ -1,92 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPept2taxaTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'pept2taxa' }
- pept2taxa = Commands::Pept2taxa.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(5, pept2taxa.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'pept2taxa' }
- pept2taxa = Commands::Pept2taxa.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], pept2taxa.required_fields)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2taxa -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[pept2taxa --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2taxa --host http://api.unipept.ugent.be ENFVYIAK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('peptide,taxon_id,taxon_name,taxon_rank'))
- assert(lines.next.start_with?('ENFVYIAK,'))
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2taxa --host http://api.unipept.ugent.be --batch 2 >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,taxon_id,taxon_name,taxon_rank'))
- assert(lines.count { |line| line.start_with? '>test,EGGAGSSTGQR,' } >= 1)
- assert(lines.count { |line| line.start_with? '>test,ENFVYIAK,' } >= 1)
- assert(lines.count { |line| line.start_with? '>tost,EGGAGSSTGQR,' } >= 1)
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2taxa --host http://api.unipept.ugent.be --batch 2 --select taxon_id >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('fasta_header,peptide,taxon_id'))
- assert(lines.count { |line| line.start_with? '>test,EGGAGSSTGQR,' } >= 1)
- assert(lines.count { |line| line.start_with? '>test,ENFVYIAK,' } >= 1)
- assert(lines.count { |line| line.start_with? '>tost,EGGAGSSTGQR,' } >= 1)
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2taxa --host http://api.unipept.ugent.be --batch 2 --format json >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[pept2taxa --host http://api.unipept.ugent.be --batch 2 --format xml >test EGGAGSSTGQR ENFVYIAK >tost EGGAGSSTGQR])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
- end
-end
diff --git a/test/commands/unipept/test_peptinfo.rb b/test/commands/unipept/test_peptinfo.rb
deleted file mode 100644
index 3adf6b85..00000000
--- a/test/commands/unipept/test_peptinfo.rb
+++ /dev/null
@@ -1,140 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptPeptinfoTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'peptinfo' }
- peptinfo = Commands::Peptinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, peptinfo.default_batch_size)
- peptinfo.options[:all] = true
- assert_equal(100, peptinfo.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'peptinfo' }
- peptinfo = Commands::Peptinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['peptide'], peptinfo.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'peptinfo' }
- peptinfo = Commands::Peptinfo.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, peptinfo.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'peptinfo' }
- peptinfo = Commands::Peptinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, peptinfo.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[peptinfo -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[peptinfo --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('AALTER,7,1,root,no rank,3.1.3.3,2,GO:0000287,5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be --batch 2 >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('>test,AALTER,7,1,root,no rank,3.1.3.3,2,GO:0000287,5,IPR013221,2', lines.next.rstrip)
- assert_equal('>test,AALER,208,1,root,no rank,6.1.1.16,44,GO:0005737,106,IPR014729 IPR009080 IPR015803,48 45 44', lines.next.rstrip)
- assert_equal('>tost,AALTER,7,1,root,no rank,3.1.3.3,2,GO:0000287,5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_and_select
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be --batch 2 --select go_term >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('fasta_header,peptide,go_term', lines.next.rstrip)
- assert_equal('>test,AALTER,GO:0000287', lines.next.rstrip)
- assert_equal('>test,AALER,GO:0005737', lines.next.rstrip)
- assert_equal('>tost,AALTER,GO:0000287', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_fasta_multiple_batches_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be --batch 2 --format json >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(output.include?('fasta_header'))
- end
-
- def test_run_with_fasta_multiple_batches_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be --batch 2 --format xml >test AALTER AALER >tost AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- assert(output.include?(''))
- end
-
- def test_run_with_empty_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be AKVYSKY])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_with_empty_and_existing_peptide
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be AKVYSKY AALTER])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,taxon_id,taxon_name,taxon_rank,ec_number,ec_protein_count,go_term,go_protein_count,ipr_code,ipr_protein_count', lines.next.rstrip)
- assert_equal('AALTER,7,1,root,no rank,3.1.3.3,2,GO:0000287,5,IPR013221,2', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
-
- def test_run_existing_peptide_no_go_terms
- out, err = capture_io_while do
- Commands::Unipept.run(%w[peptinfo --host http://api.unipept.ugent.be AAEVALVGTEK])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('peptide,total_protein_count,taxon_id,taxon_name,taxon_rank', lines.next.rstrip)
- assert_equal('AAEVALVGTEK,0,1,root,no rank', lines.next.rstrip)
- assert_raises(StopIteration) { lines.next }
- end
- end
-end
diff --git a/test/commands/unipept/test_protinfo.rb b/test/commands/unipept/test_protinfo.rb
deleted file mode 100644
index d9756f3e..00000000
--- a/test/commands/unipept/test_protinfo.rb
+++ /dev/null
@@ -1,45 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptProtinfoTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'protinfo' }
- protinfo = Commands::Protinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, protinfo.default_batch_size)
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'protinfo' }
- protinfo = Commands::Protinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['protein'], protinfo.required_fields)
- end
-
- def test_argument_batch_size
- command = Cri::Command.define { name 'protinfo' }
- protinfo = Commands::Protinfo.new({ host: 'http://api.unipept.ugent.be', batch: '123' }, [], command)
- assert_equal(123, protinfo.batch_size)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'protinfo' }
- protinfo = Commands::Protinfo.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(1000, protinfo.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[protinfo -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[protinfo --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
- end
-end
diff --git a/test/commands/unipept/test_taxa2lca.rb b/test/commands/unipept/test_taxa2lca.rb
deleted file mode 100644
index 79e8a089..00000000
--- a/test/commands/unipept/test_taxa2lca.rb
+++ /dev/null
@@ -1,69 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptTaxa2lcaTestCase < Unipept::TestCase
- def test_default_batch_size
- command = Cri::Command.define { name 'taxa2lca' }
- taxa2lca = Commands::Taxa2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_raises RuntimeError do
- taxa2lca.default_batch_size
- end
- end
-
- def test_required_fields
- command = Cri::Command.define { name 'taxa2lca' }
- taxa2lca = Commands::Taxa2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal([], taxa2lca.required_fields)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[taxa2lca -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[taxa2lca --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2lca --host http://api.unipept.ugent.be 216816 1680])
- end
- lines = out.each_line
- assert_equal('', err)
- assert(lines.next.start_with?('taxon_id,taxon_name,taxon_rank'))
- assert(lines.next.start_with?('1678,Bifidobacterium,genus'))
- end
-
- def test_run_xml
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2lca --host http://api.unipept.ugent.be --format xml 216816 1680])
- end
- lines = out.each_line
- output = lines.to_a.join.chomp
- assert_equal('', err)
- assert(output.start_with?(''))
- assert(output.end_with?(''))
- end
-
- def test_run_json
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2lca --host http://api.unipept.ugent.be --format json 216816 1680])
- end
- lines = out.each_line
- output = lines.to_a.join.chomp
- assert_equal('', err)
- assert(output.start_with?('['))
- assert(output.end_with?(']'))
- assert(!output.include?('}{'))
- assert(!output.include?(']['))
- end
- end
-end
diff --git a/test/commands/unipept/test_taxa2tree.rb b/test/commands/unipept/test_taxa2tree.rb
deleted file mode 100644
index 8032e5ab..00000000
--- a/test/commands/unipept/test_taxa2tree.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-require_relative '../../../lib/commands'
-
-module Unipept
- class UnipeptTaxa2TreeTestCase < Unipept::TestCase
- def test_required_fields
- command = Cri::Command.define { name 'taxa2tree' }
- taxa2tree = Commands::Taxa2Tree.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(['taxon_id'], taxa2tree.required_fields)
- end
-
- def test_batch_size
- command = Cri::Command.define { name 'taxa2tree' }
- taxa2tree = Commands::Taxa2Tree.new({ host: 'http://api.unipept.ugent.be' }, [], command)
- assert_equal(0, taxa2tree.batch_size)
- end
-
- def test_help
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[taxa2tree -h])
- end
- end
- assert(out.include?('show help for this command'))
-
- out, _err = capture_io_while do
- assert_raises SystemExit do
- Commands::Unipept.run(%w[taxa2tre --help])
- end
- end
- assert(out.include?('show help for this command'))
- end
- end
-
- def test_run
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2tree --host http://api.unipept.ugent.be 78 57 89 28 67])
- end
- lines = out.each_line
- output = lines.to_a.join.chomp
- assert_equal('', err)
-
- assert(output.start_with?('{'))
- assert(output.end_with?('}'))
- assert(output.include?('Bacteria'))
- assert(output.include?('superkingdom'))
- end
-
- def test_run_url
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2tree --host http://api.unipept.ugent.be --format url 78 57 89 28 67])
- end
- lines = out.each_line
- assert_equal('', err)
- assert_equal('https://bl.ocks.org/8837824df7ef9831a9b4216f3fb547ee', lines.next.rstrip)
- end
-
- def test_run_html
- out, err = capture_io_while do
- Commands::Unipept.run(%w[taxa2tree --host http://api.unipept.ugent.be --format html 78 57 89 28 67])
- end
- lines = out.each_line
- assert_equal('', err)
- output = lines.to_a.join.chomp
- assert(output.start_with?(''))
- assert(output.end_with?('