-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from murphycj/bump-ensembl-version
Update code to support Ensembl 111
- Loading branch information
Showing
10 changed files
with
121 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,6 @@ dist/ | |
*pyc | ||
build/ | ||
*.db | ||
*.ipynb | ||
*clans.tsv | ||
*.db.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
""" | ||
Download and build the agfusion database for a range of releases. | ||
""" | ||
|
||
import subprocess | ||
|
||
|
||
def run_command(command):
    """Echo a shell command, then execute it.

    Args:
        command: Command line, as a single string, handed to the shell.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (because of ``check=True``).
    """
    # Flush so the echo is emitted before the child's own output even when
    # stdout is block-buffered (e.g. redirected to a log file).
    print(f"Running: {command}", flush=True)
    subprocess.run(command, shell=True, check=True)
|
||
|
||
# Species to build; switch to "homo_sapiens" to build the human databases.
species = "mus_musculus"

# Loop over Ensembl releases 92 through 111 (the end of range() is exclusive).
for i in range(92, 112):
    # Check if the file exists on S3.
    s3_check_command = f"aws s3 ls s3://agfusion/agfusion.{species}.{i}.db.gz"
    # check=False on purpose: a non-zero exit status is the expected answer
    # when the object is not in the bucket yet, and must not abort the script.
    # Any other AWS failure will still surface at the upload step below,
    # which runs with check=True.
    result = subprocess.run(
        s3_check_command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )

    if result.returncode == 0:
        print(f"File for release {i} already exists in S3. Skipping...")
        continue

    print(f"Building release {i}")

    # Install the release using pyensembl
    run_command(f"pyensembl install --release {i} --species {species}")

    # Build the agfusion database
    run_command(f"agfusion build -d . -s {species} -r {i} --pfam Pfam-A.clans.tsv")

    # Compress the database
    run_command(f"gzip agfusion.{species}.{i}.db")

    # Upload the compressed file to S3
    run_command(f"aws s3 cp agfusion.{species}.{i}.db.gz s3://agfusion")

    # Delete all files for the release from pyensembl
    run_command(f"pyensembl delete-all-files --release {i} --species {species}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
matplotlib>=3.6.1 | ||
pandas>=1.5.1 | ||
biopython>=1.79 | ||
matplotlib>=3.9.2 | ||
pandas==2.2.3 | ||
biopython>=1.84 | ||
future>=0.16.0 | ||
pyensembl>=1.1.0 | ||
pyensembl>=2.3.13 | ||
numpy<2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters