Raise default threads for BAM parsing 8->32, BLAS 8->16 #292

Draft: wants to merge 1 commit into base: master

22 changes: 13 additions & 9 deletions vamb/__main__.py
@@ -22,13 +22,13 @@
 import pandas as pd
 
 _ncpu = os.cpu_count()
-DEFAULT_THREADS = 8 if _ncpu is None else min(_ncpu, 8)
+DEFAULT_BLAS_THREADS = 16 if _ncpu is None else min(_ncpu, 16)
 
 # These MUST be set before importing numpy
 # I know this is a shitty hack, see https://github.com/numpy/numpy/issues/11826
-os.environ["MKL_NUM_THREADS"] = str(DEFAULT_THREADS)
-os.environ["NUMEXPR_NUM_THREADS"] = str(DEFAULT_THREADS)
-os.environ["OMP_NUM_THREADS"] = str(DEFAULT_THREADS)
+os.environ["MKL_NUM_THREADS"] = str(DEFAULT_BLAS_THREADS)
+os.environ["NUMEXPR_NUM_THREADS"] = str(DEFAULT_BLAS_THREADS)
+os.environ["OMP_NUM_THREADS"] = str(DEFAULT_BLAS_THREADS)
 
 # Append vamb to sys.path to allow vamb import even if vamb was not installed
 # using pip
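The ordering constraint in the comment above is real: MKL, OpenMP, and numexpr read their *_NUM_THREADS variables once, when the native libraries are first loaded, so assigning os.environ after numpy has already been imported has no effect. A minimal standalone sketch of the pattern (not part of this PR; threadpoolctl is a third-party package used here only to verify the resulting caps):

    import os

    # Must run before numpy is first imported; the BLAS/OpenMP runtimes
    # read these variables at library load time, not at call time.
    os.environ["MKL_NUM_THREADS"] = "16"
    os.environ["OMP_NUM_THREADS"] = "16"

    import numpy as np  # noqa: E402 -- importing numpy loads the BLAS, snapshotting the caps
    from threadpoolctl import threadpool_info  # pip install threadpoolctl

    # Each entry reports the thread count a loaded BLAS/OpenMP pool will use.
    for pool in threadpool_info():
        print(pool["internal_api"], pool["num_threads"])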
@@ -771,9 +771,11 @@ def cluster_and_write_files(
         print(
             str(i + 1),
             None if cluster.radius is None else round(cluster.radius, 3),
-            None
-            if cluster.observed_pvr is None
-            else round(cluster.observed_pvr, 2),
+            (
+                None
+                if cluster.observed_pvr is None
+                else round(cluster.observed_pvr, 2)
+            ),
             cluster.kind_str,
             sum(sequence_lens[i] for i in cluster.members),
             len(cluster.members),
@@ -1686,9 +1688,11 @@ def add_input_output_arguments(subparser):
         dest="nthreads",
         metavar="",
         type=int,
-        default=DEFAULT_THREADS,
+        default=vamb.parsebam.DEFAULT_BAM_THREADS,
         help=(
-            "number of threads to use " "[min(" + str(DEFAULT_THREADS) + ", nbamfiles)]"
+            "number of threads to read BAM files [min("
+            + str(vamb.parsebam.DEFAULT_BAM_THREADS)
+            + ", nbamfiles)]"
         ),
     )
     inputos.add_argument(
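For reference, a self-contained sketch of how the new default surfaces in the CLI. Only dest, type, default, and the help text are taken from the hunk above; the option flag itself is outside the visible context, so --threads below is an assumed stand-in:

    import argparse

    DEFAULT_BAM_THREADS = 32  # stand-in for vamb.parsebam.DEFAULT_BAM_THREADS

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--threads",  # assumed flag name; the real option string is not shown above
        dest="nthreads",
        metavar="",
        type=int,
        default=DEFAULT_BAM_THREADS,
        help="number of threads to read BAM files [min("
        + str(DEFAULT_BAM_THREADS)
        + ", nbamfiles)]",
    )

    print(parser.parse_args([]).nthreads)  # -> 32 when the flag is omitted

Note that the old help string relied on implicit string-literal concatenation ("number of threads to use " "[min(" ...), which was easy to misread; the rewrite joins the pieces with explicit + operators.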
1 change: 0 additions & 1 deletion vamb/aamb_encode.py
@@ -1,6 +1,5 @@
"""Adversarial autoencoders (AAE) for metagenomics binning, this files contains the implementation of the AAE"""


import numpy as np
from math import log, isfinite
import time
15 changes: 8 additions & 7 deletions vamb/parsebam.py
@@ -14,12 +14,13 @@
 from typing import Optional, TypeVar, Union, IO, Sequence, Iterable
 from pathlib import Path
 import shutil
-
-_ncpu = _os.cpu_count()
-DEFAULT_THREADS = 8 if _ncpu is None else _ncpu
+import os
 
 A = TypeVar("A", bound="Abundance")
 
+_ncpu = os.cpu_count()
+DEFAULT_BAM_THREADS = 32 if _ncpu is None else min(_ncpu, 32)
+
 
 class Abundance:
     "Object representing contig abundance. Contains a matrix and refhash."
@@ -115,10 +116,10 @@ def from_files(

         chunksize = min(nthreads, len(paths))
 
-        # We cap it to 16 threads, max. This will prevent pycoverm from consuming a huge amount
+        # We cap it to DEFAULT_BAM_THREADS threads, max. This will prevent pycoverm from consuming a huge amount
         # of memory if given a crapload of threads, and most programs will probably be IO bound
-        # when reading 16 files at a time.
-        chunksize = min(chunksize, 16)
+        # when reading DEFAULT_BAM_THREADS files at a time.
+        chunksize = min(chunksize, DEFAULT_BAM_THREADS)
 
         # If it can be done in memory, do so
         if chunksize >= len(paths):
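Taken together with the constant defined at the top of the file, the capping arithmetic works out as in this small sketch (effective_chunksize is an illustrative name, not a function in the codebase):

    DEFAULT_BAM_THREADS = 32  # i.e. min(os.cpu_count(), 32), as defined above

    def effective_chunksize(nthreads: int, n_bam_files: int) -> int:
        # Never use more reader threads than there are BAM files...
        chunksize = min(nthreads, n_bam_files)
        # ...and never let pycoverm open more than DEFAULT_BAM_THREADS files at once.
        return min(chunksize, DEFAULT_BAM_THREADS)

    # 40 requested threads, 50 BAM files: 32 files are read at a time, and
    # because 32 < 50 the cache_directory branch in the next hunk must be taken.
    assert effective_chunksize(40, 50) == 32
    # 8 threads, 4 files: one in-memory pass, no cache needed.
    assert effective_chunksize(8, 4) == 4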
@@ -134,7 +135,7 @@
         else:
             if cache_directory is None:
                 raise ValueError(
-                    "If min(16, nthreads) < len(paths), cache_directory must not be None"
+                    "If min(DEFAULT_BAM_THREADS, nthreads) < len(paths), cache_directory must not be None"
                 )
             return cls.chunkwise_loading(
                 paths,
1 change: 0 additions & 1 deletion vamb/semisupervised_encode.py
@@ -1,6 +1,5 @@
"""Semisupervised multimodal VAEs for metagenomics binning, this files contains the implementation of the VAEVAE for MMSEQ predictions"""


__cmd_doc__ = """Encode depths and TNF using a VAE to latent representation"""

import numpy as _np
1 change: 0 additions & 1 deletion vamb/taxvamb_encode.py
@@ -1,6 +1,5 @@
"""Hierarchical loss for the labels suggested in https://arxiv.org/abs/2210.10929"""


__cmd_doc__ = """Hierarchical loss for the labels"""


12 changes: 6 additions & 6 deletions workflow_avamb/src/rip_bins.py
@@ -183,9 +183,9 @@ def remove_meaningless_edges_from_pairs(
             contig_length,
         )
         print("Cluster ripped because of a meaningless edge ", cluster_updated)
-        clusters_changed_but_not_intersecting_contigs[
-            cluster_updated
-        ] = cluster_contigs[cluster_updated]
+        clusters_changed_but_not_intersecting_contigs[cluster_updated] = (
+            cluster_contigs[cluster_updated]
+        )
 
     components: list[set[str]] = list()
     for component in nx.connected_components(graph_clusters):
@@ -295,9 +295,9 @@ def make_all_components_pair(
             contig_length,
         )
         print("Cluster ripped because of a pairing component ", cluster_updated)
-        clusters_changed_but_not_intersecting_contigs[
-            cluster_updated
-        ] = cluster_contigs[cluster_updated]
+        clusters_changed_but_not_intersecting_contigs[cluster_updated] = (
+            cluster_contigs[cluster_updated]
+        )
         component_len = max(
             [
                 len(nx.node_connected_component(graph_clusters, node_i))