
Commit

Allow to limit CPUs used for pre-processing
rkube committed Apr 25, 2022
1 parent fe54f91 commit c993008
Showing 3 changed files with 20 additions and 6 deletions.
7 changes: 4 additions & 3 deletions examples/conf.yaml
@@ -9,17 +9,18 @@
 # will output csvlog, trained model checkpoints, etc.
 # in fs_path_output / [username] / results | csv_logs | model_checkpoints | Graph, etc.

-fs_path: '/Users/'
+fs_path: '/tigress/'
 user_subdir: True
-fs_path_output: '/Users/'
+fs_path_output: '/tigress/'
 user_subdir_output: True
 target: 'hinge' # 'maxhinge' # 'maxhinge' # 'binary' # 'hinge'
 num_gpus: 1 # per node
+max_cpus: 32 #Maximum number of threads to use for pre-processing. Set to -1 to use all available CPUs
 paths:
   signal_prepath: '/signal_data/' # /signal_data/jet/
   shot_list_dir: '/shot_lists/'
   tensorboard_save_path: '/Graph/'
-  data: d3d_0D
+  data: 'd3d_0D'
 # if specific_signals: [] left empty, it will use all valid signals defined on a machine. Only use if need a custom set
 specific_signals: [] # ['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile']
 executable: "mpi_learn.py"
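For context, here is a minimal sketch of how the new max_cpus setting could be consumed once the YAML file has been parsed. The yaml.safe_load call, the file path, and the conf variable name are illustrative assumptions, not code from this commit.

import multiprocessing as mp

import yaml

# Illustrative: load the example config into a plain dict.
with open("examples/conf.yaml") as f:
    conf = yaml.safe_load(f)

# max_cpus == -1 means "use all available CPUs" (minus a margin of two);
# any other value acts as an upper bound on the worker count.
if conf["max_cpus"] == -1:
    use_cores = max(1, mp.cpu_count() - 2)
else:
    use_cores = min(conf["max_cpus"], mp.cpu_count() - 2)

print("pre-processing will use {} worker processes".format(use_cores))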
9 changes: 8 additions & 1 deletion plasma/preprocessor/normalize.py
@@ -17,7 +17,8 @@

 import numpy as np
 from scipy.signal import exponential, correlate
-import pathos.multiprocessing as mp
+#import pathos.multiprocessing as mp
+import multiprocessing as mp

 from plasma.primitives.shots import ShotList, Shot

@@ -128,6 +129,12 @@ def train_on_files(self, shot_files, use_shots, all_machines,
         self.load_stats(verbose=True)
         print('computing normalization for machines {}'.format(
             machines_to_compute))
+        # Adjust number of threads to use for pre-processing.
+        # Limits between 1 and mp.cpu_count() - 2
+        if conf["max_cpus"] == -1:
+            use_cores = max(1, mp.cpu_count() - 2)
+        else:
+            use_cores = min(conf["max_cpus"], mp.cpu_count() - 2)
         use_cores = max(1, mp.cpu_count()-2)
         pool = mp.Pool(use_cores)
         print('running in parallel on {} processes'.format(
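The hunk above adds the clamping described in its comments: with max_cpus set to -1 the worker count defaults to cpu_count() - 2 (but at least 1), otherwise max_cpus caps it. Note that the pre-existing unconditional assignment use_cores = max(1, mp.cpu_count()-2) appears to remain right after the new branch in this file, so it would still override the configured limit here; the preprocess.py change below replaces that line instead. A standalone sketch of the intended selection follows, written as a hypothetical helper (the extra max(1, ...) guard on the capped branch is an addition of this sketch, not part of the commit):

import multiprocessing as mp

def pick_worker_count(max_cpus):
    # Hypothetical helper, not part of the commit.
    # -1: use everything except a two-CPU safety margin (never fewer than 1).
    if max_cpus == -1:
        return max(1, mp.cpu_count() - 2)
    # Otherwise cap at max_cpus, still leaving the margin and never returning 0.
    return max(1, min(max_cpus, mp.cpu_count() - 2))

print(pick_worker_count(-1), pick_worker_count(4))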
10 changes: 8 additions & 2 deletions plasma/preprocessor/preprocess.py
@@ -16,7 +16,8 @@
 import os

 import numpy as np
-import pathos.multiprocessing as mp
+#import pathos.multiprocessing as mp
+import multiprocessing as mp

 from plasma.utils.processing import append_to_filename
 from plasma.utils.diagnostics import print_shot_list_sizes

@@ -87,7 +88,12 @@ def preprocess_from_files(self, shot_files, use_shots):
         # TODO(KGF): generalize the follwowing line to perform well on
         # architecutres other than CPUs, e.g. KNLs
         # min( <desired-maximum-process-count>, max(1,mp.cpu_count()-2) )
-        use_cores = max(1, mp.cpu_count() - 2)
+        # Adjust number of threads to use for pre-processing.
+        # Limits between 1 and mp.cpu_count() - 2
+        if conf["max_cpus"] == -1:
+            use_cores = max(1, mp.cpu_count() - 2)
+        else:
+            use_cores = min(conf["max_cpus"], mp.cpu_count() - 2)
         pool = mp.Pool(use_cores)
         print('Running in parallel on {} processes'.format(pool._processes))
         start_time = time.time()
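Both files also switch from pathos.multiprocessing to the standard-library multiprocessing module for the worker pool. A small usage sketch of the resulting pool is below; normalize_shot is a stand-in name, not a function from this repository. One practical difference worth noting: the standard-library Pool pickles the work function, so the callable handed to map generally has to be a module-level function, whereas pathos (which serializes with dill) also accepts lambdas and closures.

import multiprocessing as mp

def normalize_shot(shot_id):
    # Stand-in for the per-shot pre-processing work.
    return shot_id * 2

if __name__ == "__main__":
    use_cores = max(1, mp.cpu_count() - 2)
    with mp.Pool(use_cores) as pool:
        results = pool.map(normalize_shot, range(8))
    print(results)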
